//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Triple.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMultiVecPseudo(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            TargetRegisterClass ContiguousClass,
                            TargetRegisterClass StridedClass,
                            unsigned ContiguousOpc, unsigned StridedOpc);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (const MachineOperand &MO :
       llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t RenamableState =
      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode)
    {
    default: llvm_unreachable("unhandled!"); break;

    case AArch64::ORRWri:
    case AArch64::ORRXri:
      if (I->Op1 == 0) {
        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                           .add(MI.getOperand(0))
                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(I->Op2));
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        bool DstIsDead = MI.getOperand(0).isDead();
        MIBS.push_back(
            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                .addReg(DstReg, RegState::Define |
                                    getDeadRegState(DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(DstReg)
                .addImm(I->Op2));
      }
      break;
    case AArch64::ORRWrs:
    case AArch64::ORRXrs: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
              .addReg(DstReg, RegState::Define |
                                  getDeadRegState(DstIsDead && LastItem) |
                                  RenamableState)
              .addReg(DstReg)
              .addReg(DstReg)
              .addImm(I->Op2));
    } break;
    case AArch64::ANDXri:
    case AArch64::EORXri:
      if (I->Op1 == 0) {
        MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                           .add(MI.getOperand(0))
                           .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                           .addImm(I->Op2));
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        bool DstIsDead = MI.getOperand(0).isDead();
        MIBS.push_back(
            BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                .addReg(DstReg, RegState::Define |
                                    getDeadRegState(DstIsDead && LastItem) |
                                    RenamableState)
                .addReg(DstReg)
                .addImm(I->Op2));
      }
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg, RegState::Define |
                                             getDeadRegState(DstIsDead && LastItem) |
                                             RenamableState)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addReg(DstReg)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
        .addImm(0).addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  unsigned LdxpOp, StxpOp;

  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), FailBB);
  MF->insert(++FailBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     sbcs xDestHi, xDesiredHi
  //     b.ne .Ldone
  BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(FailBB);
  LoadCmpBB->addSuccessor(FailBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(DestLo.getReg())
      .addReg(DestHi.getReg())
      .addReg(AddrReg);
  BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  FailBB->addSuccessor(LoadCmpBB);
  FailBB->addSuccessor(DoneBB);
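  // Everything from the pseudo to the end of the original block now moves
  // into DoneBB; the original block is rewired to fall through into the
  // compare-and-swap loop, and the pseudo itself is erased below.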
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *FailBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also the _ZERO specifies the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand,
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;

  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==>  FSUBR Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    [[fallthrough]];
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  case AArch64::DestructiveUnaryPassthru:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
    if (DstReg == MI.getOperand(3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
      UseRev = true;
    }
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinary:
    DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveUnaryPassthru:
  case AArch64::DestructiveBinaryImm:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
    break;
  }

  // Resolve the reverse opcode
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, LSLZero, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_B;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_H;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_S;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_D;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
    // If we cannot prefix the requested instruction we'll instead emit a
    // prefixed_zeroing_mov for DestructiveBinary.
    assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
            DType == AArch64::DestructiveBinaryComm ||
            DType == AArch64::DestructiveBinaryCommWithRev) &&
           "The destructive operand should be unique");
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg());

    // After the movprfx, the destructive operand is same as Dst
    DOPIdx = 0;

    // Create the additional LSL to zero the lanes when the DstReg is not
    // unique. Zeros the lanes in z0 that aren't active in p0 with the sequence
    // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
    if ((DType == AArch64::DestructiveBinary ||
         DType == AArch64::DestructiveBinaryComm ||
         DType == AArch64::DestructiveBinaryCommWithRev) &&
        !DOPRegIsUnique) {
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
          .addReg(DstReg, RegState::Define)
          .add(MI.getOperand(PredIdx))
          .addReg(DstReg)
          .addImm(0);
    }
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
    assert(DOPRegIsUnique && "The destructive operand should be unique");
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveUnaryPassthru:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(PredIdx))
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  }

  if (PRFX) {
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(LoopBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
          Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
         "Unexpected opcode");
  unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)
                        ? RegState::Define
                        : 0;
  unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
                      ? AArch64::zsub0
                      : AArch64::psub0;
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
                RState)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}

// Create a call with the passed opcode and explicit operands, copying over all
// the implicit operands from *MBBI, starting at the regmask.
static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       const AArch64InstrInfo *TII,
                                       unsigned Opcode,
                                       ArrayRef<MachineOperand> ExplicitOps,
                                       unsigned RegMaskStartIdx) {
  // Build the MI, with explicit operands first (including the call target).
  MachineInstr *Call =
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode))
          .add(ExplicitOps)
          .getInstr();

  // Register arguments are added during ISel, but cannot be added as explicit
  // operands of the branch as it expects to be B <target> which is only one
  // operand. Instead they are implicit operands used by the branch.
  while (!MBBI->getOperand(RegMaskStartIdx).isRegMask()) {
    const MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);
    assert(MOP.isReg() && "can only add register operands");
    Call->addOperand(MachineOperand::CreateReg(
        MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
        /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
    RegMaskStartIdx++;
  }
  for (const MachineOperand &MO :
       llvm::drop_begin(MBBI->operands(), RegMaskStartIdx))
    Call->addOperand(MO);

  return Call;
}

// Create a call to CallTarget, copying over all the operands from *MBBI,
// starting at the regmask.
static MachineInstr *createCall(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const AArch64InstrInfo *TII,
                                MachineOperand &CallTarget,
                                unsigned RegMaskStartIdx) {
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;

  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  return createCallWithOps(MBB, MBBI, TII, Opc, CallTarget, RegMaskStartIdx);
}

bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to:
  // - a branch to the call target, followed by
  // - the special `mov x29, x29` marker, and
  // - another branch, to the runtime function.
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
  MachineInstr &MI = *MBBI;
  MachineOperand &RVTarget = MI.getOperand(0);
  assert(RVTarget.isGlobal() && "invalid operand for attached call");

  MachineInstr *OriginalCall = nullptr;

  if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
    // ptrauth call.
    const MachineOperand &CallTarget = MI.getOperand(1);
    const MachineOperand &Key = MI.getOperand(2);
    const MachineOperand &IntDisc = MI.getOperand(3);
    const MachineOperand &AddrDisc = MI.getOperand(4);

    assert((Key.getImm() == AArch64PACKey::IA ||
            Key.getImm() == AArch64PACKey::IB) &&
           "Invalid auth call key");

    MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};

    OriginalCall = createCallWithOps(MBB, MBBI, TII, AArch64::BLRA, Ops,
                                     /*RegMaskStartIdx=*/5);
  } else {
    assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
    OriginalCall = createCall(MBB, MBBI, TII, MI.getOperand(1),
                              // Regmask starts after the RV and call targets.
                              /*RegMaskStartIdx=*/2);
  }

  BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
      .addReg(AArch64::FP, RegState::Define)
      .addReg(AArch64::XZR)
      .addReg(AArch64::FP)
      .addImm(0);

  auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
                     .add(RVTarget)
                     .getInstr();

  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);

  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(RVCall->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI) {
  // Expand CALL_BTI pseudo to:
  // - a branch to the call target
  // - a BTI instruction
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
  MachineInstr &MI = *MBBI;
  MachineInstr *Call = createCall(MBB, MBBI, TII, MI.getOperand(0),
                                  // Regmask starts after the call target.
                                  /*RegMaskStartIdx=*/1);

  Call->setCFIType(*MBB.getParent(), MI.getCFIType());

  MachineInstr *BTI =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
          // BTI J so that setjmp can BR to this.
          .addImm(36)
          .getInstr();

  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Call);

  MI.eraseFromParent();
  finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  Register CtxReg = MBBI->getOperand(0).getReg();
  Register BaseReg = MBBI->getOperand(1).getReg();
  int Offset = MBBI->getOperand(2).getImm();
  DebugLoc DL(MBBI->getDebugLoc());
  auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

  if (STI.getTargetTriple().getArchName() != "arm64e") {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
        .addUse(CtxReg)
        .addUse(BaseReg)
        .addImm(Offset / 8)
        .setMIFlag(MachineInstr::FrameSetup);
    MBBI->eraseFromParent();
    return true;
  }

  // We need to sign the context in an address-discriminated way. 0xc31a is a
  // fixed random value, chosen as part of the ABI.
  //     add x16, xBase, #Offset
  //     movk x16, #0xc31a, lsl #48
  //     mov x17, x22/xzr
  //     pacdb x17, x16
  //     str x17, [xBase, #Offset]
  unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
      .addUse(BaseReg)
      .addImm(abs(Offset))
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
      .addUse(AArch64::X16)
      .addImm(0xc31a)
      .addImm(48)
      .setMIFlag(MachineInstr::FrameSetup);
  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried),
  // so move it somewhere before signing.
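  // (X16 and X17 are the AAPCS64 intra-procedure-call scratch registers,
  // which is why this frame-setup sequence is free to use them as
  // temporaries here.)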
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
      .addUse(AArch64::XZR)
      .addUse(CtxReg)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
      .addUse(AArch64::X17)
      .addUse(AArch64::X16)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
      .addUse(AArch64::X17)
      .addUse(BaseReg)
      .addImm(Offset / 8)
      .setMIFlag(MachineInstr::FrameSetup);

  MBBI->eraseFromParent();
  return true;
}

MachineBasicBlock *
AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  assert((std::next(MBBI) != MBB.end() ||
          MI.getParent()->successors().begin() !=
              MI.getParent()->successors().end()) &&
         "Unexpected unreachable in block that restores ZA");

  // Compare TPIDR2_EL0 value against 0.
  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
                                .add(MI.getOperand(0));

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before RestoreZAPseudo.
  //  - SMBB contains the RestoreZAPseudo instruction only.
  //  - EndBB contains all instructions after RestoreZAPseudo.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the CBZ instruction & create a branch to EndBB.
  Cbz.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Replace the pseudo with a call (BL).
  MachineInstrBuilder MIB =
      BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
  MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
  for (unsigned I = 2; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}

MachineBasicBlock *
AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  // In the case of an smstart/smstop before an unreachable, just remove the
  // pseudo. Exception handling code generated by Clang may introduce
  // unreachables, and it seems unnecessary to restore pstate.sm when that
  // happens. Note that this is not just an optimisation: the code below
  // expects a successor instruction/block in order to split the block at MBBI.
  if (std::next(MBBI) == MBB.end() &&
      MI.getParent()->successors().begin() ==
          MI.getParent()->successors().end()) {
    MI.eraseFromParent();
    return &MBB;
  }

  // Expand the pseudo into an smstart or smstop instruction. The pseudo has
  // the following operands:
  //
  //   MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
  //
  // The pseudo is expanded into a conditional smstart/smstop, with a
  // check if pstate.sm (register) equals the expected value, and if not,
  // invokes the smstart/smstop.
  //
  // As an example, the following block contains a normal call from a
  // streaming-compatible function:
  //
  // OrigBB:
  //   MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTOP
  //   bl @normal_callee
  //   MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTART
  //
  // ...which will be transformed into:
  //
  // OrigBB:
  //   TBNZx %0:gpr64, 0, SMBB
  //   b EndBB
  //
  // SMBB:
  //   MSRpstatesvcrImm1 3, 0, <regmask>  <- SMSTOP
  //
  // EndBB:
  //   bl @normal_callee
  //   MSRcond_pstatesvcrImm1 3, 1, <regmask>  <- SMSTART
  //
  DebugLoc DL = MI.getDebugLoc();

  // Create the conditional branch based on the third operand of the
  // instruction, which tells us if we are wrapping a normal or streaming
  // function.
  // We test the live value of pstate.sm and toggle pstate.sm if this is not
  // the expected value for the callee (0 for a normal callee and 1 for a
  // streaming callee).
  unsigned Opc;
  switch (MI.getOperand(2).getImm()) {
  case AArch64SME::Always:
    llvm_unreachable("Should have matched to instruction directly");
  case AArch64SME::IfCallerIsStreaming:
    Opc = AArch64::TBNZW;
    break;
  case AArch64SME::IfCallerIsNonStreaming:
    Opc = AArch64::TBZW;
    break;
  }
  auto PStateSM = MI.getOperand(3).getReg();
  auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
  unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
  MachineInstrBuilder Tbx =
      BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
  //  - SMBB contains the MSRcond_pstatesvcrImm1 instruction only.
  //  - EndBB contains all instructions after MSRcond_pstatesvcrImm1.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
  Tbx.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
  MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
                                    TII->get(AArch64::MSRpstatesvcrImm1));
  // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
  // these contain the CopyFromReg for the first argument and the flag to
  // indicate whether the callee is streaming or normal).
  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  for (unsigned i = 4; i < MI.getNumOperands(); ++i)
    MIB.add(MI.getOperand(i));

  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}

bool AArch64ExpandPseudo::expandMultiVecPseudo(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
    unsigned ContiguousOp, unsigned StridedOpc) {
  MachineInstr &MI = *MBBI;
  Register Tuple = MI.getOperand(0).getReg();

  auto ContiguousRange = ContiguousClass.getRegisters();
  auto StridedRange = StridedClass.getRegisters();
  unsigned Opc;
  if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
    Opc = ContiguousOp;
  } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
    Opc = StridedOpc;
  } else
    llvm_unreachable("Cannot expand Multi-Vector pseudo");

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
                                .add(MI.getOperand(0))
                                .add(MI.getOperand(1))
                                .add(MI.getOperand(2))
                                .add(MI.getOperand(3));
  transferImpOps(MI, MIB, MIB);
  MI.eraseFromParent();
  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
        AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:  Opcode = AArch64::ADDWrs;  break;
    case AArch64::SUBWrr:  Opcode = AArch64::SUBWrs;  break;
    case AArch64::ADDXrr:  Opcode = AArch64::ADDXrs;  break;
    case AArch64::SUBXrr:  Opcode = AArch64::SUBXrs;  break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:  Opcode = AArch64::ANDWrs;  break;
    case AArch64::ANDXrr:  Opcode = AArch64::ANDXrs;  break;
    case AArch64::BICWrr:  Opcode = AArch64::BICWrs;  break;
    case AArch64::BICXrr:  Opcode = AArch64::BICXrs;  break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:  Opcode = AArch64::EONWrs;  break;
    case AArch64::EONXrr:  Opcode = AArch64::EONXrs;  break;
    case AArch64::EORWrr:  Opcode = AArch64::EORWrs;  break;
    case AArch64::EORXrr:  Opcode = AArch64::EORXrs;  break;
    case AArch64::ORNWrr:  Opcode = AArch64::ORNWrs;  break;
    case AArch64::ORNXrr:  Opcode = AArch64::ORNXrs;  break;
    case AArch64::ORRWrr:  Opcode = AArch64::ORRWrs;  break;
    case AArch64::ORRXrr:  Opcode = AArch64::ORRXrs;  break;
    }
    MachineFunction &MF = *MBB.getParent();
    // Try to create new inst without implicit operands added.
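    // The shifted-register (*rs) form carries an extra shift operand; LSL #0
    // is added below so the new instruction behaves exactly like the plain
    // register-register form it replaces.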
    MachineInstr *NewMI = MF.CreateMachineInstr(
        TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
    MBB.insert(MBBI, NewMI);
    MachineInstrBuilder MIB1(MF, NewMI);
    MIB1->setPCSections(MF, MI.getPCSections());
    MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    if (auto DebugNumber = MI.peekDebugInstrNum())
      NewMI->setDebugInstrNum(DebugNumber);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny code model: expand to a single LDR.
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small code model: expand into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
                                                        AArch64II::MO_PAGEOFF |
                                                        AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }
  case AArch64::MOVaddrBA: {
    MachineFunction &MF = *MI.getParent()->getParent();
    if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
      // blockaddress expressions have to come from a constant pool because
      // the largest addend (and hence offset within a function) allowed for
      // ADRP is only 8MB.
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
    [[fallthrough]];
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    assert(DstReg != AArch64::XZR);
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also
      // be loaded into address range [0, 2^48). Both of these properties need
      // to be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
      SysReg = AArch64SysReg::TPIDRRO_EL0;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
                                                        : AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
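  // Spills and fills of SVE vector tuples and predicate pairs are expanded by
  // expandSVESpillFill into N consecutive single-register STR_ZXI/LDR_ZXI
  // (or STR_PXI/LDR_PXI) instructions.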
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::STR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::LDR_PPXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
  case AArch64::BLR_RVMARKER:
  case AArch64::BLRA_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::BLR_BTI:
    return expandCALL_BTI(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
  case AArch64::RestoreZAPseudo: {
    auto *NewMBB = expandRestoreZA(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
  case AArch64::MSRpstatePseudo: {
    auto *NewMBB = expandCondSMToggle(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
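  // Coalescer barriers carry no machine semantics; they exist only to keep
  // the register coalescer from joining values across them, so they are
  // simply deleted here.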
  case AArch64::COALESCER_BARRIER_FPR16:
  case AArch64::COALESCER_BARRIER_FPR32:
  case AArch64::COALESCER_BARRIER_FPR64:
  case AArch64::COALESCER_BARRIER_FPR128:
    MI.eraseFromParent();
    return true;
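  // SME2 multi-vector load pseudos: expandMultiVecPseudo picks the contiguous
  // or strided real opcode depending on which of the two register classes the
  // allocator assigned to the destination tuple.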
  case AArch64::LD1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
  case AArch64::LD1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
  case AArch64::LD1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
  case AArch64::LD1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
  case AArch64::LDNT1B_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
  case AArch64::LDNT1H_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
  case AArch64::LDNT1W_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
  case AArch64::LDNT1D_2Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
  case AArch64::LD1B_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
                                AArch64::LD1B_2Z_STRIDED);
  case AArch64::LD1H_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
                                AArch64::LD1H_2Z_STRIDED);
  case AArch64::LD1W_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
                                AArch64::LD1W_2Z_STRIDED);
  case AArch64::LD1D_2Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
                                AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
                                AArch64::LD1D_2Z_STRIDED);
  case AArch64::LDNT1B_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
  case AArch64::LDNT1H_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
  case AArch64::LDNT1W_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
  case AArch64::LDNT1D_2Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
        AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
  case AArch64::LD1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
  case AArch64::LD1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
  case AArch64::LD1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
  case AArch64::LD1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
  case AArch64::LDNT1B_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
  case AArch64::LDNT1H_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
  case AArch64::LDNT1W_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
  case AArch64::LDNT1D_4Z_IMM_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
  case AArch64::LD1B_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
                                AArch64::LD1B_4Z_STRIDED);
  case AArch64::LD1H_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
                                AArch64::LD1H_4Z_STRIDED);
  case AArch64::LD1W_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
                                AArch64::LD1W_4Z_STRIDED);
  case AArch64::LD1D_4Z_PSEUDO:
    return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
                                AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
                                AArch64::LD1D_4Z_STRIDED);
  case AArch64::LDNT1B_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
  case AArch64::LDNT1H_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
  case AArch64::LDNT1W_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
  case AArch64::LDNT1D_4Z_PSEUDO:
    return expandMultiVecPseudo(
        MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
        AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}