//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <utility>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
};

} // end anonymous namespace
char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
       ++i) {
    const MachineOperand &MO = OldMI.getOperand(i);
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t RenamableState =
      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode)
    {
    default: llvm_unreachable("unhandled!"); break;

    case AArch64::ORRWri:
    case AArch64::ORRXri:
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
        .add(MI.getOperand(0))
        .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
        .addImm(I->Op2));
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
        .addReg(DstReg, RegState::Define |
                getDeadRegState(DstIsDead && LastItem) |
                RenamableState)
        .addImm(I->Op1)
        .addImm(I->Op2));
      } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
        .addReg(DstReg,
                RegState::Define |
                getDeadRegState(DstIsDead && LastItem) |
                RenamableState)
        .addReg(DstReg)
        .addImm(I->Op1)
        .addImm(I->Op2));
      } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}

/// Expand a CMP_SWAP pseudo-instruction into an explicit compare-and-swap loop
/// built from load-exclusive and store-exclusive instructions.
bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
      .addImm(0).addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// Expand a 128-bit CMP_SWAP pseudo-instruction into a compare-and-swap loop
/// built from LDXP/LDAXP and STXP/STLXP, as selected by the memory ordering.
bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  unsigned LdxpOp, StxpOp;

  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), FailBB);
  MF->insert(++FailBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     sbcs xDestHi, xDesiredHi
  //     b.ne .Ldone
  BuildMI(LoadCmpBB, DL, TII->get(LdxpOp))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(FailBB);
  LoadCmpBB->addSuccessor(FailBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StxpOp), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  BuildMI(StoreBB, DL, TII->get(AArch64::B)).addMBB(DoneBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(FailBB, DL, TII->get(StxpOp), StatusReg)
      .addReg(DestLo.getReg())
      .addReg(DestHi.getReg())
      .addReg(AddrReg);
  BuildMI(FailBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  FailBB->addSuccessor(LoadCmpBB);
  FailBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *FailBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also the _ZERO specifies the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand,
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;

  unsigned DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();

  if (DType == AArch64::DestructiveBinary)
    assert(DstReg != MI.getOperand(3).getReg());

  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    LLVM_FALLTHROUGH;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  case AArch64::DestructiveUnaryPassthru:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
    if (DstReg == MI.getOperand(3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
      UseRev = true;
    }
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

#ifndef NDEBUG
  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveUnaryPassthru:
  case AArch64::DestructiveBinaryImm:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
    break;
  }
#endif

  // Resolve the reverse opcode
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
#ifndef NDEBUG
    assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg());

    // After the movprfx, the destructive operand is the same as Dst
    DOPIdx = 0;
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
#ifndef NDEBUG
    assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
    .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveUnaryPassthru:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(PredIdx))
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
       .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
       .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
       .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
       .add(MI.getOperand(SrcIdx))
       .add(MI.getOperand(Src2Idx));
    break;
  }

  if (PRFX) {
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}

/// Expand a STGloop_wback / STZGloop_wback pseudo into a loop of post-indexed
/// ST2G / STZ2G instructions that tags (and, for the STZG variant, zeroes) the
/// memory region, preceded by a single STG/STZG when the size is not a
/// multiple of 32 bytes.
bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

/// Expand a spill or fill of an SVE Z-register tuple into one STR_ZXI or
/// LDR_ZXI per subregister, at consecutive immediate offsets.
bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(
            TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
            Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to a branch, followed by the special `mov x29,
  // x29` marker. Mark the sequence as a bundle, to avoid passes moving other
  // code in between.
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  MachineOperand &CallTarget = MI.getOperand(0);
  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  OriginalCall->addOperand(CallTarget);

  unsigned RegMaskStartIdx = 1;
  // Skip register arguments. Those are added during ISel, but are not
  // needed for the concrete branch.
  while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
    auto MOP = MI.getOperand(RegMaskStartIdx);
    assert(MOP.isReg() && "can only add register operands");
    OriginalCall->addOperand(MachineOperand::CreateReg(
        MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
    RegMaskStartIdx++;
  }
  for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx)
    OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx));

  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
                     .addReg(AArch64::FP, RegState::Define)
                     .addReg(AArch64::XZR)
                     .addReg(AArch64::FP)
                     .addImm(0)
                     .getInstr();
  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Marker);
  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(Marker->getIterator()));
  return true;
}

/// Expand a StoreSwiftAsyncContext pseudo: store the Swift async context
/// register to [BaseReg, #Offset]; on arm64e the context is first signed with
/// PACDB using an address-discriminated key.
bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  Register CtxReg = MBBI->getOperand(0).getReg();
  Register BaseReg = MBBI->getOperand(1).getReg();
  int Offset = MBBI->getOperand(2).getImm();
  DebugLoc DL(MBBI->getDebugLoc());
  auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

  if (STI.getTargetTriple().getArchName() != "arm64e") {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
        .addUse(CtxReg)
        .addUse(BaseReg)
        .addImm(Offset / 8)
        .setMIFlag(MachineInstr::FrameSetup);
    MBBI->eraseFromParent();
    return true;
  }

  // We need to sign the context in an address-discriminated way. 0xc31a is a
  // fixed random value, chosen as part of the ABI.
  //     add x16, xBase, #Offset
  //     movk x16, #0xc31a, lsl #48
  //     mov x17, x22/xzr
  //     pacdb x17, x16
  //     str x17, [xBase, #Offset]
  unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
      .addUse(BaseReg)
      .addImm(abs(Offset))
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
      .addUse(AArch64::X16)
      .addImm(0xc31a)
      .addImm(48)
      .setMIFlag(MachineInstr::FrameSetup);
  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
  // move it somewhere before signing.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
      .addUse(AArch64::XZR)
      .addUse(CtxReg)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
      .addUse(AArch64::X17)
      .addUse(AArch64::X16)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
      .addUse(AArch64::X17)
      .addUse(BaseReg)
      .addImm(Offset / 8)
      .setMIFlag(MachineInstr::FrameSetup);

  MBBI->eraseFromParent();
  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
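/// Expansions that replace MI with a multi-block sequence (e.g. the CMP_SWAP
/// and set-tag loops) also update NextMBBI so the caller resumes scanning at
/// the right place.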
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
        != AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:  Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr:  Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr:  Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr:  Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:  Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr:  Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr:  Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr:  Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:  Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr:  Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr:  Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr:  Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr:  Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr:  Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr:  Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr:  Opcode = AArch64::ORRXrs; break;
    }
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
                MI.getOperand(0).getReg())
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny codemodel expand to LDR
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small codemodel expand into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        unsigned DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
                               AArch64II::MO_PAGEOFF |
                               AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                  AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }
  case AArch64::MOVaddrBA: {
    MachineFunction &MF = *MI.getParent()->getParent();
    if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
      // blockaddress expressions have to come from a constant pool because the
      // largest addend (and hence offset within a function) allowed for ADRP is
      // only 8MB.
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
    LLVM_FALLTHROUGH;
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
                                                          AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::BLR_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}