//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <utility>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (const MachineOperand &MO :
       llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t RenamableState =
      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode)
    {
    default: llvm_unreachable("unhandled!"); break;

    case AArch64::ORRWri:
    case AArch64::ORRXri:
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .add(MI.getOperand(0))
                         .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                         .addImm(I->Op2));
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
              .addReg(DstReg, RegState::Define |
                                  getDeadRegState(DstIsDead && LastItem) |
                                  RenamableState)
              .addImm(I->Op1)
              .addImm(I->Op2));
    } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
              .addReg(DstReg,
                      RegState::Define |
                          getDeadRegState(DstIsDead && LastItem) |
                          RenamableState)
              .addReg(DstReg)
              .addImm(I->Op1)
              .addImm(I->Op2));
    } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
        .addImm(0).addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
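  // The blocks created above did not exist at register allocation time, so
  // their live-in lists are empty; rebuild them bottom-up, then revisit the
  // loop blocks so registers live across the back edge show up in both
  // LoadCmpBB and StoreBB.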
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  unsigned LdxpOp, StxpOp;

  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), FailBB);
  MF->insert(++FailBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     sbcs xDestHi, xDesiredHi
  //     b.ne .Ldone
  BuildMI(LoadCmpBB, DL, TII->get(LdxpOp))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(FailBB);
  LoadCmpBB->addSuccessor(FailBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StxpOp), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  BuildMI(StoreBB, DL, TII->get(AArch64::B)).addMBB(DoneBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(FailBB, DL, TII->get(StxpOp), StatusReg)
      .addReg(DestLo.getReg())
      .addReg(DestHi.getReg())
      .addReg(AddrReg);
  BuildMI(FailBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  FailBB->addSuccessor(LoadCmpBB);
  FailBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *FailBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also, the _ZERO specifies that the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand;
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;

  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();

  if (DType == AArch64::DestructiveBinary)
    assert(DstReg != MI.getOperand(3).getReg());

  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==>  FSUBR Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    LLVM_FALLTHROUGH;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  case AArch64::DestructiveUnaryPassthru:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
    if (DstReg == MI.getOperand(3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
      UseRev = true;
    }
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

#ifndef NDEBUG
  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveUnaryPassthru:
  case AArch64::DestructiveBinaryImm:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
    break;
  }
#endif

  // Resolve the reverse opcode
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
#ifndef NDEBUG
    assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg());

    // After the movprfx, the destructive operand is the same as Dst
    DOPIdx = 0;
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
#ifndef NDEBUG
    assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveUnaryPassthru:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(PredIdx))
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  }

  if (PRFX) {
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(
            TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
            Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to:
  // - a branch to the call target, followed by
  // - the special `mov x29, x29` marker, and
  // - another branch, to the runtime function
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  MachineOperand &RVTarget = MI.getOperand(0);
  MachineOperand &CallTarget = MI.getOperand(1);
  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  assert(RVTarget.isGlobal() && "invalid operand for attached call");
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  OriginalCall->addOperand(CallTarget);

  unsigned RegMaskStartIdx = 2;
  // Skip register arguments. Those are added during ISel, but are not
  // needed for the concrete branch.
  while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
    auto MOP = MI.getOperand(RegMaskStartIdx);
    assert(MOP.isReg() && "can only add register operands");
    OriginalCall->addOperand(MachineOperand::CreateReg(
        MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
    RegMaskStartIdx++;
  }
  for (const MachineOperand &MO :
       llvm::drop_begin(MI.operands(), RegMaskStartIdx))
    OriginalCall->addOperand(MO);

  BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
      .addReg(AArch64::FP, RegState::Define)
      .addReg(AArch64::XZR)
      .addReg(AArch64::FP)
      .addImm(0);

  auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
                     .add(RVTarget)
                     .getInstr();

  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);

  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(RVCall->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI) {
  // Expand CALL_BTI pseudo to:
  // - a branch to the call target
  // - a BTI instruction
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.

  MachineInstr &MI = *MBBI;
  MachineOperand &CallTarget = MI.getOperand(0);
  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  MachineInstr *Call =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  Call->addOperand(CallTarget);

  MachineInstr *BTI =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
          // BTI J so that setjmp can BR to this.
          .addImm(36)
          .getInstr();

  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Call);

  MI.eraseFromParent();
  finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  Register CtxReg = MBBI->getOperand(0).getReg();
  Register BaseReg = MBBI->getOperand(1).getReg();
  int Offset = MBBI->getOperand(2).getImm();
  DebugLoc DL(MBBI->getDebugLoc());
  auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

  if (STI.getTargetTriple().getArchName() != "arm64e") {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
        .addUse(CtxReg)
        .addUse(BaseReg)
        .addImm(Offset / 8)
        .setMIFlag(MachineInstr::FrameSetup);
    MBBI->eraseFromParent();
    return true;
  }

  // We need to sign the context in an address-discriminated way. 0xc31a is a
  // fixed random value, chosen as part of the ABI.
  //     add x16, xBase, #Offset
  //     movk x16, #0xc31a, lsl #48
  //     mov x17, x22/xzr
  //     pacdb x17, x16
  //     str x17, [xBase, #Offset]
  unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
      .addUse(BaseReg)
      .addImm(abs(Offset))
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
      .addUse(AArch64::X16)
      .addImm(0xc31a)
      .addImm(48)
      .setMIFlag(MachineInstr::FrameSetup);
  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
  // move it somewhere before signing.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
      .addUse(AArch64::XZR)
      .addUse(CtxReg)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
      .addUse(AArch64::X17)
      .addUse(AArch64::X16)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
      .addUse(AArch64::X17)
      .addUse(BaseReg)
      .addImm(Offset / 8)
      .setMIFlag(MachineInstr::FrameSetup);

  MBBI->eraseFromParent();
  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
        != AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:  Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr:  Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr:  Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr:  Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:  Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr:  Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr:  Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr:  Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:  Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr:  Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr:  Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr:  Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr:  Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr:  Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr:  Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr:  Opcode = AArch64::ORRXrs; break;
    }
    MachineFunction &MF = *MBB.getParent();
    // Try to create new inst without implicit operands added.
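    // (NoImplicit below leaves the new instruction without the implicit
    // operands from its MCInstrDesc; only the operands copied from the
    // pseudo, including its implicit ones via transferImpOps, are attached.)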
    MachineInstr *NewMI = MF.CreateMachineInstr(
        TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
    MBB.insert(MBBI, NewMI);
    MachineInstrBuilder MIB1(MF, NewMI);
    MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny code model: expand to a single LDR.
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small code model: expand into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
                                                        AArch64II::MO_PAGEOFF |
                                                        AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }
  case AArch64::MOVaddrBA: {
    MachineFunction &MF = *MI.getParent()->getParent();
    if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
      // blockaddress expressions have to come from a constant pool because the
      // largest addend (and hence offset within a function) allowed for ADRP
      // is only 8MB.
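      // The ADRP/LDR pair built below loads the block address back out of
      // that constant-pool slot.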
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
    LLVM_FALLTHROUGH;
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    assert(DstReg != AArch64::XZR);
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
                                                          AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::BLR_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::BLR_BTI:
    return expandCALL_BTI(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}