//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <utility>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (const MachineOperand &MO :
       llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t RenamableState =
      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode)
    {
    default: llvm_unreachable("unhandled!"); break;

    case AArch64::ORRWri:
    case AArch64::ORRXri:
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .add(MI.getOperand(0))
                         .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                         .addImm(I->Op2));
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
              .addReg(DstReg, RegState::Define |
                                  getDeadRegState(DstIsDead && LastItem) |
                                  RenamableState)
              .addImm(I->Op1)
              .addImm(I->Op2));
    } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addReg(DstReg)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
        .addImm(0)
        .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  unsigned LdxpOp, StxpOp;

  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), FailBB);
  MF->insert(++FailBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     csinc wStatus, wzr, wzr, eq
  //     cmp xDestHi, xDesiredHi
  //     csinc wStatus, wStatus, wStatus, eq
  //     cbnz wStatus, .Lfail
  BuildMI(LoadCmpBB, DL, TII->get(LdxpOp))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(FailBB);
  LoadCmpBB->addSuccessor(FailBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StxpOp), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  BuildMI(StoreBB, DL, TII->get(AArch64::B)).addMBB(DoneBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(FailBB, DL, TII->get(StxpOp), StatusReg)
      .addReg(DestLo.getReg())
      .addReg(DestHi.getReg())
      .addReg(AddrReg);
  BuildMI(FailBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  FailBB->addSuccessor(LoadCmpBB);
  FailBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *FailBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also the _ZERO specifies that the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand,
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;

  unsigned DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();

  if (DType == AArch64::DestructiveBinary)
    assert(DstReg != MI.getOperand(3).getReg());

  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    LLVM_FALLTHROUGH;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  case AArch64::DestructiveUnaryPassthru:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
    if (DstReg == MI.getOperand(3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
      UseRev = true;
    }
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

#ifndef NDEBUG
  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveUnaryPassthru:
  case AArch64::DestructiveBinaryImm:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
    break;
  }
#endif

  // Resolve the reverse opcode
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
#ifndef NDEBUG
    assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg());

    // After the movprfx, the destructive operand is the same as Dst
    DOPIdx = 0;
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
#ifndef NDEBUG
    assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveUnaryPassthru:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(PredIdx))
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  }

  if (PRFX) {
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(
            TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
            Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to a branch, followed by the special `mov x29,
  // x29` marker. Mark the sequence as a bundle, to avoid passes moving other
  // code in between.
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  MachineOperand &CallTarget = MI.getOperand(0);
  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  OriginalCall->addOperand(CallTarget);

  unsigned RegMaskStartIdx = 1;
  // Skip register arguments. Those are added during ISel, but are not
  // needed for the concrete branch.
  while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
    auto MOP = MI.getOperand(RegMaskStartIdx);
    assert(MOP.isReg() && "can only add register operands");
    OriginalCall->addOperand(MachineOperand::CreateReg(
        MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
    RegMaskStartIdx++;
  }
  for (const MachineOperand &MO :
       llvm::drop_begin(MI.operands(), RegMaskStartIdx))
    OriginalCall->addOperand(MO);

  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
                     .addReg(AArch64::FP, RegState::Define)
                     .addReg(AArch64::XZR)
                     .addReg(AArch64::FP)
                     .addImm(0)
                     .getInstr();
  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Marker);
  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(Marker->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  Register CtxReg = MBBI->getOperand(0).getReg();
  Register BaseReg = MBBI->getOperand(1).getReg();
  int Offset = MBBI->getOperand(2).getImm();
  DebugLoc DL(MBBI->getDebugLoc());
  auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

  if (STI.getTargetTriple().getArchName() != "arm64e") {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
        .addUse(CtxReg)
        .addUse(BaseReg)
        .addImm(Offset / 8)
        .setMIFlag(MachineInstr::FrameSetup);
    MBBI->eraseFromParent();
    return true;
  }

  // We need to sign the context in an address-discriminated way. 0xc31a is a
  // fixed random value, chosen as part of the ABI.
  //     add x16, xBase, #Offset
  //     movk x16, #0xc31a, lsl #48
  //     mov x17, x22/xzr
  //     pacdb x17, x16
  //     str x17, [xBase, #Offset]
  unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
      .addUse(BaseReg)
      .addImm(abs(Offset))
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
      .addUse(AArch64::X16)
      .addImm(0xc31a)
      .addImm(48)
      .setMIFlag(MachineInstr::FrameSetup);
  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried),
  // so move it somewhere before signing.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
      .addUse(AArch64::XZR)
      .addUse(CtxReg)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
      .addUse(AArch64::X17)
      .addUse(AArch64::X16)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
      .addUse(AArch64::X17)
      .addUse(BaseReg)
      .addImm(Offset / 8)
      .setMIFlag(MachineInstr::FrameSetup);

  MBBI->eraseFromParent();
  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
        AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:  Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr:  Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr:  Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr:  Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:  Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr:  Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr:  Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr:  Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:  Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr:  Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr:  Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr:  Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr:  Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr:  Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr:  Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr:  Opcode = AArch64::ORRXrs; break;
    }
    MachineFunction &MF = *MBB.getParent();
    // Try to create new inst without implicit operands added.
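    // The shifted-register (*rs) form takes one extra shift operand, so an
    // LSL #0 is appended below; with a zero shift it computes exactly the
    // same result as the plain register-register pseudo.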
    MachineInstr *NewMI = MF.CreateMachineInstr(
        TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
    MBB.insert(MBBI, NewMI);
    MachineInstrBuilder MIB1(MF, NewMI);
    MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny code model: expand to a literal LDR.
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small code model: expand into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        unsigned DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(),
                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }
  case AArch64::MOVaddrBA: {
    MachineFunction &MF = *MI.getParent()->getParent();
    if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
      // blockaddress expressions have to come from a constant pool because the
      // largest addend (and hence offset within a function) allowed for ADRP
      // is only 8MB.
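      // The expansion below loads the blockaddress back out of that constant
      // pool slot:
      //     adrp xDst, <cp-entry>@PAGE
      //     ldr  xDst, [xDst, <cp-entry>@PAGEOFF]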
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
    LLVM_FALLTHROUGH;
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    assert(DstReg != AArch64::XZR);
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
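      // Clone the address operand, retarget it at the G3 field (bits 48-63) of
      // the PC-relative offset biased by 2^32, and let the MOVK below write
      // that value into the top 16 bits of DstReg.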
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
                                                        : AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::BLR_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}