//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <utility>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)
/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
       ++i) {
    const MachineOperand &MO = OldMI.getOperand(i);
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode) {
    default:
      llvm_unreachable("unhandled!");
    case AArch64::ORRWri:
    case AArch64::ORRXri:
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .add(MI.getOperand(0))
                         .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                         .addImm(I->Op2));
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem))
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem))
                         .addReg(DstReg)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}
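// Illustrative expansion (immediate and register chosen hypothetically): for
// MOVi64imm x0, #0x1234567890, AArch64_IMM::expandMOVImm typically yields a
// MOVZ/MOVK chain such as
//   movz x0, #0x7890
//   movk x0, #0x3456, lsl #16
//   movk x0, #0x12, lsl #32
// Only the final write may carry the dead flag, which is why LastItem is
// threaded through the builder loop above.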
bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, #0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
        .addImm(0)
        .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}
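// Note on the double liveness pass above (the same pattern recurs in the
// expansions below): LoadCmpBB and StoreBB form a cycle, so a single
// bottom-up computeAndAddLiveIns walk sees an incomplete live-in set across
// the back edge (e.g. AddrReg and NewReg must stay live around the whole
// loop). Clearing and recomputing once more, with every successor's live-ins
// now populated, reaches the fixed point.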
bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     csinc wStatus, wzr, wzr, eq
  //     cmp xDestHi, xDesiredHi
  //     csinc wStatus, wStatus, wStatus, eq
  //     cbnz wStatus, .Ldone
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(DoneBB);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}
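// Illustrative result for CMP_SWAP_128 (register assignments hypothetical):
//   .Lloadcmp:
//     ldaxp x0, x1, [x8]
//     cmp   x0, x4
//     csinc w9, wzr, wzr, eq     // w9 = 0 iff the low halves match
//     cmp   x1, x5
//     csinc w9, w9, w9, eq       // w9 != 0 iff either half mismatched
//     cbnz  w9, .Ldone
//   .Lstore:
//     stlxp w9, x6, x7, [x8]
//     cbnz  w9, .Lloadcmp
//   .Ldone: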
bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(2).getReg();
  Register AddressReg = MI.getOperand(3).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
  const unsigned OpCode =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}
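// Illustrative expansion of STGloop/STZGloop (size in x1, address in x0,
// registers hypothetical):
//   .Lloop:
//     st2g x0, [x0], #32       // stz2g when the pseudo also zeroes the data
//     sub  x1, x1, #32
//     cbnz x1, .Lloop
// Each iteration tags two 16-byte MTE granules; the post-index immediate 2 is
// scaled by the 16-byte granule size, matching the 16 * 2 decrement above.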
/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    // Rewrite the plain register-register form into the shifted-register form
    // with a zero shift amount.
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:  Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr:  Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr:  Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr:  Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:  Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr:  Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr:  Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr:  Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:  Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr:  Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr:  Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr:  Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr:  Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr:  Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr:  Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr:  Opcode = AArch64::ORRXrs; break;
    }
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
                MI.getOperand(0).getReg())
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // The tiny code model expands to a single literal-form LDR.
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // The small code model expands into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(),
                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }
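  // Illustrative output for LOADgot of a global @g (register hypothetical):
  // under the small code model,
  //   adrp x0, :got:g
  //   ldr  x0, [x0, :got_lo12:g]
  // and under the tiny code model, a single pc-relative literal load,
  //   ldr  x0, :got:g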
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrBA:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also
      // be loaded into address range [0, 2^48). Both of these properties need
      // to be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
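  // Spelling out the MO_TAGGED arithmetic above: let S be the tagged symbol
  // address and P the PC of the MOVK. With the image below 4 GiB, the
  // untagged part of S - P lies in (-2^32, 2^32), so the 0x100000000 bias
  // keeps the low 48 bits of S + 0x100000000 - P positive without carrying
  // into bit 48; bits 48-63 of the relocated value are therefore exactly the
  // tag, which the MOVK writes into the destination register.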
  case AArch64::ADDlowTLS:
    // Produce a plain ADD.
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
        MF->getTarget().getCodeModel() == CodeModel::Kernel)
      SysReg = AArch64SysReg::TPIDR_EL1;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
                                                        : AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow an immediate offset. getTaggedBasePointerOffset
    // should almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    unsigned FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop:
  case AArch64::STZGloop:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  }
  return false;
}
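// Note: the expansions above that split MBB (the compare-and-swap and set-tag
// loops) splice everything after the pseudo into a new block and reset
// NextMBBI to MBB.end(), so the driver loop in expandMBB below stops at the
// truncated block; the newly created blocks are then reached through the
// function's block list in runOnMachineFunction.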
/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}