//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <utility>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
       ++i) {
    const MachineOperand &MO = OldMI.getOperand(i);
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t RenamableState =
      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode) {
    default:
      llvm_unreachable("unhandled!");
      break;
    case AArch64::ORRWri:
    case AArch64::ORRXri:
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .add(MI.getOperand(0))
                         .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
                         .addImm(I->Op2));
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
                         .addReg(DstReg,
                                 RegState::Define |
                                     getDeadRegState(DstIsDead && LastItem) |
                                     RenamableState)
                         .addReg(DstReg)
                         .addImm(I->Op1)
                         .addImm(I->Op2));
    } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}
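
/// Expand a CMP_SWAP pseudo into an explicit load-linked/store-conditional
/// loop. A sketch of the emitted sequence for the word-sized case (register
/// names illustrative; the initial mov is elided when wStatus is dead):
///
///     .Lloadcmp:
///         mov   wStatus, #0
///         ldaxr wDest, [xAddr]
///         cmp   wDest, wDesired
///         b.ne  .Ldone
///     .Lstore:
///         stlxr wStatus, wNew, [xAddr]
///         cbnz  wStatus, .Lloadcmp
///     .Ldone: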
bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
        .addImm(0)
        .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}
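
/// Expand a CMP_SWAP_128 pseudo. There is no 128-bit compare instruction, so
/// the two halves are loaded with LDAXP, compared separately, and the
/// per-half mismatch results are merged through CSINC before the conditional
/// STLXP store; the block comments inside show the exact sequence.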
bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     csinc wStatus, wzr, wzr, eq
  //     cmp xDestHi, xDesiredHi
  //     csinc wStatus, wStatus, wStatus, eq
  //     cbnz wStatus, .Ldone
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(DoneBB);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}
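
/// Expand an STGloop/STZGloop pseudo into a tag-store loop. A sketch of the
/// emitted sequence (register names illustrative; each iteration tags two
/// 16-byte granules with a post-indexed st2g, or stz2g when also zeroing
/// the data):
///
///     .Lloop:
///         st2g xAddr, [xAddr], #32
///         sub  xSize, xSize, #32
///         cbnz xSize, .Lloop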
bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(2).getReg();
  Register AddressReg = MI.getOperand(3).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
  const unsigned OpCode =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    break;
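
  // Plain register-register ALU operations are expanded to their
  // shifted-register ("rs") forms with an LSL #0 shift operand, which is
  // the encoding the ISA actually provides; e.g. ADDWrr becomes ADDWrs
  // with a zero shift.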
  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
    }
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
                MI.getOperand(0).getReg())
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny code model: expand to a single literal LDR.
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small code model: expand into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        unsigned DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(),
                               Flags | AArch64II::MO_PAGEOFF |
                                   AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }
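
  // The MOVaddr* pseudos materialize a symbol address in two instructions;
  // e.g. MOVaddr becomes (register and symbol illustrative):
  //     adrp x0, symbol
  //     add  x0, x0, :lo12:symbol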
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrBA:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also
      // be loaded into address range [0, 2^48). Both of these properties need
      // to be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD.
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getTarget().getTargetTriple().isOSFuchsia() &&
        MF->getTarget().getCodeModel() == CodeModel::Kernel)
      SysReg = AArch64SysReg::TPIDR_EL1;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }
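
  // Wide immediates are synthesized by AArch64_IMM::expandMOVImm as a short
  // sequence of ORR (logical immediate), MOVZ/MOVN and MOVK instructions;
  // e.g. mov x0, #0xdeadbeef (illustrative) becomes:
  //     movz x0, #0xbeef
  //     movk x0, #0xdead, lsl #16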
  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
                                                        : AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    unsigned FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop:
  case AArch64::STZGloop:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}