1 //===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a pass that expands atomic pseudo instructions into 10 // target instructions post RA. With such method, LL/SC loop is considered as 11 // a whole blob and make spilling unlikely happens in the LL/SC loop. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "MCTargetDesc/PPCPredicates.h" 16 #include "PPC.h" 17 #include "PPCInstrInfo.h" 18 #include "PPCTargetMachine.h" 19 20 #include "llvm/CodeGen/LivePhysRegs.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 24 using namespace llvm; 25 26 #define DEBUG_TYPE "ppc-atomic-expand" 27 28 namespace { 29 30 class PPCExpandAtomicPseudo : public MachineFunctionPass { 31 public: 32 const PPCInstrInfo *TII; 33 const PPCRegisterInfo *TRI; 34 static char ID; 35 36 PPCExpandAtomicPseudo() : MachineFunctionPass(ID) { 37 initializePPCExpandAtomicPseudoPass(*PassRegistry::getPassRegistry()); 38 } 39 40 bool runOnMachineFunction(MachineFunction &MF) override; 41 42 private: 43 bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI, 44 MachineBasicBlock::iterator &NMBBI); 45 bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI, 46 MachineBasicBlock::iterator &NMBBI); 47 bool expandAtomicCmpSwap128(MachineBasicBlock &MBB, MachineInstr &MI, 48 MachineBasicBlock::iterator &NMBBI); 49 }; 50 51 static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB, 52 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 53 Register Dest0, Register Dest1, Register Src0, 54 Register Src1) { 55 const MCInstrDesc &OR = TII->get(PPC::OR8); 56 const MCInstrDesc &XOR = TII->get(PPC::XOR8); 57 if (Dest0 == Src1 && Dest1 == Src0) { 58 // The most tricky case, swapping values. 59 BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1); 60 BuildMI(MBB, MBBI, DL, XOR, Dest1).addReg(Dest0).addReg(Dest1); 61 BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1); 62 } else if (Dest0 != Src0 || Dest1 != Src1) { 63 if (Dest0 == Src1 || Dest1 != Src0) { 64 BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1); 65 BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0); 66 } else { 67 BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0); 68 BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1); 69 } 70 } 71 } 72 73 bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) { 74 bool Changed = false; 75 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); 76 TRI = &TII->getRegisterInfo(); 77 for (MachineBasicBlock &MBB : MF) { 78 for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end(); 79 MBBI != MBBE;) { 80 MachineInstr &MI = *MBBI; 81 MachineBasicBlock::iterator NMBBI = std::next(MBBI); 82 Changed |= expandMI(MBB, MI, NMBBI); 83 MBBI = NMBBI; 84 } 85 } 86 if (Changed) 87 MF.RenumberBlocks(); 88 return Changed; 89 } 90 91 bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI, 92 MachineBasicBlock::iterator &NMBBI) { 93 switch (MI.getOpcode()) { 94 case PPC::ATOMIC_SWAP_I128: 95 case PPC::ATOMIC_LOAD_ADD_I128: 96 case PPC::ATOMIC_LOAD_SUB_I128: 97 case PPC::ATOMIC_LOAD_XOR_I128: 98 case PPC::ATOMIC_LOAD_NAND_I128: 99 case PPC::ATOMIC_LOAD_AND_I128: 100 case PPC::ATOMIC_LOAD_OR_I128: 101 return expandAtomicRMW128(MBB, MI, NMBBI); 102 case PPC::ATOMIC_CMP_SWAP_I128: 103 return expandAtomicCmpSwap128(MBB, MI, NMBBI); 104 case PPC::BUILD_QUADWORD: { 105 Register Dst = MI.getOperand(0).getReg(); 106 Register DstHi = TRI->getSubReg(Dst, PPC::sub_gp8_x0); 107 Register DstLo = TRI->getSubReg(Dst, PPC::sub_gp8_x1); 108 Register Lo = MI.getOperand(1).getReg(); 109 Register Hi = MI.getOperand(2).getReg(); 110 PairedCopy(TII, MBB, MI, MI.getDebugLoc(), DstHi, DstLo, Hi, Lo); 111 MI.eraseFromParent(); 112 return true; 113 } 114 default: 115 return false; 116 } 117 } 118 119 bool PPCExpandAtomicPseudo::expandAtomicRMW128( 120 MachineBasicBlock &MBB, MachineInstr &MI, 121 MachineBasicBlock::iterator &NMBBI) { 122 const MCInstrDesc &LL = TII->get(PPC::LQARX); 123 const MCInstrDesc &SC = TII->get(PPC::STQCX); 124 DebugLoc DL = MI.getDebugLoc(); 125 MachineFunction *MF = MBB.getParent(); 126 const BasicBlock *BB = MBB.getBasicBlock(); 127 // Create layout of control flow. 128 MachineFunction::iterator MFI = ++MBB.getIterator(); 129 MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB); 130 MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); 131 MF->insert(MFI, LoopMBB); 132 MF->insert(MFI, ExitMBB); 133 ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()), 134 MBB.end()); 135 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); 136 MBB.addSuccessor(LoopMBB); 137 138 // For non-min/max operations, control flow is kinda like: 139 // MBB: 140 // ... 141 // LoopMBB: 142 // lqarx in, ptr 143 // addc out.sub_x1, in.sub_x1, op.sub_x1 144 // adde out.sub_x0, in.sub_x0, op.sub_x0 145 // stqcx out, ptr 146 // bne- LoopMBB 147 // ExitMBB: 148 // ... 149 Register Old = MI.getOperand(0).getReg(); 150 Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0); 151 Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1); 152 Register Scratch = MI.getOperand(1).getReg(); 153 Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0); 154 Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1); 155 Register RA = MI.getOperand(2).getReg(); 156 Register RB = MI.getOperand(3).getReg(); 157 Register IncrLo = MI.getOperand(4).getReg(); 158 Register IncrHi = MI.getOperand(5).getReg(); 159 unsigned RMWOpcode = MI.getOpcode(); 160 161 MachineBasicBlock *CurrentMBB = LoopMBB; 162 BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB); 163 164 switch (RMWOpcode) { 165 case PPC::ATOMIC_SWAP_I128: 166 PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo, 167 IncrHi, IncrLo); 168 break; 169 case PPC::ATOMIC_LOAD_ADD_I128: 170 BuildMI(CurrentMBB, DL, TII->get(PPC::ADDC8), ScratchLo) 171 .addReg(IncrLo) 172 .addReg(OldLo); 173 BuildMI(CurrentMBB, DL, TII->get(PPC::ADDE8), ScratchHi) 174 .addReg(IncrHi) 175 .addReg(OldHi); 176 break; 177 case PPC::ATOMIC_LOAD_SUB_I128: 178 BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFC8), ScratchLo) 179 .addReg(IncrLo) 180 .addReg(OldLo); 181 BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFE8), ScratchHi) 182 .addReg(IncrHi) 183 .addReg(OldHi); 184 break; 185 186 #define TRIVIAL_ATOMICRMW(Opcode, Instr) \ 187 case Opcode: \ 188 BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo) \ 189 .addReg(IncrLo) \ 190 .addReg(OldLo); \ 191 BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi) \ 192 .addReg(IncrHi) \ 193 .addReg(OldHi); \ 194 break 195 196 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8); 197 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8); 198 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8); 199 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8); 200 #undef TRIVIAL_ATOMICRMW 201 default: 202 llvm_unreachable("Unhandled atomic RMW operation"); 203 } 204 BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB); 205 BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) 206 .addImm(PPC::PRED_NE) 207 .addReg(PPC::CR0) 208 .addMBB(LoopMBB); 209 CurrentMBB->addSuccessor(LoopMBB); 210 CurrentMBB->addSuccessor(ExitMBB); 211 fullyRecomputeLiveIns({ExitMBB, LoopMBB}); 212 NMBBI = MBB.end(); 213 MI.eraseFromParent(); 214 return true; 215 } 216 217 bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128( 218 MachineBasicBlock &MBB, MachineInstr &MI, 219 MachineBasicBlock::iterator &NMBBI) { 220 const MCInstrDesc &LL = TII->get(PPC::LQARX); 221 const MCInstrDesc &SC = TII->get(PPC::STQCX); 222 DebugLoc DL = MI.getDebugLoc(); 223 MachineFunction *MF = MBB.getParent(); 224 const BasicBlock *BB = MBB.getBasicBlock(); 225 Register Old = MI.getOperand(0).getReg(); 226 Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0); 227 Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1); 228 Register Scratch = MI.getOperand(1).getReg(); 229 Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0); 230 Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1); 231 Register RA = MI.getOperand(2).getReg(); 232 Register RB = MI.getOperand(3).getReg(); 233 Register CmpLo = MI.getOperand(4).getReg(); 234 Register CmpHi = MI.getOperand(5).getReg(); 235 Register NewLo = MI.getOperand(6).getReg(); 236 Register NewHi = MI.getOperand(7).getReg(); 237 // Create layout of control flow. 238 // loop: 239 // old = lqarx ptr 240 // <compare old, cmp> 241 // bne 0, exit 242 // succ: 243 // stqcx new ptr 244 // bne 0, loop 245 // exit: 246 // .... 247 MachineFunction::iterator MFI = ++MBB.getIterator(); 248 MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB); 249 MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB); 250 MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); 251 MF->insert(MFI, LoopCmpMBB); 252 MF->insert(MFI, CmpSuccMBB); 253 MF->insert(MFI, ExitMBB); 254 ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()), 255 MBB.end()); 256 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); 257 MBB.addSuccessor(LoopCmpMBB); 258 // Build loop. 259 MachineBasicBlock *CurrentMBB = LoopCmpMBB; 260 BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB); 261 BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchLo) 262 .addReg(OldLo) 263 .addReg(CmpLo); 264 BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchHi) 265 .addReg(OldHi) 266 .addReg(CmpHi); 267 BuildMI(CurrentMBB, DL, TII->get(PPC::OR8_rec), ScratchLo) 268 .addReg(ScratchLo) 269 .addReg(ScratchHi); 270 BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) 271 .addImm(PPC::PRED_NE) 272 .addReg(PPC::CR0) 273 .addMBB(ExitMBB); 274 CurrentMBB->addSuccessor(CmpSuccMBB); 275 CurrentMBB->addSuccessor(ExitMBB); 276 // Build succ. 277 CurrentMBB = CmpSuccMBB; 278 PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo, 279 NewHi, NewLo); 280 BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB); 281 BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) 282 .addImm(PPC::PRED_NE) 283 .addReg(PPC::CR0) 284 .addMBB(LoopCmpMBB); 285 CurrentMBB->addSuccessor(LoopCmpMBB); 286 CurrentMBB->addSuccessor(ExitMBB); 287 288 fullyRecomputeLiveIns({ExitMBB, CmpSuccMBB, LoopCmpMBB}); 289 NMBBI = MBB.end(); 290 MI.eraseFromParent(); 291 return true; 292 } 293 294 } // namespace 295 296 INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic", 297 false, false) 298 299 char PPCExpandAtomicPseudo::ID = 0; 300 FunctionPass *llvm::createPPCExpandAtomicPseudoPass() { 301 return new PPCExpandAtomicPseudo(); 302 } 303