1 //===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a pass that expands atomic pseudo instructions into 10 // target instructions post RA. With such method, LL/SC loop is considered as 11 // a whole blob and make spilling unlikely happens in the LL/SC loop. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "MCTargetDesc/PPCPredicates.h" 16 #include "PPC.h" 17 #include "PPCInstrInfo.h" 18 #include "PPCTargetMachine.h" 19 20 #include "llvm/CodeGen/LivePhysRegs.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 24 using namespace llvm; 25 26 #define DEBUG_TYPE "ppc-atomic-expand" 27 28 namespace { 29 30 class PPCExpandAtomicPseudo : public MachineFunctionPass { 31 public: 32 const PPCInstrInfo *TII; 33 const PPCRegisterInfo *TRI; 34 static char ID; 35 36 PPCExpandAtomicPseudo() : MachineFunctionPass(ID) { 37 initializePPCExpandAtomicPseudoPass(*PassRegistry::getPassRegistry()); 38 } 39 40 bool runOnMachineFunction(MachineFunction &MF) override; 41 42 private: 43 bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI, 44 MachineBasicBlock::iterator &NMBBI); 45 bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI, 46 MachineBasicBlock::iterator &NMBBI); 47 bool expandAtomicCmpSwap128(MachineBasicBlock &MBB, MachineInstr &MI, 48 MachineBasicBlock::iterator &NMBBI); 49 }; 50 51 static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB, 52 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 53 Register Dest0, Register Dest1, Register Src0, 54 Register Src1) { 55 const MCInstrDesc &OR = TII->get(PPC::OR8); 56 const MCInstrDesc &XOR = TII->get(PPC::XOR8); 57 if (Dest0 == Src1 && Dest1 == Src0) { 58 // The most tricky case, swapping values. 59 BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1); 60 BuildMI(MBB, MBBI, DL, XOR, Dest1).addReg(Dest0).addReg(Dest1); 61 BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1); 62 } else if (Dest0 != Src0 || Dest1 != Src1) { 63 if (Dest0 == Src1 || Dest1 != Src0) { 64 BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1); 65 BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0); 66 } else { 67 BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0); 68 BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1); 69 } 70 } 71 } 72 73 bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) { 74 bool Changed = false; 75 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); 76 TRI = &TII->getRegisterInfo(); 77 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 78 MachineBasicBlock &MBB = *I; 79 for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end(); 80 MBBI != MBBE;) { 81 MachineInstr &MI = *MBBI; 82 MachineBasicBlock::iterator NMBBI = std::next(MBBI); 83 Changed |= expandMI(MBB, MI, NMBBI); 84 MBBI = NMBBI; 85 } 86 } 87 if (Changed) 88 MF.RenumberBlocks(); 89 return Changed; 90 } 91 92 bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI, 93 MachineBasicBlock::iterator &NMBBI) { 94 switch (MI.getOpcode()) { 95 case PPC::ATOMIC_SWAP_I128: 96 case PPC::ATOMIC_LOAD_ADD_I128: 97 case PPC::ATOMIC_LOAD_SUB_I128: 98 case PPC::ATOMIC_LOAD_XOR_I128: 99 case PPC::ATOMIC_LOAD_NAND_I128: 100 case PPC::ATOMIC_LOAD_AND_I128: 101 case PPC::ATOMIC_LOAD_OR_I128: 102 return expandAtomicRMW128(MBB, MI, NMBBI); 103 case PPC::ATOMIC_CMP_SWAP_I128: 104 return expandAtomicCmpSwap128(MBB, MI, NMBBI); 105 default: 106 return false; 107 } 108 } 109 110 bool PPCExpandAtomicPseudo::expandAtomicRMW128( 111 MachineBasicBlock &MBB, MachineInstr &MI, 112 MachineBasicBlock::iterator &NMBBI) { 113 const MCInstrDesc &LL = TII->get(PPC::LQARX); 114 const MCInstrDesc &SC = TII->get(PPC::STQCX); 115 DebugLoc DL = MI.getDebugLoc(); 116 MachineFunction *MF = MBB.getParent(); 117 const BasicBlock *BB = MBB.getBasicBlock(); 118 // Create layout of control flow. 119 MachineFunction::iterator MFI = ++MBB.getIterator(); 120 MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB); 121 MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); 122 MF->insert(MFI, LoopMBB); 123 MF->insert(MFI, ExitMBB); 124 ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()), 125 MBB.end()); 126 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); 127 MBB.addSuccessor(LoopMBB); 128 129 // For non-min/max operations, control flow is kinda like: 130 // MBB: 131 // ... 132 // LoopMBB: 133 // lqarx in, ptr 134 // addc out.sub_x1, in.sub_x1, op.sub_x1 135 // adde out.sub_x0, in.sub_x0, op.sub_x0 136 // stqcx out, ptr 137 // bne- LoopMBB 138 // ExitMBB: 139 // ... 140 Register Old = MI.getOperand(0).getReg(); 141 Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0); 142 Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1); 143 Register Scratch = MI.getOperand(1).getReg(); 144 Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0); 145 Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1); 146 Register RA = MI.getOperand(2).getReg(); 147 Register RB = MI.getOperand(3).getReg(); 148 Register IncrLo = MI.getOperand(4).getReg(); 149 Register IncrHi = MI.getOperand(5).getReg(); 150 unsigned RMWOpcode = MI.getOpcode(); 151 152 MachineBasicBlock *CurrentMBB = LoopMBB; 153 BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB); 154 155 switch (RMWOpcode) { 156 case PPC::ATOMIC_SWAP_I128: 157 PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo, 158 IncrHi, IncrLo); 159 break; 160 case PPC::ATOMIC_LOAD_ADD_I128: 161 BuildMI(CurrentMBB, DL, TII->get(PPC::ADDC8), ScratchLo) 162 .addReg(IncrLo) 163 .addReg(OldLo); 164 BuildMI(CurrentMBB, DL, TII->get(PPC::ADDE8), ScratchHi) 165 .addReg(IncrHi) 166 .addReg(OldHi); 167 break; 168 case PPC::ATOMIC_LOAD_SUB_I128: 169 BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFC8), ScratchLo) 170 .addReg(IncrLo) 171 .addReg(OldLo); 172 BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFE8), ScratchHi) 173 .addReg(IncrHi) 174 .addReg(OldHi); 175 break; 176 177 #define TRIVIAL_ATOMICRMW(Opcode, Instr) \ 178 case Opcode: \ 179 BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo) \ 180 .addReg(IncrLo) \ 181 .addReg(OldLo); \ 182 BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi) \ 183 .addReg(IncrHi) \ 184 .addReg(OldHi); \ 185 break 186 187 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8); 188 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8); 189 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8); 190 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8); 191 #undef TRIVIAL_ATOMICRMW 192 default: 193 llvm_unreachable("Unhandled atomic RMW operation"); 194 } 195 BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB); 196 BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) 197 .addImm(PPC::PRED_NE) 198 .addReg(PPC::CR0) 199 .addMBB(LoopMBB); 200 CurrentMBB->addSuccessor(LoopMBB); 201 CurrentMBB->addSuccessor(ExitMBB); 202 recomputeLiveIns(*LoopMBB); 203 recomputeLiveIns(*ExitMBB); 204 NMBBI = MBB.end(); 205 MI.eraseFromParent(); 206 return true; 207 } 208 209 bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128( 210 MachineBasicBlock &MBB, MachineInstr &MI, 211 MachineBasicBlock::iterator &NMBBI) { 212 const MCInstrDesc &LL = TII->get(PPC::LQARX); 213 const MCInstrDesc &SC = TII->get(PPC::STQCX); 214 DebugLoc DL = MI.getDebugLoc(); 215 MachineFunction *MF = MBB.getParent(); 216 const BasicBlock *BB = MBB.getBasicBlock(); 217 Register Old = MI.getOperand(0).getReg(); 218 Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0); 219 Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1); 220 Register Scratch = MI.getOperand(1).getReg(); 221 Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0); 222 Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1); 223 Register RA = MI.getOperand(2).getReg(); 224 Register RB = MI.getOperand(3).getReg(); 225 Register CmpLo = MI.getOperand(4).getReg(); 226 Register CmpHi = MI.getOperand(5).getReg(); 227 Register NewLo = MI.getOperand(6).getReg(); 228 Register NewHi = MI.getOperand(7).getReg(); 229 // Create layout of control flow. 230 // loop: 231 // old = lqarx ptr 232 // <compare old, cmp> 233 // bne 0, fail 234 // succ: 235 // stqcx new ptr 236 // bne 0, loop 237 // b exit 238 // fail: 239 // stqcx old ptr 240 // exit: 241 // .... 242 MachineFunction::iterator MFI = ++MBB.getIterator(); 243 MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB); 244 MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB); 245 MachineBasicBlock *CmpFailMBB = MF->CreateMachineBasicBlock(BB); 246 MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); 247 MF->insert(MFI, LoopCmpMBB); 248 MF->insert(MFI, CmpSuccMBB); 249 MF->insert(MFI, CmpFailMBB); 250 MF->insert(MFI, ExitMBB); 251 ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()), 252 MBB.end()); 253 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); 254 MBB.addSuccessor(LoopCmpMBB); 255 // Build loop. 256 MachineBasicBlock *CurrentMBB = LoopCmpMBB; 257 BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB); 258 BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchLo) 259 .addReg(OldLo) 260 .addReg(CmpLo); 261 BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchHi) 262 .addReg(OldHi) 263 .addReg(CmpHi); 264 BuildMI(CurrentMBB, DL, TII->get(PPC::OR8_rec), ScratchLo) 265 .addReg(ScratchLo) 266 .addReg(ScratchHi); 267 BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) 268 .addImm(PPC::PRED_NE) 269 .addReg(PPC::CR0) 270 .addMBB(CmpFailMBB); 271 CurrentMBB->addSuccessor(CmpSuccMBB); 272 CurrentMBB->addSuccessor(CmpFailMBB); 273 // Build succ. 274 CurrentMBB = CmpSuccMBB; 275 PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo, 276 NewHi, NewLo); 277 BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB); 278 BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) 279 .addImm(PPC::PRED_NE) 280 .addReg(PPC::CR0) 281 .addMBB(LoopCmpMBB); 282 BuildMI(CurrentMBB, DL, TII->get(PPC::B)).addMBB(ExitMBB); 283 CurrentMBB->addSuccessor(LoopCmpMBB); 284 CurrentMBB->addSuccessor(ExitMBB); 285 CurrentMBB = CmpFailMBB; 286 BuildMI(CurrentMBB, DL, SC).addReg(Old).addReg(RA).addReg(RB); 287 CurrentMBB->addSuccessor(ExitMBB); 288 289 recomputeLiveIns(*LoopCmpMBB); 290 recomputeLiveIns(*CmpSuccMBB); 291 recomputeLiveIns(*CmpFailMBB); 292 recomputeLiveIns(*ExitMBB); 293 NMBBI = MBB.end(); 294 MI.eraseFromParent(); 295 return true; 296 } 297 298 } // namespace 299 300 INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic", 301 false, false) 302 303 char PPCExpandAtomicPseudo::ID = 0; 304 FunctionPass *llvm::createPPCExpandAtomicPseudoPass() { 305 return new PPCExpandAtomicPseudo(); 306 } 307