1 //===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a pass that expands atomic pseudo instructions into 10 // target instructions post RA. With such method, LL/SC loop is considered as 11 // a whole blob and make spilling unlikely happens in the LL/SC loop. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "MCTargetDesc/PPCPredicates.h" 16 #include "PPC.h" 17 #include "PPCInstrInfo.h" 18 #include "PPCTargetMachine.h" 19 20 #include "llvm/CodeGen/LivePhysRegs.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 24 using namespace llvm; 25 26 #define DEBUG_TYPE "ppc-atomic-expand" 27 28 namespace { 29 30 class PPCExpandAtomicPseudo : public MachineFunctionPass { 31 public: 32 const PPCInstrInfo *TII; 33 const PPCRegisterInfo *TRI; 34 static char ID; 35 36 PPCExpandAtomicPseudo() : MachineFunctionPass(ID) { 37 initializePPCExpandAtomicPseudoPass(*PassRegistry::getPassRegistry()); 38 } 39 40 bool runOnMachineFunction(MachineFunction &MF) override; 41 42 private: 43 bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI, 44 MachineBasicBlock::iterator &NMBBI); 45 bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI, 46 MachineBasicBlock::iterator &NMBBI); 47 bool expandAtomicCmpSwap128(MachineBasicBlock &MBB, MachineInstr &MI, 48 MachineBasicBlock::iterator &NMBBI); 49 }; 50 51 static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB, 52 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 53 Register Dest0, Register Dest1, Register Src0, 54 Register Src1) { 55 const MCInstrDesc &OR = TII->get(PPC::OR8); 56 const MCInstrDesc &XOR = TII->get(PPC::XOR8); 57 if (Dest0 == Src1 && Dest1 == Src0) { 58 // The most tricky case, swapping values. 59 BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1); 60 BuildMI(MBB, MBBI, DL, XOR, Dest1).addReg(Dest0).addReg(Dest1); 61 BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1); 62 } else if (Dest0 != Src0 || Dest1 != Src1) { 63 if (Dest0 == Src1 || Dest1 != Src0) { 64 BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1); 65 BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0); 66 } else { 67 BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0); 68 BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1); 69 } 70 } 71 } 72 73 bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) { 74 bool Changed = false; 75 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo()); 76 TRI = &TII->getRegisterInfo(); 77 for (MachineBasicBlock &MBB : MF) { 78 for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end(); 79 MBBI != MBBE;) { 80 MachineInstr &MI = *MBBI; 81 MachineBasicBlock::iterator NMBBI = std::next(MBBI); 82 Changed |= expandMI(MBB, MI, NMBBI); 83 MBBI = NMBBI; 84 } 85 } 86 if (Changed) 87 MF.RenumberBlocks(); 88 return Changed; 89 } 90 91 bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI, 92 MachineBasicBlock::iterator &NMBBI) { 93 switch (MI.getOpcode()) { 94 case PPC::ATOMIC_SWAP_I128: 95 case PPC::ATOMIC_LOAD_ADD_I128: 96 case PPC::ATOMIC_LOAD_SUB_I128: 97 case PPC::ATOMIC_LOAD_XOR_I128: 98 case PPC::ATOMIC_LOAD_NAND_I128: 99 case PPC::ATOMIC_LOAD_AND_I128: 100 case PPC::ATOMIC_LOAD_OR_I128: 101 return expandAtomicRMW128(MBB, MI, NMBBI); 102 case PPC::ATOMIC_CMP_SWAP_I128: 103 return expandAtomicCmpSwap128(MBB, MI, NMBBI); 104 case PPC::BUILD_QUADWORD: { 105 Register Dst = MI.getOperand(0).getReg(); 106 Register DstHi = TRI->getSubReg(Dst, PPC::sub_gp8_x0); 107 Register DstLo = TRI->getSubReg(Dst, PPC::sub_gp8_x1); 108 Register Lo = MI.getOperand(1).getReg(); 109 Register Hi = MI.getOperand(2).getReg(); 110 PairedCopy(TII, MBB, MI, MI.getDebugLoc(), DstHi, DstLo, Hi, Lo); 111 MI.eraseFromParent(); 112 return true; 113 } 114 default: 115 return false; 116 } 117 } 118 119 bool PPCExpandAtomicPseudo::expandAtomicRMW128( 120 MachineBasicBlock &MBB, MachineInstr &MI, 121 MachineBasicBlock::iterator &NMBBI) { 122 const MCInstrDesc &LL = TII->get(PPC::LQARX); 123 const MCInstrDesc &SC = TII->get(PPC::STQCX); 124 DebugLoc DL = MI.getDebugLoc(); 125 MachineFunction *MF = MBB.getParent(); 126 const BasicBlock *BB = MBB.getBasicBlock(); 127 // Create layout of control flow. 128 MachineFunction::iterator MFI = ++MBB.getIterator(); 129 MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB); 130 MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); 131 MF->insert(MFI, LoopMBB); 132 MF->insert(MFI, ExitMBB); 133 ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()), 134 MBB.end()); 135 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); 136 MBB.addSuccessor(LoopMBB); 137 138 // For non-min/max operations, control flow is kinda like: 139 // MBB: 140 // ... 141 // LoopMBB: 142 // lqarx in, ptr 143 // addc out.sub_x1, in.sub_x1, op.sub_x1 144 // adde out.sub_x0, in.sub_x0, op.sub_x0 145 // stqcx out, ptr 146 // bne- LoopMBB 147 // ExitMBB: 148 // ... 149 Register Old = MI.getOperand(0).getReg(); 150 Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0); 151 Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1); 152 Register Scratch = MI.getOperand(1).getReg(); 153 Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0); 154 Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1); 155 Register RA = MI.getOperand(2).getReg(); 156 Register RB = MI.getOperand(3).getReg(); 157 Register IncrLo = MI.getOperand(4).getReg(); 158 Register IncrHi = MI.getOperand(5).getReg(); 159 unsigned RMWOpcode = MI.getOpcode(); 160 161 MachineBasicBlock *CurrentMBB = LoopMBB; 162 BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB); 163 164 switch (RMWOpcode) { 165 case PPC::ATOMIC_SWAP_I128: 166 PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo, 167 IncrHi, IncrLo); 168 break; 169 case PPC::ATOMIC_LOAD_ADD_I128: 170 BuildMI(CurrentMBB, DL, TII->get(PPC::ADDC8), ScratchLo) 171 .addReg(IncrLo) 172 .addReg(OldLo); 173 BuildMI(CurrentMBB, DL, TII->get(PPC::ADDE8), ScratchHi) 174 .addReg(IncrHi) 175 .addReg(OldHi); 176 break; 177 case PPC::ATOMIC_LOAD_SUB_I128: 178 BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFC8), ScratchLo) 179 .addReg(IncrLo) 180 .addReg(OldLo); 181 BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFE8), ScratchHi) 182 .addReg(IncrHi) 183 .addReg(OldHi); 184 break; 185 186 #define TRIVIAL_ATOMICRMW(Opcode, Instr) \ 187 case Opcode: \ 188 BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo) \ 189 .addReg(IncrLo) \ 190 .addReg(OldLo); \ 191 BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi) \ 192 .addReg(IncrHi) \ 193 .addReg(OldHi); \ 194 break 195 196 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8); 197 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8); 198 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8); 199 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8); 200 #undef TRIVIAL_ATOMICRMW 201 default: 202 llvm_unreachable("Unhandled atomic RMW operation"); 203 } 204 BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB); 205 BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) 206 .addImm(PPC::PRED_NE) 207 .addReg(PPC::CR0) 208 .addMBB(LoopMBB); 209 CurrentMBB->addSuccessor(LoopMBB); 210 CurrentMBB->addSuccessor(ExitMBB); 211 recomputeLiveIns(*LoopMBB); 212 recomputeLiveIns(*ExitMBB); 213 NMBBI = MBB.end(); 214 MI.eraseFromParent(); 215 return true; 216 } 217 218 bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128( 219 MachineBasicBlock &MBB, MachineInstr &MI, 220 MachineBasicBlock::iterator &NMBBI) { 221 const MCInstrDesc &LL = TII->get(PPC::LQARX); 222 const MCInstrDesc &SC = TII->get(PPC::STQCX); 223 DebugLoc DL = MI.getDebugLoc(); 224 MachineFunction *MF = MBB.getParent(); 225 const BasicBlock *BB = MBB.getBasicBlock(); 226 Register Old = MI.getOperand(0).getReg(); 227 Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0); 228 Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1); 229 Register Scratch = MI.getOperand(1).getReg(); 230 Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0); 231 Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1); 232 Register RA = MI.getOperand(2).getReg(); 233 Register RB = MI.getOperand(3).getReg(); 234 Register CmpLo = MI.getOperand(4).getReg(); 235 Register CmpHi = MI.getOperand(5).getReg(); 236 Register NewLo = MI.getOperand(6).getReg(); 237 Register NewHi = MI.getOperand(7).getReg(); 238 // Create layout of control flow. 239 // loop: 240 // old = lqarx ptr 241 // <compare old, cmp> 242 // bne 0, fail 243 // succ: 244 // stqcx new ptr 245 // bne 0, loop 246 // b exit 247 // fail: 248 // stqcx old ptr 249 // exit: 250 // .... 251 MachineFunction::iterator MFI = ++MBB.getIterator(); 252 MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB); 253 MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB); 254 MachineBasicBlock *CmpFailMBB = MF->CreateMachineBasicBlock(BB); 255 MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); 256 MF->insert(MFI, LoopCmpMBB); 257 MF->insert(MFI, CmpSuccMBB); 258 MF->insert(MFI, CmpFailMBB); 259 MF->insert(MFI, ExitMBB); 260 ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()), 261 MBB.end()); 262 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); 263 MBB.addSuccessor(LoopCmpMBB); 264 // Build loop. 265 MachineBasicBlock *CurrentMBB = LoopCmpMBB; 266 BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB); 267 BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchLo) 268 .addReg(OldLo) 269 .addReg(CmpLo); 270 BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchHi) 271 .addReg(OldHi) 272 .addReg(CmpHi); 273 BuildMI(CurrentMBB, DL, TII->get(PPC::OR8_rec), ScratchLo) 274 .addReg(ScratchLo) 275 .addReg(ScratchHi); 276 BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) 277 .addImm(PPC::PRED_NE) 278 .addReg(PPC::CR0) 279 .addMBB(CmpFailMBB); 280 CurrentMBB->addSuccessor(CmpSuccMBB); 281 CurrentMBB->addSuccessor(CmpFailMBB); 282 // Build succ. 283 CurrentMBB = CmpSuccMBB; 284 PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo, 285 NewHi, NewLo); 286 BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB); 287 BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) 288 .addImm(PPC::PRED_NE) 289 .addReg(PPC::CR0) 290 .addMBB(LoopCmpMBB); 291 BuildMI(CurrentMBB, DL, TII->get(PPC::B)).addMBB(ExitMBB); 292 CurrentMBB->addSuccessor(LoopCmpMBB); 293 CurrentMBB->addSuccessor(ExitMBB); 294 CurrentMBB = CmpFailMBB; 295 BuildMI(CurrentMBB, DL, SC).addReg(Old).addReg(RA).addReg(RB); 296 CurrentMBB->addSuccessor(ExitMBB); 297 298 recomputeLiveIns(*LoopCmpMBB); 299 recomputeLiveIns(*CmpSuccMBB); 300 recomputeLiveIns(*CmpFailMBB); 301 recomputeLiveIns(*ExitMBB); 302 NMBBI = MBB.end(); 303 MI.eraseFromParent(); 304 return true; 305 } 306 307 } // namespace 308 309 INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic", 310 false, false) 311 312 char PPCExpandAtomicPseudo::ID = 0; 313 FunctionPass *llvm::createPPCExpandAtomicPseudoPass() { 314 return new PPCExpandAtomicPseudo(); 315 } 316