//===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file contains a pass that expands atomic pseudo instructions into // target instructions post RA. With such method, LL/SC loop is considered as // a whole blob and make spilling unlikely happens in the LL/SC loop. // //===----------------------------------------------------------------------===// #include "MCTargetDesc/PPCPredicates.h" #include "PPC.h" #include "PPCInstrInfo.h" #include "PPCTargetMachine.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" using namespace llvm; #define DEBUG_TYPE "ppc-atomic-expand" namespace { class PPCExpandAtomicPseudo : public MachineFunctionPass { public: const PPCInstrInfo *TII; const PPCRegisterInfo *TRI; static char ID; PPCExpandAtomicPseudo() : MachineFunctionPass(ID) { initializePPCExpandAtomicPseudoPass(*PassRegistry::getPassRegistry()); } bool runOnMachineFunction(MachineFunction &MF) override; private: bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI, MachineBasicBlock::iterator &NMBBI); bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI, MachineBasicBlock::iterator &NMBBI); bool expandAtomicCmpSwap128(MachineBasicBlock &MBB, MachineInstr &MI, MachineBasicBlock::iterator &NMBBI); }; static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, Register Dest0, Register Dest1, Register Src0, Register Src1) { const MCInstrDesc &OR = TII->get(PPC::OR8); const MCInstrDesc &XOR = TII->get(PPC::XOR8); if (Dest0 == Src1 && Dest1 == Src0) { // The most tricky case, swapping values. BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1); BuildMI(MBB, MBBI, DL, XOR, Dest1).addReg(Dest0).addReg(Dest1); BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1); } else if (Dest0 != Src0 || Dest1 != Src1) { if (Dest0 == Src1 || Dest1 != Src0) { BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1); BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0); } else { BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0); BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1); } } } bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; TII = static_cast(MF.getSubtarget().getInstrInfo()); TRI = &TII->getRegisterInfo(); for (MachineBasicBlock &MBB : MF) { for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end(); MBBI != MBBE;) { MachineInstr &MI = *MBBI; MachineBasicBlock::iterator NMBBI = std::next(MBBI); Changed |= expandMI(MBB, MI, NMBBI); MBBI = NMBBI; } } if (Changed) MF.RenumberBlocks(); return Changed; } bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI, MachineBasicBlock::iterator &NMBBI) { switch (MI.getOpcode()) { case PPC::ATOMIC_SWAP_I128: case PPC::ATOMIC_LOAD_ADD_I128: case PPC::ATOMIC_LOAD_SUB_I128: case PPC::ATOMIC_LOAD_XOR_I128: case PPC::ATOMIC_LOAD_NAND_I128: case PPC::ATOMIC_LOAD_AND_I128: case PPC::ATOMIC_LOAD_OR_I128: return expandAtomicRMW128(MBB, MI, NMBBI); case PPC::ATOMIC_CMP_SWAP_I128: return expandAtomicCmpSwap128(MBB, MI, NMBBI); case PPC::BUILD_QUADWORD: { Register Dst = MI.getOperand(0).getReg(); Register DstHi = TRI->getSubReg(Dst, PPC::sub_gp8_x0); Register DstLo = TRI->getSubReg(Dst, PPC::sub_gp8_x1); Register Lo = MI.getOperand(1).getReg(); Register Hi = MI.getOperand(2).getReg(); PairedCopy(TII, MBB, MI, MI.getDebugLoc(), DstHi, DstLo, Hi, Lo); MI.eraseFromParent(); return true; } default: return false; } } bool PPCExpandAtomicPseudo::expandAtomicRMW128( MachineBasicBlock &MBB, MachineInstr &MI, MachineBasicBlock::iterator &NMBBI) { const MCInstrDesc &LL = TII->get(PPC::LQARX); const MCInstrDesc &SC = TII->get(PPC::STQCX); DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MBB.getParent(); const BasicBlock *BB = MBB.getBasicBlock(); // Create layout of control flow. MachineFunction::iterator MFI = ++MBB.getIterator(); MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); MF->insert(MFI, LoopMBB); MF->insert(MFI, ExitMBB); ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()), MBB.end()); ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); MBB.addSuccessor(LoopMBB); // For non-min/max operations, control flow is kinda like: // MBB: // ... // LoopMBB: // lqarx in, ptr // addc out.sub_x1, in.sub_x1, op.sub_x1 // adde out.sub_x0, in.sub_x0, op.sub_x0 // stqcx out, ptr // bne- LoopMBB // ExitMBB: // ... Register Old = MI.getOperand(0).getReg(); Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0); Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1); Register Scratch = MI.getOperand(1).getReg(); Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0); Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1); Register RA = MI.getOperand(2).getReg(); Register RB = MI.getOperand(3).getReg(); Register IncrLo = MI.getOperand(4).getReg(); Register IncrHi = MI.getOperand(5).getReg(); unsigned RMWOpcode = MI.getOpcode(); MachineBasicBlock *CurrentMBB = LoopMBB; BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB); switch (RMWOpcode) { case PPC::ATOMIC_SWAP_I128: PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo, IncrHi, IncrLo); break; case PPC::ATOMIC_LOAD_ADD_I128: BuildMI(CurrentMBB, DL, TII->get(PPC::ADDC8), ScratchLo) .addReg(IncrLo) .addReg(OldLo); BuildMI(CurrentMBB, DL, TII->get(PPC::ADDE8), ScratchHi) .addReg(IncrHi) .addReg(OldHi); break; case PPC::ATOMIC_LOAD_SUB_I128: BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFC8), ScratchLo) .addReg(IncrLo) .addReg(OldLo); BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFE8), ScratchHi) .addReg(IncrHi) .addReg(OldHi); break; #define TRIVIAL_ATOMICRMW(Opcode, Instr) \ case Opcode: \ BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo) \ .addReg(IncrLo) \ .addReg(OldLo); \ BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi) \ .addReg(IncrHi) \ .addReg(OldHi); \ break TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8); TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8); TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8); TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8); #undef TRIVIAL_ATOMICRMW default: llvm_unreachable("Unhandled atomic RMW operation"); } BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB); BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE) .addReg(PPC::CR0) .addMBB(LoopMBB); CurrentMBB->addSuccessor(LoopMBB); CurrentMBB->addSuccessor(ExitMBB); recomputeLiveIns(*LoopMBB); recomputeLiveIns(*ExitMBB); NMBBI = MBB.end(); MI.eraseFromParent(); return true; } bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128( MachineBasicBlock &MBB, MachineInstr &MI, MachineBasicBlock::iterator &NMBBI) { const MCInstrDesc &LL = TII->get(PPC::LQARX); const MCInstrDesc &SC = TII->get(PPC::STQCX); DebugLoc DL = MI.getDebugLoc(); MachineFunction *MF = MBB.getParent(); const BasicBlock *BB = MBB.getBasicBlock(); Register Old = MI.getOperand(0).getReg(); Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0); Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1); Register Scratch = MI.getOperand(1).getReg(); Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0); Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1); Register RA = MI.getOperand(2).getReg(); Register RB = MI.getOperand(3).getReg(); Register CmpLo = MI.getOperand(4).getReg(); Register CmpHi = MI.getOperand(5).getReg(); Register NewLo = MI.getOperand(6).getReg(); Register NewHi = MI.getOperand(7).getReg(); // Create layout of control flow. // loop: // old = lqarx ptr // // bne 0, fail // succ: // stqcx new ptr // bne 0, loop // b exit // fail: // stqcx old ptr // exit: // .... MachineFunction::iterator MFI = ++MBB.getIterator(); MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *CmpFailMBB = MF->CreateMachineBasicBlock(BB); MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB); MF->insert(MFI, LoopCmpMBB); MF->insert(MFI, CmpSuccMBB); MF->insert(MFI, CmpFailMBB); MF->insert(MFI, ExitMBB); ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()), MBB.end()); ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); MBB.addSuccessor(LoopCmpMBB); // Build loop. MachineBasicBlock *CurrentMBB = LoopCmpMBB; BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB); BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchLo) .addReg(OldLo) .addReg(CmpLo); BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchHi) .addReg(OldHi) .addReg(CmpHi); BuildMI(CurrentMBB, DL, TII->get(PPC::OR8_rec), ScratchLo) .addReg(ScratchLo) .addReg(ScratchHi); BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE) .addReg(PPC::CR0) .addMBB(CmpFailMBB); CurrentMBB->addSuccessor(CmpSuccMBB); CurrentMBB->addSuccessor(CmpFailMBB); // Build succ. CurrentMBB = CmpSuccMBB; PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo, NewHi, NewLo); BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB); BuildMI(CurrentMBB, DL, TII->get(PPC::BCC)) .addImm(PPC::PRED_NE) .addReg(PPC::CR0) .addMBB(LoopCmpMBB); BuildMI(CurrentMBB, DL, TII->get(PPC::B)).addMBB(ExitMBB); CurrentMBB->addSuccessor(LoopCmpMBB); CurrentMBB->addSuccessor(ExitMBB); CurrentMBB = CmpFailMBB; BuildMI(CurrentMBB, DL, SC).addReg(Old).addReg(RA).addReg(RB); CurrentMBB->addSuccessor(ExitMBB); recomputeLiveIns(*LoopCmpMBB); recomputeLiveIns(*CmpSuccMBB); recomputeLiveIns(*CmpFailMBB); recomputeLiveIns(*ExitMBB); NMBBI = MBB.end(); MI.eraseFromParent(); return true; } } // namespace INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic", false, false) char PPCExpandAtomicPseudo::ID = 0; FunctionPass *llvm::createPPCExpandAtomicPseudoPass() { return new PPCExpandAtomicPseudo(); }