1 //===-- PPCExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file contains a pass that expands atomic pseudo instructions into
// target instructions post RA. This way, each LL/SC loop is treated as a
// single unit until it is expanded here, which makes spilling inside the
// LL/SC loop unlikely.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "MCTargetDesc/PPCPredicates.h"
16 #include "PPC.h"
17 #include "PPCInstrInfo.h"
18
19 #include "llvm/CodeGen/LivePhysRegs.h"
20 #include "llvm/CodeGen/MachineFunctionPass.h"
21 #include "llvm/CodeGen/MachineInstrBuilder.h"
22
23 using namespace llvm;
24
25 #define DEBUG_TYPE "ppc-atomic-expand"
26
27 namespace {
28
29 class PPCExpandAtomicPseudo : public MachineFunctionPass {
30 public:
31 const PPCInstrInfo *TII;
32 const PPCRegisterInfo *TRI;
33 static char ID;
34
PPCExpandAtomicPseudo()35 PPCExpandAtomicPseudo() : MachineFunctionPass(ID) {}
36
37 bool runOnMachineFunction(MachineFunction &MF) override;
38
39 private:
40 bool expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
41 MachineBasicBlock::iterator &NMBBI);
42 bool expandAtomicRMW128(MachineBasicBlock &MBB, MachineInstr &MI,
43 MachineBasicBlock::iterator &NMBBI);
44 bool expandAtomicCmpSwap128(MachineBasicBlock &MBB, MachineInstr &MI,
45 MachineBasicBlock::iterator &NMBBI);
46 };
47
PairedCopy(const PPCInstrInfo * TII,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL,Register Dest0,Register Dest1,Register Src0,Register Src1)48 static void PairedCopy(const PPCInstrInfo *TII, MachineBasicBlock &MBB,
49 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
50 Register Dest0, Register Dest1, Register Src0,
51 Register Src1) {
52 const MCInstrDesc &OR = TII->get(PPC::OR8);
53 const MCInstrDesc &XOR = TII->get(PPC::XOR8);
54 if (Dest0 == Src1 && Dest1 == Src0) {
55 // The most tricky case, swapping values.
56 BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1);
57 BuildMI(MBB, MBBI, DL, XOR, Dest1).addReg(Dest0).addReg(Dest1);
58 BuildMI(MBB, MBBI, DL, XOR, Dest0).addReg(Dest0).addReg(Dest1);
59 } else if (Dest0 != Src0 || Dest1 != Src1) {
60 if (Dest0 == Src1 || Dest1 != Src0) {
61 BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1);
62 BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0);
63 } else {
64 BuildMI(MBB, MBBI, DL, OR, Dest0).addReg(Src0).addReg(Src0);
65 BuildMI(MBB, MBBI, DL, OR, Dest1).addReg(Src1).addReg(Src1);
66 }
67 }
68 }
69
runOnMachineFunction(MachineFunction & MF)70 bool PPCExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
71 bool Changed = false;
72 TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
73 TRI = &TII->getRegisterInfo();
74 for (MachineBasicBlock &MBB : MF) {
75 for (MachineBasicBlock::iterator MBBI = MBB.begin(), MBBE = MBB.end();
76 MBBI != MBBE;) {
77 MachineInstr &MI = *MBBI;
78 MachineBasicBlock::iterator NMBBI = std::next(MBBI);
79 Changed |= expandMI(MBB, MI, NMBBI);
80 MBBI = NMBBI;
81 }
82 }
83 if (Changed)
84 MF.RenumberBlocks();
85 return Changed;
86 }
87
expandMI(MachineBasicBlock & MBB,MachineInstr & MI,MachineBasicBlock::iterator & NMBBI)88 bool PPCExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB, MachineInstr &MI,
89 MachineBasicBlock::iterator &NMBBI) {
90 switch (MI.getOpcode()) {
91 case PPC::ATOMIC_SWAP_I128:
92 case PPC::ATOMIC_LOAD_ADD_I128:
93 case PPC::ATOMIC_LOAD_SUB_I128:
94 case PPC::ATOMIC_LOAD_XOR_I128:
95 case PPC::ATOMIC_LOAD_NAND_I128:
96 case PPC::ATOMIC_LOAD_AND_I128:
97 case PPC::ATOMIC_LOAD_OR_I128:
98 return expandAtomicRMW128(MBB, MI, NMBBI);
99 case PPC::ATOMIC_CMP_SWAP_I128:
100 return expandAtomicCmpSwap128(MBB, MI, NMBBI);
101 case PPC::BUILD_QUADWORD: {
102 Register Dst = MI.getOperand(0).getReg();
103 Register DstHi = TRI->getSubReg(Dst, PPC::sub_gp8_x0);
104 Register DstLo = TRI->getSubReg(Dst, PPC::sub_gp8_x1);
105 Register Lo = MI.getOperand(1).getReg();
106 Register Hi = MI.getOperand(2).getReg();
107 PairedCopy(TII, MBB, MI, MI.getDebugLoc(), DstHi, DstLo, Hi, Lo);
108 MI.eraseFromParent();
109 return true;
110 }
111 default:
112 return false;
113 }
114 }
115
expandAtomicRMW128(MachineBasicBlock & MBB,MachineInstr & MI,MachineBasicBlock::iterator & NMBBI)116 bool PPCExpandAtomicPseudo::expandAtomicRMW128(
117 MachineBasicBlock &MBB, MachineInstr &MI,
118 MachineBasicBlock::iterator &NMBBI) {
119 const MCInstrDesc &LL = TII->get(PPC::LQARX);
120 const MCInstrDesc &SC = TII->get(PPC::STQCX);
121 DebugLoc DL = MI.getDebugLoc();
122 MachineFunction *MF = MBB.getParent();
123 const BasicBlock *BB = MBB.getBasicBlock();
124 // Create layout of control flow.
125 MachineFunction::iterator MFI = ++MBB.getIterator();
126 MachineBasicBlock *LoopMBB = MF->CreateMachineBasicBlock(BB);
127 MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
128 MF->insert(MFI, LoopMBB);
129 MF->insert(MFI, ExitMBB);
130 ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
131 MBB.end());
132 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
133 MBB.addSuccessor(LoopMBB);
134
135 // For non-min/max operations, control flow is kinda like:
136 // MBB:
137 // ...
138 // LoopMBB:
139 // lqarx in, ptr
140 // addc out.sub_x1, in.sub_x1, op.sub_x1
141 // adde out.sub_x0, in.sub_x0, op.sub_x0
142 // stqcx out, ptr
143 // bne- LoopMBB
144 // ExitMBB:
145 // ...
146 Register Old = MI.getOperand(0).getReg();
147 Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0);
148 Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1);
149 Register Scratch = MI.getOperand(1).getReg();
150 Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0);
151 Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1);
152 Register RA = MI.getOperand(2).getReg();
153 Register RB = MI.getOperand(3).getReg();
154 Register IncrLo = MI.getOperand(4).getReg();
155 Register IncrHi = MI.getOperand(5).getReg();
156 unsigned RMWOpcode = MI.getOpcode();
157
158 MachineBasicBlock *CurrentMBB = LoopMBB;
159 BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB);
160
161 switch (RMWOpcode) {
162 case PPC::ATOMIC_SWAP_I128:
163 PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
164 IncrHi, IncrLo);
165 break;
166 case PPC::ATOMIC_LOAD_ADD_I128:
167 BuildMI(CurrentMBB, DL, TII->get(PPC::ADDC8), ScratchLo)
168 .addReg(IncrLo)
169 .addReg(OldLo);
170 BuildMI(CurrentMBB, DL, TII->get(PPC::ADDE8), ScratchHi)
171 .addReg(IncrHi)
172 .addReg(OldHi);
173 break;
174 case PPC::ATOMIC_LOAD_SUB_I128:
175 BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFC8), ScratchLo)
176 .addReg(IncrLo)
177 .addReg(OldLo);
178 BuildMI(CurrentMBB, DL, TII->get(PPC::SUBFE8), ScratchHi)
179 .addReg(IncrHi)
180 .addReg(OldHi);
181 break;
182
183 #define TRIVIAL_ATOMICRMW(Opcode, Instr) \
184 case Opcode: \
185 BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchLo) \
186 .addReg(IncrLo) \
187 .addReg(OldLo); \
188 BuildMI(CurrentMBB, DL, TII->get((Instr)), ScratchHi) \
189 .addReg(IncrHi) \
190 .addReg(OldHi); \
191 break
192
193 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_OR_I128, PPC::OR8);
194 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_XOR_I128, PPC::XOR8);
195 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_AND_I128, PPC::AND8);
196 TRIVIAL_ATOMICRMW(PPC::ATOMIC_LOAD_NAND_I128, PPC::NAND8);
197 #undef TRIVIAL_ATOMICRMW
198 default:
199 llvm_unreachable("Unhandled atomic RMW operation");
200 }
201 BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB);
202 BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
203 .addImm(PPC::PRED_NE)
204 .addReg(PPC::CR0)
205 .addMBB(LoopMBB);
206 CurrentMBB->addSuccessor(LoopMBB);
207 CurrentMBB->addSuccessor(ExitMBB);
208 fullyRecomputeLiveIns({ExitMBB, LoopMBB});
209 NMBBI = MBB.end();
210 MI.eraseFromParent();
211 return true;
212 }
213
expandAtomicCmpSwap128(MachineBasicBlock & MBB,MachineInstr & MI,MachineBasicBlock::iterator & NMBBI)214 bool PPCExpandAtomicPseudo::expandAtomicCmpSwap128(
215 MachineBasicBlock &MBB, MachineInstr &MI,
216 MachineBasicBlock::iterator &NMBBI) {
217 const MCInstrDesc &LL = TII->get(PPC::LQARX);
218 const MCInstrDesc &SC = TII->get(PPC::STQCX);
219 DebugLoc DL = MI.getDebugLoc();
220 MachineFunction *MF = MBB.getParent();
221 const BasicBlock *BB = MBB.getBasicBlock();
222 Register Old = MI.getOperand(0).getReg();
223 Register OldHi = TRI->getSubReg(Old, PPC::sub_gp8_x0);
224 Register OldLo = TRI->getSubReg(Old, PPC::sub_gp8_x1);
225 Register Scratch = MI.getOperand(1).getReg();
226 Register ScratchHi = TRI->getSubReg(Scratch, PPC::sub_gp8_x0);
227 Register ScratchLo = TRI->getSubReg(Scratch, PPC::sub_gp8_x1);
228 Register RA = MI.getOperand(2).getReg();
229 Register RB = MI.getOperand(3).getReg();
230 Register CmpLo = MI.getOperand(4).getReg();
231 Register CmpHi = MI.getOperand(5).getReg();
232 Register NewLo = MI.getOperand(6).getReg();
233 Register NewHi = MI.getOperand(7).getReg();
234 // Create layout of control flow.
235 // loop:
236 // old = lqarx ptr
237 // <compare old, cmp>
238 // bne 0, exit
239 // succ:
240 // stqcx new ptr
241 // bne 0, loop
242 // exit:
243 // ....
244 MachineFunction::iterator MFI = ++MBB.getIterator();
245 MachineBasicBlock *LoopCmpMBB = MF->CreateMachineBasicBlock(BB);
246 MachineBasicBlock *CmpSuccMBB = MF->CreateMachineBasicBlock(BB);
247 MachineBasicBlock *ExitMBB = MF->CreateMachineBasicBlock(BB);
248 MF->insert(MFI, LoopCmpMBB);
249 MF->insert(MFI, CmpSuccMBB);
250 MF->insert(MFI, ExitMBB);
251 ExitMBB->splice(ExitMBB->begin(), &MBB, std::next(MI.getIterator()),
252 MBB.end());
253 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
254 MBB.addSuccessor(LoopCmpMBB);
255 // Build loop.
256 MachineBasicBlock *CurrentMBB = LoopCmpMBB;
257 BuildMI(CurrentMBB, DL, LL, Old).addReg(RA).addReg(RB);
258 BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchLo)
259 .addReg(OldLo)
260 .addReg(CmpLo);
261 BuildMI(CurrentMBB, DL, TII->get(PPC::XOR8), ScratchHi)
262 .addReg(OldHi)
263 .addReg(CmpHi);
264 BuildMI(CurrentMBB, DL, TII->get(PPC::OR8_rec), ScratchLo)
265 .addReg(ScratchLo)
266 .addReg(ScratchHi);
267 BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
268 .addImm(PPC::PRED_NE)
269 .addReg(PPC::CR0)
270 .addMBB(ExitMBB);
271 CurrentMBB->addSuccessor(CmpSuccMBB);
272 CurrentMBB->addSuccessor(ExitMBB);
273 // Build succ.
274 CurrentMBB = CmpSuccMBB;
275 PairedCopy(TII, *CurrentMBB, CurrentMBB->end(), DL, ScratchHi, ScratchLo,
276 NewHi, NewLo);
277 BuildMI(CurrentMBB, DL, SC).addReg(Scratch).addReg(RA).addReg(RB);
278 BuildMI(CurrentMBB, DL, TII->get(PPC::BCC))
279 .addImm(PPC::PRED_NE)
280 .addReg(PPC::CR0)
281 .addMBB(LoopCmpMBB);
282 CurrentMBB->addSuccessor(LoopCmpMBB);
283 CurrentMBB->addSuccessor(ExitMBB);
284
285 fullyRecomputeLiveIns({ExitMBB, CmpSuccMBB, LoopCmpMBB});
286 NMBBI = MBB.end();
287 MI.eraseFromParent();
288 return true;
289 }
290
291 } // namespace
292
293 INITIALIZE_PASS(PPCExpandAtomicPseudo, DEBUG_TYPE, "PowerPC Expand Atomic",
294 false, false)
295
296 char PPCExpandAtomicPseudo::ID = 0;
createPPCExpandAtomicPseudoPass()297 FunctionPass *llvm::createPPCExpandAtomicPseudoPass() {
298 return new PPCExpandAtomicPseudo();
299 }
300