xref: /freebsd/contrib/llvm-project/llvm/lib/Target/LoongArch/LoongArchExpandAtomicPseudoInsts.cpp (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 //==- LoongArchExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands atomic pseudo instructions into
10 // target instructions. This pass should be run at the last possible moment,
11 // avoiding the possibility for other passes to break the requirements for
12 // forward progress in the LL/SC block.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "LoongArch.h"
17 #include "LoongArchInstrInfo.h"
18 #include "LoongArchTargetMachine.h"
19 
20 #include "llvm/CodeGen/LivePhysRegs.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 
24 using namespace llvm;
25 
26 #define LoongArch_EXPAND_ATOMIC_PSEUDO_NAME                                    \
27   "LoongArch atomic pseudo instruction expansion pass"
28 
29 namespace {
30 
31 class LoongArchExpandAtomicPseudo : public MachineFunctionPass {
32 public:
33   const LoongArchInstrInfo *TII;
34   static char ID;
35 
36   LoongArchExpandAtomicPseudo() : MachineFunctionPass(ID) {
37     initializeLoongArchExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
38   }
39 
40   bool runOnMachineFunction(MachineFunction &MF) override;
41 
42   StringRef getPassName() const override {
43     return LoongArch_EXPAND_ATOMIC_PSEUDO_NAME;
44   }
45 
46 private:
47   bool expandMBB(MachineBasicBlock &MBB);
48   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
49                 MachineBasicBlock::iterator &NextMBBI);
50   bool expandAtomicBinOp(MachineBasicBlock &MBB,
51                          MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
52                          bool IsMasked, int Width,
53                          MachineBasicBlock::iterator &NextMBBI);
54   bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
55                             MachineBasicBlock::iterator MBBI,
56                             AtomicRMWInst::BinOp, bool IsMasked, int Width,
57                             MachineBasicBlock::iterator &NextMBBI);
58   bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
59                            MachineBasicBlock::iterator MBBI, bool IsMasked,
60                            int Width, MachineBasicBlock::iterator &NextMBBI);
61 };
62 
63 char LoongArchExpandAtomicPseudo::ID = 0;
64 
65 bool LoongArchExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
66   TII =
67       static_cast<const LoongArchInstrInfo *>(MF.getSubtarget().getInstrInfo());
68   bool Modified = false;
69   for (auto &MBB : MF)
70     Modified |= expandMBB(MBB);
71   return Modified;
72 }
73 
74 bool LoongArchExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
75   bool Modified = false;
76 
77   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
78   while (MBBI != E) {
79     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
80     Modified |= expandMI(MBB, MBBI, NMBBI);
81     MBBI = NMBBI;
82   }
83 
84   return Modified;
85 }
86 
87 bool LoongArchExpandAtomicPseudo::expandMI(
88     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
89     MachineBasicBlock::iterator &NextMBBI) {
90   switch (MBBI->getOpcode()) {
91   case LoongArch::PseudoMaskedAtomicSwap32:
92     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
93                              NextMBBI);
94   case LoongArch::PseudoAtomicSwap32:
95     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32,
96                              NextMBBI);
97   case LoongArch::PseudoMaskedAtomicLoadAdd32:
98     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
99   case LoongArch::PseudoMaskedAtomicLoadSub32:
100     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
101   case LoongArch::PseudoAtomicLoadNand32:
102     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
103                              NextMBBI);
104   case LoongArch::PseudoAtomicLoadNand64:
105     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
106                              NextMBBI);
107   case LoongArch::PseudoMaskedAtomicLoadNand32:
108     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
109                              NextMBBI);
110   case LoongArch::PseudoAtomicLoadAdd32:
111     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32,
112                              NextMBBI);
113   case LoongArch::PseudoAtomicLoadSub32:
114     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32,
115                              NextMBBI);
116   case LoongArch::PseudoAtomicLoadAnd32:
117     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32,
118                              NextMBBI);
119   case LoongArch::PseudoAtomicLoadOr32:
120     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI);
121   case LoongArch::PseudoAtomicLoadXor32:
122     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
123                              NextMBBI);
124   case LoongArch::PseudoMaskedAtomicLoadUMax32:
125     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
126                                 NextMBBI);
127   case LoongArch::PseudoMaskedAtomicLoadUMin32:
128     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
129                                 NextMBBI);
130   case LoongArch::PseudoCmpXchg32:
131     return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
132   case LoongArch::PseudoCmpXchg64:
133     return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
134   case LoongArch::PseudoMaskedCmpXchg32:
135     return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
136   case LoongArch::PseudoMaskedAtomicLoadMax32:
137     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
138                                 NextMBBI);
139   case LoongArch::PseudoMaskedAtomicLoadMin32:
140     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
141                                 NextMBBI);
142   }
143   return false;
144 }
145 
146 static void doAtomicBinOpExpansion(const LoongArchInstrInfo *TII,
147                                    MachineInstr &MI, DebugLoc DL,
148                                    MachineBasicBlock *ThisMBB,
149                                    MachineBasicBlock *LoopMBB,
150                                    MachineBasicBlock *DoneMBB,
151                                    AtomicRMWInst::BinOp BinOp, int Width) {
152   Register DestReg = MI.getOperand(0).getReg();
153   Register ScratchReg = MI.getOperand(1).getReg();
154   Register AddrReg = MI.getOperand(2).getReg();
155   Register IncrReg = MI.getOperand(3).getReg();
156 
157   // .loop:
158   //   ll.[w|d] dest, (addr)
159   //   binop scratch, dest, val
160   //   sc.[w|d] scratch, scratch, (addr)
161   //   beqz scratch, loop
162   BuildMI(LoopMBB, DL,
163           TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
164       .addReg(AddrReg)
165       .addImm(0);
166   switch (BinOp) {
167   default:
168     llvm_unreachable("Unexpected AtomicRMW BinOp");
169   case AtomicRMWInst::Xchg:
170     BuildMI(LoopMBB, DL, TII->get(LoongArch::OR), ScratchReg)
171         .addReg(IncrReg)
172         .addReg(LoongArch::R0);
173     break;
174   case AtomicRMWInst::Nand:
175     BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
176         .addReg(DestReg)
177         .addReg(IncrReg);
178     BuildMI(LoopMBB, DL, TII->get(LoongArch::NOR), ScratchReg)
179         .addReg(ScratchReg)
180         .addReg(LoongArch::R0);
181     break;
182   case AtomicRMWInst::Add:
183     BuildMI(LoopMBB, DL, TII->get(LoongArch::ADD_W), ScratchReg)
184         .addReg(DestReg)
185         .addReg(IncrReg);
186     break;
187   case AtomicRMWInst::Sub:
188     BuildMI(LoopMBB, DL, TII->get(LoongArch::SUB_W), ScratchReg)
189         .addReg(DestReg)
190         .addReg(IncrReg);
191     break;
192   case AtomicRMWInst::And:
193     BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
194         .addReg(DestReg)
195         .addReg(IncrReg);
196     break;
197   case AtomicRMWInst::Or:
198     BuildMI(LoopMBB, DL, TII->get(LoongArch::OR), ScratchReg)
199         .addReg(DestReg)
200         .addReg(IncrReg);
201     break;
202   case AtomicRMWInst::Xor:
203     BuildMI(LoopMBB, DL, TII->get(LoongArch::XOR), ScratchReg)
204         .addReg(DestReg)
205         .addReg(IncrReg);
206     break;
207   }
208   BuildMI(LoopMBB, DL,
209           TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D), ScratchReg)
210       .addReg(ScratchReg)
211       .addReg(AddrReg)
212       .addImm(0);
213   BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQZ))
214       .addReg(ScratchReg)
215       .addMBB(LoopMBB);
216 }
217 
218 static void insertMaskedMerge(const LoongArchInstrInfo *TII, DebugLoc DL,
219                               MachineBasicBlock *MBB, Register DestReg,
220                               Register OldValReg, Register NewValReg,
221                               Register MaskReg, Register ScratchReg) {
222   assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
223   assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
224   assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
225 
226   // res = oldval ^ ((oldval ^ newval) & masktargetdata);
227   BuildMI(MBB, DL, TII->get(LoongArch::XOR), ScratchReg)
228       .addReg(OldValReg)
229       .addReg(NewValReg);
230   BuildMI(MBB, DL, TII->get(LoongArch::AND), ScratchReg)
231       .addReg(ScratchReg)
232       .addReg(MaskReg);
233   BuildMI(MBB, DL, TII->get(LoongArch::XOR), DestReg)
234       .addReg(OldValReg)
235       .addReg(ScratchReg);
236 }
237 
238 static void doMaskedAtomicBinOpExpansion(
239     const LoongArchInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
240     MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
241     MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
242   assert(Width == 32 && "Should never need to expand masked 64-bit operations");
243   Register DestReg = MI.getOperand(0).getReg();
244   Register ScratchReg = MI.getOperand(1).getReg();
245   Register AddrReg = MI.getOperand(2).getReg();
246   Register IncrReg = MI.getOperand(3).getReg();
247   Register MaskReg = MI.getOperand(4).getReg();
248 
249   // .loop:
250   //   ll.w destreg, (alignedaddr)
251   //   binop scratch, destreg, incr
252   //   xor scratch, destreg, scratch
253   //   and scratch, scratch, masktargetdata
254   //   xor scratch, destreg, scratch
255   //   sc.w scratch, scratch, (alignedaddr)
256   //   beqz scratch, loop
257   BuildMI(LoopMBB, DL, TII->get(LoongArch::LL_W), DestReg)
258       .addReg(AddrReg)
259       .addImm(0);
260   switch (BinOp) {
261   default:
262     llvm_unreachable("Unexpected AtomicRMW BinOp");
263   case AtomicRMWInst::Xchg:
264     BuildMI(LoopMBB, DL, TII->get(LoongArch::ADDI_W), ScratchReg)
265         .addReg(IncrReg)
266         .addImm(0);
267     break;
268   case AtomicRMWInst::Add:
269     BuildMI(LoopMBB, DL, TII->get(LoongArch::ADD_W), ScratchReg)
270         .addReg(DestReg)
271         .addReg(IncrReg);
272     break;
273   case AtomicRMWInst::Sub:
274     BuildMI(LoopMBB, DL, TII->get(LoongArch::SUB_W), ScratchReg)
275         .addReg(DestReg)
276         .addReg(IncrReg);
277     break;
278   case AtomicRMWInst::Nand:
279     BuildMI(LoopMBB, DL, TII->get(LoongArch::AND), ScratchReg)
280         .addReg(DestReg)
281         .addReg(IncrReg);
282     BuildMI(LoopMBB, DL, TII->get(LoongArch::NOR), ScratchReg)
283         .addReg(ScratchReg)
284         .addReg(LoongArch::R0);
285     // TODO: support other AtomicRMWInst.
286   }
287 
288   insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
289                     ScratchReg);
290 
291   BuildMI(LoopMBB, DL, TII->get(LoongArch::SC_W), ScratchReg)
292       .addReg(ScratchReg)
293       .addReg(AddrReg)
294       .addImm(0);
295   BuildMI(LoopMBB, DL, TII->get(LoongArch::BEQZ))
296       .addReg(ScratchReg)
297       .addMBB(LoopMBB);
298 }
299 
300 bool LoongArchExpandAtomicPseudo::expandAtomicBinOp(
301     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
302     AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
303     MachineBasicBlock::iterator &NextMBBI) {
304   MachineInstr &MI = *MBBI;
305   DebugLoc DL = MI.getDebugLoc();
306 
307   MachineFunction *MF = MBB.getParent();
308   auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
309   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
310 
311   // Insert new MBBs.
312   MF->insert(++MBB.getIterator(), LoopMBB);
313   MF->insert(++LoopMBB->getIterator(), DoneMBB);
314 
315   // Set up successors and transfer remaining instructions to DoneMBB.
316   LoopMBB->addSuccessor(LoopMBB);
317   LoopMBB->addSuccessor(DoneMBB);
318   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
319   DoneMBB->transferSuccessors(&MBB);
320   MBB.addSuccessor(LoopMBB);
321 
322   if (IsMasked)
323     doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
324                                  Width);
325   else
326     doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
327 
328   NextMBBI = MBB.end();
329   MI.eraseFromParent();
330 
331   LivePhysRegs LiveRegs;
332   computeAndAddLiveIns(LiveRegs, *LoopMBB);
333   computeAndAddLiveIns(LiveRegs, *DoneMBB);
334 
335   return true;
336 }
337 
338 static void insertSext(const LoongArchInstrInfo *TII, DebugLoc DL,
339                        MachineBasicBlock *MBB, Register ValReg,
340                        Register ShamtReg) {
341   BuildMI(MBB, DL, TII->get(LoongArch::SLL_W), ValReg)
342       .addReg(ValReg)
343       .addReg(ShamtReg);
344   BuildMI(MBB, DL, TII->get(LoongArch::SRA_W), ValReg)
345       .addReg(ValReg)
346       .addReg(ShamtReg);
347 }
348 
349 bool LoongArchExpandAtomicPseudo::expandAtomicMinMaxOp(
350     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
351     AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
352     MachineBasicBlock::iterator &NextMBBI) {
353   assert(IsMasked == true &&
354          "Should only need to expand masked atomic max/min");
355   assert(Width == 32 && "Should never need to expand masked 64-bit operations");
356 
357   MachineInstr &MI = *MBBI;
358   DebugLoc DL = MI.getDebugLoc();
359   MachineFunction *MF = MBB.getParent();
360   auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
361   auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
362   auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
363   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
364 
365   // Insert new MBBs.
366   MF->insert(++MBB.getIterator(), LoopHeadMBB);
367   MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
368   MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
369   MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
370 
371   // Set up successors and transfer remaining instructions to DoneMBB.
372   LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
373   LoopHeadMBB->addSuccessor(LoopTailMBB);
374   LoopIfBodyMBB->addSuccessor(LoopTailMBB);
375   LoopTailMBB->addSuccessor(LoopHeadMBB);
376   LoopTailMBB->addSuccessor(DoneMBB);
377   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
378   DoneMBB->transferSuccessors(&MBB);
379   MBB.addSuccessor(LoopHeadMBB);
380 
381   Register DestReg = MI.getOperand(0).getReg();
382   Register Scratch1Reg = MI.getOperand(1).getReg();
383   Register Scratch2Reg = MI.getOperand(2).getReg();
384   Register AddrReg = MI.getOperand(3).getReg();
385   Register IncrReg = MI.getOperand(4).getReg();
386   Register MaskReg = MI.getOperand(5).getReg();
387 
388   //
389   // .loophead:
390   //   ll.w destreg, (alignedaddr)
391   //   and scratch2, destreg, mask
392   //   move scratch1, destreg
393   BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::LL_W), DestReg)
394       .addReg(AddrReg)
395       .addImm(0);
396   BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), Scratch2Reg)
397       .addReg(DestReg)
398       .addReg(MaskReg);
399   BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::OR), Scratch1Reg)
400       .addReg(DestReg)
401       .addReg(LoongArch::R0);
402 
403   switch (BinOp) {
404   default:
405     llvm_unreachable("Unexpected AtomicRMW BinOp");
406   // bgeu scratch2, incr, .looptail
407   case AtomicRMWInst::UMax:
408     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
409         .addReg(Scratch2Reg)
410         .addReg(IncrReg)
411         .addMBB(LoopTailMBB);
412     break;
413   // bgeu incr, scratch2, .looptail
414   case AtomicRMWInst::UMin:
415     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGEU))
416         .addReg(IncrReg)
417         .addReg(Scratch2Reg)
418         .addMBB(LoopTailMBB);
419     break;
420   case AtomicRMWInst::Max:
421     insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
422     // bge scratch2, incr, .looptail
423     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
424         .addReg(Scratch2Reg)
425         .addReg(IncrReg)
426         .addMBB(LoopTailMBB);
427     break;
428   case AtomicRMWInst::Min:
429     insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
430     // bge incr, scratch2, .looptail
431     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BGE))
432         .addReg(IncrReg)
433         .addReg(Scratch2Reg)
434         .addMBB(LoopTailMBB);
435     break;
436     // TODO: support other AtomicRMWInst.
437   }
438 
439   // .loopifbody:
440   //   xor scratch1, destreg, incr
441   //   and scratch1, scratch1, mask
442   //   xor scratch1, destreg, scratch1
443   insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
444                     MaskReg, Scratch1Reg);
445 
446   // .looptail:
447   //   sc.w scratch1, scratch1, (addr)
448   //   beqz scratch1, loop
449   BuildMI(LoopTailMBB, DL, TII->get(LoongArch::SC_W), Scratch1Reg)
450       .addReg(Scratch1Reg)
451       .addReg(AddrReg)
452       .addImm(0);
453   BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
454       .addReg(Scratch1Reg)
455       .addMBB(LoopHeadMBB);
456 
457   NextMBBI = MBB.end();
458   MI.eraseFromParent();
459 
460   LivePhysRegs LiveRegs;
461   computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
462   computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
463   computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
464   computeAndAddLiveIns(LiveRegs, *DoneMBB);
465 
466   return true;
467 }
468 
469 bool LoongArchExpandAtomicPseudo::expandAtomicCmpXchg(
470     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
471     int Width, MachineBasicBlock::iterator &NextMBBI) {
472   MachineInstr &MI = *MBBI;
473   DebugLoc DL = MI.getDebugLoc();
474   MachineFunction *MF = MBB.getParent();
475   auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
476   auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
477   auto TailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
478   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
479 
480   // Insert new MBBs.
481   MF->insert(++MBB.getIterator(), LoopHeadMBB);
482   MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
483   MF->insert(++LoopTailMBB->getIterator(), TailMBB);
484   MF->insert(++TailMBB->getIterator(), DoneMBB);
485 
486   // Set up successors and transfer remaining instructions to DoneMBB.
487   LoopHeadMBB->addSuccessor(LoopTailMBB);
488   LoopHeadMBB->addSuccessor(TailMBB);
489   LoopTailMBB->addSuccessor(DoneMBB);
490   LoopTailMBB->addSuccessor(LoopHeadMBB);
491   TailMBB->addSuccessor(DoneMBB);
492   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
493   DoneMBB->transferSuccessors(&MBB);
494   MBB.addSuccessor(LoopHeadMBB);
495 
496   Register DestReg = MI.getOperand(0).getReg();
497   Register ScratchReg = MI.getOperand(1).getReg();
498   Register AddrReg = MI.getOperand(2).getReg();
499   Register CmpValReg = MI.getOperand(3).getReg();
500   Register NewValReg = MI.getOperand(4).getReg();
501 
502   if (!IsMasked) {
503     // .loophead:
504     //   ll.[w|d] dest, (addr)
505     //   bne dest, cmpval, tail
506     BuildMI(LoopHeadMBB, DL,
507             TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
508         .addReg(AddrReg)
509         .addImm(0);
510     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
511         .addReg(DestReg)
512         .addReg(CmpValReg)
513         .addMBB(TailMBB);
514     // .looptail:
515     //   move scratch, newval
516     //   sc.[w|d] scratch, scratch, (addr)
517     //   beqz scratch, loophead
518     //   b done
519     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
520         .addReg(NewValReg)
521         .addReg(LoongArch::R0);
522     BuildMI(LoopTailMBB, DL,
523             TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D),
524             ScratchReg)
525         .addReg(ScratchReg)
526         .addReg(AddrReg)
527         .addImm(0);
528     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
529         .addReg(ScratchReg)
530         .addMBB(LoopHeadMBB);
531     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
532   } else {
533     // .loophead:
534     //   ll.[w|d] dest, (addr)
535     //   and scratch, dest, mask
536     //   bne scratch, cmpval, tail
537     Register MaskReg = MI.getOperand(5).getReg();
538     BuildMI(LoopHeadMBB, DL,
539             TII->get(Width == 32 ? LoongArch::LL_W : LoongArch::LL_D), DestReg)
540         .addReg(AddrReg)
541         .addImm(0);
542     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::AND), ScratchReg)
543         .addReg(DestReg)
544         .addReg(MaskReg);
545     BuildMI(LoopHeadMBB, DL, TII->get(LoongArch::BNE))
546         .addReg(ScratchReg)
547         .addReg(CmpValReg)
548         .addMBB(TailMBB);
549 
550     // .looptail:
551     //   andn scratch, dest, mask
552     //   or scratch, scratch, newval
553     //   sc.[w|d] scratch, scratch, (addr)
554     //   beqz scratch, loophead
555     //   b done
556     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::ANDN), ScratchReg)
557         .addReg(DestReg)
558         .addReg(MaskReg);
559     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::OR), ScratchReg)
560         .addReg(ScratchReg)
561         .addReg(NewValReg);
562     BuildMI(LoopTailMBB, DL,
563             TII->get(Width == 32 ? LoongArch::SC_W : LoongArch::SC_D),
564             ScratchReg)
565         .addReg(ScratchReg)
566         .addReg(AddrReg)
567         .addImm(0);
568     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::BEQZ))
569         .addReg(ScratchReg)
570         .addMBB(LoopHeadMBB);
571     BuildMI(LoopTailMBB, DL, TII->get(LoongArch::B)).addMBB(DoneMBB);
572   }
573 
574   AtomicOrdering FailureOrdering =
575       static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
576   int hint;
577 
578   switch (FailureOrdering) {
579   case AtomicOrdering::Acquire:
580   case AtomicOrdering::AcquireRelease:
581   case AtomicOrdering::SequentiallyConsistent:
582     // acquire
583     hint = 0b10100;
584     break;
585   default:
586     hint = 0x700;
587   }
588 
589   // .tail:
590   //   dbar 0x700 | acquire
591   BuildMI(TailMBB, DL, TII->get(LoongArch::DBAR)).addImm(hint);
592 
593   NextMBBI = MBB.end();
594   MI.eraseFromParent();
595 
596   LivePhysRegs LiveRegs;
597   computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
598   computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
599   computeAndAddLiveIns(LiveRegs, *TailMBB);
600   computeAndAddLiveIns(LiveRegs, *DoneMBB);
601 
602   return true;
603 }
604 
605 } // end namespace
606 
607 INITIALIZE_PASS(LoongArchExpandAtomicPseudo, "loongarch-expand-atomic-pseudo",
608                 LoongArch_EXPAND_ATOMIC_PSEUDO_NAME, false, false)
609 
610 namespace llvm {
611 
612 FunctionPass *createLoongArchExpandAtomicPseudoPass() {
613   return new LoongArchExpandAtomicPseudo();
614 }
615 
616 } // end namespace llvm
617