xref: /freebsd/contrib/llvm-project/llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp (revision b5a3a89c50671a1ad29e7c43fe15e7b16feac239)
1 //===-- RISCVExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. ---===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands atomic pseudo instructions into
10 // target instructions. This pass should be run at the last possible moment,
11 // avoiding the possibility for other passes to break the requirements for
12 // forward progress in the LR/SC block.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "RISCV.h"
17 #include "RISCVInstrInfo.h"
18 #include "RISCVTargetMachine.h"
19 
20 #include "llvm/CodeGen/LivePhysRegs.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 
24 using namespace llvm;
25 
26 #define RISCV_EXPAND_ATOMIC_PSEUDO_NAME                                        \
27   "RISCV atomic pseudo instruction expansion pass"
28 
29 namespace {
30 
31 class RISCVExpandAtomicPseudo : public MachineFunctionPass {
32 public:
33   const RISCVInstrInfo *TII;
34   static char ID;
35 
36   RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {
37     initializeRISCVExpandAtomicPseudoPass(*PassRegistry::getPassRegistry());
38   }
39 
40   bool runOnMachineFunction(MachineFunction &MF) override;
41 
42   StringRef getPassName() const override {
43     return RISCV_EXPAND_ATOMIC_PSEUDO_NAME;
44   }
45 
46 private:
47   bool expandMBB(MachineBasicBlock &MBB);
48   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
49                 MachineBasicBlock::iterator &NextMBBI);
50   bool expandAtomicBinOp(MachineBasicBlock &MBB,
51                          MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
52                          bool IsMasked, int Width,
53                          MachineBasicBlock::iterator &NextMBBI);
54   bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
55                             MachineBasicBlock::iterator MBBI,
56                             AtomicRMWInst::BinOp, bool IsMasked, int Width,
57                             MachineBasicBlock::iterator &NextMBBI);
58   bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
59                            MachineBasicBlock::iterator MBBI, bool IsMasked,
60                            int Width, MachineBasicBlock::iterator &NextMBBI);
61 };
62 
63 char RISCVExpandAtomicPseudo::ID = 0;
64 
65 bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
66   TII = static_cast<const RISCVInstrInfo *>(MF.getSubtarget().getInstrInfo());
67   bool Modified = false;
68   for (auto &MBB : MF)
69     Modified |= expandMBB(MBB);
70   return Modified;
71 }
72 
73 bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
74   bool Modified = false;
75 
76   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
77   while (MBBI != E) {
78     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
79     Modified |= expandMI(MBB, MBBI, NMBBI);
80     MBBI = NMBBI;
81   }
82 
83   return Modified;
84 }
85 
86 bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
87                                        MachineBasicBlock::iterator MBBI,
88                                        MachineBasicBlock::iterator &NextMBBI) {
89   // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
90   // expanded instructions for each pseudo is correct in the Size field of the
91   // tablegen definition for the pseudo.
92   switch (MBBI->getOpcode()) {
93   case RISCV::PseudoAtomicLoadNand32:
94     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
95                              NextMBBI);
96   case RISCV::PseudoAtomicLoadNand64:
97     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
98                              NextMBBI);
99   case RISCV::PseudoMaskedAtomicSwap32:
100     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
101                              NextMBBI);
102   case RISCV::PseudoMaskedAtomicLoadAdd32:
103     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
104   case RISCV::PseudoMaskedAtomicLoadSub32:
105     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
106   case RISCV::PseudoMaskedAtomicLoadNand32:
107     return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
108                              NextMBBI);
109   case RISCV::PseudoMaskedAtomicLoadMax32:
110     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
111                                 NextMBBI);
112   case RISCV::PseudoMaskedAtomicLoadMin32:
113     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
114                                 NextMBBI);
115   case RISCV::PseudoMaskedAtomicLoadUMax32:
116     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
117                                 NextMBBI);
118   case RISCV::PseudoMaskedAtomicLoadUMin32:
119     return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
120                                 NextMBBI);
121   case RISCV::PseudoCmpXchg32:
122     return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
123   case RISCV::PseudoCmpXchg64:
124     return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
125   case RISCV::PseudoMaskedCmpXchg32:
126     return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
127   }
128 
129   return false;
130 }
131 
132 static unsigned getLRForRMW32(AtomicOrdering Ordering) {
133   switch (Ordering) {
134   default:
135     llvm_unreachable("Unexpected AtomicOrdering");
136   case AtomicOrdering::Monotonic:
137     return RISCV::LR_W;
138   case AtomicOrdering::Acquire:
139     return RISCV::LR_W_AQ;
140   case AtomicOrdering::Release:
141     return RISCV::LR_W;
142   case AtomicOrdering::AcquireRelease:
143     return RISCV::LR_W_AQ;
144   case AtomicOrdering::SequentiallyConsistent:
145     return RISCV::LR_W_AQ_RL;
146   }
147 }
148 
149 static unsigned getSCForRMW32(AtomicOrdering Ordering) {
150   switch (Ordering) {
151   default:
152     llvm_unreachable("Unexpected AtomicOrdering");
153   case AtomicOrdering::Monotonic:
154     return RISCV::SC_W;
155   case AtomicOrdering::Acquire:
156     return RISCV::SC_W;
157   case AtomicOrdering::Release:
158     return RISCV::SC_W_RL;
159   case AtomicOrdering::AcquireRelease:
160     return RISCV::SC_W_RL;
161   case AtomicOrdering::SequentiallyConsistent:
162     return RISCV::SC_W_AQ_RL;
163   }
164 }
165 
166 static unsigned getLRForRMW64(AtomicOrdering Ordering) {
167   switch (Ordering) {
168   default:
169     llvm_unreachable("Unexpected AtomicOrdering");
170   case AtomicOrdering::Monotonic:
171     return RISCV::LR_D;
172   case AtomicOrdering::Acquire:
173     return RISCV::LR_D_AQ;
174   case AtomicOrdering::Release:
175     return RISCV::LR_D;
176   case AtomicOrdering::AcquireRelease:
177     return RISCV::LR_D_AQ;
178   case AtomicOrdering::SequentiallyConsistent:
179     return RISCV::LR_D_AQ_RL;
180   }
181 }
182 
183 static unsigned getSCForRMW64(AtomicOrdering Ordering) {
184   switch (Ordering) {
185   default:
186     llvm_unreachable("Unexpected AtomicOrdering");
187   case AtomicOrdering::Monotonic:
188     return RISCV::SC_D;
189   case AtomicOrdering::Acquire:
190     return RISCV::SC_D;
191   case AtomicOrdering::Release:
192     return RISCV::SC_D_RL;
193   case AtomicOrdering::AcquireRelease:
194     return RISCV::SC_D_RL;
195   case AtomicOrdering::SequentiallyConsistent:
196     return RISCV::SC_D_AQ_RL;
197   }
198 }
199 
200 static unsigned getLRForRMW(AtomicOrdering Ordering, int Width) {
201   if (Width == 32)
202     return getLRForRMW32(Ordering);
203   if (Width == 64)
204     return getLRForRMW64(Ordering);
205   llvm_unreachable("Unexpected LR width\n");
206 }
207 
208 static unsigned getSCForRMW(AtomicOrdering Ordering, int Width) {
209   if (Width == 32)
210     return getSCForRMW32(Ordering);
211   if (Width == 64)
212     return getSCForRMW64(Ordering);
213   llvm_unreachable("Unexpected SC width\n");
214 }
215 
216 static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
217                                    DebugLoc DL, MachineBasicBlock *ThisMBB,
218                                    MachineBasicBlock *LoopMBB,
219                                    MachineBasicBlock *DoneMBB,
220                                    AtomicRMWInst::BinOp BinOp, int Width) {
221   Register DestReg = MI.getOperand(0).getReg();
222   Register ScratchReg = MI.getOperand(1).getReg();
223   Register AddrReg = MI.getOperand(2).getReg();
224   Register IncrReg = MI.getOperand(3).getReg();
225   AtomicOrdering Ordering =
226       static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
227 
228   // .loop:
229   //   lr.[w|d] dest, (addr)
230   //   binop scratch, dest, val
231   //   sc.[w|d] scratch, scratch, (addr)
232   //   bnez scratch, loop
233   BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
234       .addReg(AddrReg);
235   switch (BinOp) {
236   default:
237     llvm_unreachable("Unexpected AtomicRMW BinOp");
238   case AtomicRMWInst::Nand:
239     BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
240         .addReg(DestReg)
241         .addReg(IncrReg);
242     BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
243         .addReg(ScratchReg)
244         .addImm(-1);
245     break;
246   }
247   BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
248       .addReg(AddrReg)
249       .addReg(ScratchReg);
250   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
251       .addReg(ScratchReg)
252       .addReg(RISCV::X0)
253       .addMBB(LoopMBB);
254 }
255 
256 static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
257                               MachineBasicBlock *MBB, Register DestReg,
258                               Register OldValReg, Register NewValReg,
259                               Register MaskReg, Register ScratchReg) {
260   assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
261   assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
262   assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
263 
264   // We select bits from newval and oldval using:
265   // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
266   // r = oldval ^ ((oldval ^ newval) & masktargetdata);
267   BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
268       .addReg(OldValReg)
269       .addReg(NewValReg);
270   BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
271       .addReg(ScratchReg)
272       .addReg(MaskReg);
273   BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
274       .addReg(OldValReg)
275       .addReg(ScratchReg);
276 }
277 
278 static void doMaskedAtomicBinOpExpansion(
279     const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
280     MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopMBB,
281     MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width) {
282   assert(Width == 32 && "Should never need to expand masked 64-bit operations");
283   Register DestReg = MI.getOperand(0).getReg();
284   Register ScratchReg = MI.getOperand(1).getReg();
285   Register AddrReg = MI.getOperand(2).getReg();
286   Register IncrReg = MI.getOperand(3).getReg();
287   Register MaskReg = MI.getOperand(4).getReg();
288   AtomicOrdering Ordering =
289       static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
290 
291   // .loop:
292   //   lr.w destreg, (alignedaddr)
293   //   binop scratch, destreg, incr
294   //   xor scratch, destreg, scratch
295   //   and scratch, scratch, masktargetdata
296   //   xor scratch, destreg, scratch
297   //   sc.w scratch, scratch, (alignedaddr)
298   //   bnez scratch, loop
299   BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
300       .addReg(AddrReg);
301   switch (BinOp) {
302   default:
303     llvm_unreachable("Unexpected AtomicRMW BinOp");
304   case AtomicRMWInst::Xchg:
305     BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
306         .addReg(IncrReg)
307         .addImm(0);
308     break;
309   case AtomicRMWInst::Add:
310     BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
311         .addReg(DestReg)
312         .addReg(IncrReg);
313     break;
314   case AtomicRMWInst::Sub:
315     BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
316         .addReg(DestReg)
317         .addReg(IncrReg);
318     break;
319   case AtomicRMWInst::Nand:
320     BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
321         .addReg(DestReg)
322         .addReg(IncrReg);
323     BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
324         .addReg(ScratchReg)
325         .addImm(-1);
326     break;
327   }
328 
329   insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
330                     ScratchReg);
331 
332   BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering)), ScratchReg)
333       .addReg(AddrReg)
334       .addReg(ScratchReg);
335   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
336       .addReg(ScratchReg)
337       .addReg(RISCV::X0)
338       .addMBB(LoopMBB);
339 }
340 
341 bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
342     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
343     AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
344     MachineBasicBlock::iterator &NextMBBI) {
345   MachineInstr &MI = *MBBI;
346   DebugLoc DL = MI.getDebugLoc();
347 
348   MachineFunction *MF = MBB.getParent();
349   auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
350   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
351 
352   // Insert new MBBs.
353   MF->insert(++MBB.getIterator(), LoopMBB);
354   MF->insert(++LoopMBB->getIterator(), DoneMBB);
355 
356   // Set up successors and transfer remaining instructions to DoneMBB.
357   LoopMBB->addSuccessor(LoopMBB);
358   LoopMBB->addSuccessor(DoneMBB);
359   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
360   DoneMBB->transferSuccessors(&MBB);
361   MBB.addSuccessor(LoopMBB);
362 
363   if (!IsMasked)
364     doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width);
365   else
366     doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
367                                  Width);
368 
369   NextMBBI = MBB.end();
370   MI.eraseFromParent();
371 
372   LivePhysRegs LiveRegs;
373   computeAndAddLiveIns(LiveRegs, *LoopMBB);
374   computeAndAddLiveIns(LiveRegs, *DoneMBB);
375 
376   return true;
377 }
378 
379 static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
380                        MachineBasicBlock *MBB, Register ValReg,
381                        Register ShamtReg) {
382   BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
383       .addReg(ValReg)
384       .addReg(ShamtReg);
385   BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
386       .addReg(ValReg)
387       .addReg(ShamtReg);
388 }
389 
390 bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
391     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
392     AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
393     MachineBasicBlock::iterator &NextMBBI) {
394   assert(IsMasked == true &&
395          "Should only need to expand masked atomic max/min");
396   assert(Width == 32 && "Should never need to expand masked 64-bit operations");
397 
398   MachineInstr &MI = *MBBI;
399   DebugLoc DL = MI.getDebugLoc();
400   MachineFunction *MF = MBB.getParent();
401   auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
402   auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
403   auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
404   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
405 
406   // Insert new MBBs.
407   MF->insert(++MBB.getIterator(), LoopHeadMBB);
408   MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
409   MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
410   MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
411 
412   // Set up successors and transfer remaining instructions to DoneMBB.
413   LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
414   LoopHeadMBB->addSuccessor(LoopTailMBB);
415   LoopIfBodyMBB->addSuccessor(LoopTailMBB);
416   LoopTailMBB->addSuccessor(LoopHeadMBB);
417   LoopTailMBB->addSuccessor(DoneMBB);
418   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
419   DoneMBB->transferSuccessors(&MBB);
420   MBB.addSuccessor(LoopHeadMBB);
421 
422   Register DestReg = MI.getOperand(0).getReg();
423   Register Scratch1Reg = MI.getOperand(1).getReg();
424   Register Scratch2Reg = MI.getOperand(2).getReg();
425   Register AddrReg = MI.getOperand(3).getReg();
426   Register IncrReg = MI.getOperand(4).getReg();
427   Register MaskReg = MI.getOperand(5).getReg();
428   bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
429   AtomicOrdering Ordering =
430       static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
431 
432   //
433   // .loophead:
434   //   lr.w destreg, (alignedaddr)
435   //   and scratch2, destreg, mask
436   //   mv scratch1, destreg
437   //   [sext scratch2 if signed min/max]
438   //   ifnochangeneeded scratch2, incr, .looptail
439   BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering)), DestReg)
440       .addReg(AddrReg);
441   BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
442       .addReg(DestReg)
443       .addReg(MaskReg);
444   BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
445       .addReg(DestReg)
446       .addImm(0);
447 
448   switch (BinOp) {
449   default:
450     llvm_unreachable("Unexpected AtomicRMW BinOp");
451   case AtomicRMWInst::Max: {
452     insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
453     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
454         .addReg(Scratch2Reg)
455         .addReg(IncrReg)
456         .addMBB(LoopTailMBB);
457     break;
458   }
459   case AtomicRMWInst::Min: {
460     insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
461     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
462         .addReg(IncrReg)
463         .addReg(Scratch2Reg)
464         .addMBB(LoopTailMBB);
465     break;
466   }
467   case AtomicRMWInst::UMax:
468     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
469         .addReg(Scratch2Reg)
470         .addReg(IncrReg)
471         .addMBB(LoopTailMBB);
472     break;
473   case AtomicRMWInst::UMin:
474     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
475         .addReg(IncrReg)
476         .addReg(Scratch2Reg)
477         .addMBB(LoopTailMBB);
478     break;
479   }
480 
481   // .loopifbody:
482   //   xor scratch1, destreg, incr
483   //   and scratch1, scratch1, mask
484   //   xor scratch1, destreg, scratch1
485   insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
486                     MaskReg, Scratch1Reg);
487 
488   // .looptail:
489   //   sc.w scratch1, scratch1, (addr)
490   //   bnez scratch1, loop
491   BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering)), Scratch1Reg)
492       .addReg(AddrReg)
493       .addReg(Scratch1Reg);
494   BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
495       .addReg(Scratch1Reg)
496       .addReg(RISCV::X0)
497       .addMBB(LoopHeadMBB);
498 
499   NextMBBI = MBB.end();
500   MI.eraseFromParent();
501 
502   LivePhysRegs LiveRegs;
503   computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
504   computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
505   computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
506   computeAndAddLiveIns(LiveRegs, *DoneMBB);
507 
508   return true;
509 }
510 
511 // If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
512 // operation, it can be folded into the cmpxchg expansion by
513 // modifying the branch within 'LoopHead' (which performs the same
514 // comparison). This is a valid transformation because after altering the
515 // LoopHead's BNE destination, the BNE following the cmpxchg becomes
516 // redundant and and be deleted. In the case of a masked cmpxchg, an
517 // appropriate AND and BNE must be matched.
518 //
519 // On success, returns true and deletes the matching BNE or AND+BNE, sets the
520 // LoopHeadBNETarget argument to the target that should be used within the
521 // loop head, and removes that block as a successor to MBB.
522 bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB,
523                                  MachineBasicBlock::iterator MBBI,
524                                  Register DestReg, Register CmpValReg,
525                                  Register MaskReg,
526                                  MachineBasicBlock *&LoopHeadBNETarget) {
527   SmallVector<MachineInstr *> ToErase;
528   auto E = MBB.end();
529   if (MBBI == E)
530     return false;
531   MBBI = skipDebugInstructionsForward(MBBI, E);
532 
533   // If we have a masked cmpxchg, match AND dst, DestReg, MaskReg.
534   if (MaskReg.isValid()) {
535     if (MBBI == E || MBBI->getOpcode() != RISCV::AND)
536       return false;
537     Register ANDOp1 = MBBI->getOperand(1).getReg();
538     Register ANDOp2 = MBBI->getOperand(2).getReg();
539     if (!(ANDOp1 == DestReg && ANDOp2 == MaskReg) &&
540         !(ANDOp1 == MaskReg && ANDOp2 == DestReg))
541       return false;
542     // We now expect the BNE to use the result of the AND as an operand.
543     DestReg = MBBI->getOperand(0).getReg();
544     ToErase.push_back(&*MBBI);
545     MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
546   }
547 
548   // Match BNE DestReg, MaskReg.
549   if (MBBI == E || MBBI->getOpcode() != RISCV::BNE)
550     return false;
551   Register BNEOp0 = MBBI->getOperand(0).getReg();
552   Register BNEOp1 = MBBI->getOperand(1).getReg();
553   if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
554       !(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
555     return false;
556   ToErase.push_back(&*MBBI);
557   LoopHeadBNETarget = MBBI->getOperand(2).getMBB();
558   MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
559   if (MBBI != E)
560     return false;
561 
562   MBB.removeSuccessor(LoopHeadBNETarget);
563   for (auto *MI : ToErase)
564     MI->eraseFromParent();
565   return true;
566 }
567 
568 bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
569     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
570     int Width, MachineBasicBlock::iterator &NextMBBI) {
571   MachineInstr &MI = *MBBI;
572   DebugLoc DL = MI.getDebugLoc();
573   MachineFunction *MF = MBB.getParent();
574   auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
575   auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
576   auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
577 
578   Register DestReg = MI.getOperand(0).getReg();
579   Register ScratchReg = MI.getOperand(1).getReg();
580   Register AddrReg = MI.getOperand(2).getReg();
581   Register CmpValReg = MI.getOperand(3).getReg();
582   Register NewValReg = MI.getOperand(4).getReg();
583   Register MaskReg = IsMasked ? MI.getOperand(5).getReg() : Register();
584 
585   MachineBasicBlock *LoopHeadBNETarget = DoneMBB;
586   tryToFoldBNEOnCmpXchgResult(MBB, std::next(MBBI), DestReg, CmpValReg, MaskReg,
587                               LoopHeadBNETarget);
588 
589   // Insert new MBBs.
590   MF->insert(++MBB.getIterator(), LoopHeadMBB);
591   MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
592   MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
593 
594   // Set up successors and transfer remaining instructions to DoneMBB.
595   LoopHeadMBB->addSuccessor(LoopTailMBB);
596   LoopHeadMBB->addSuccessor(LoopHeadBNETarget);
597   LoopTailMBB->addSuccessor(DoneMBB);
598   LoopTailMBB->addSuccessor(LoopHeadMBB);
599   DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
600   DoneMBB->transferSuccessors(&MBB);
601   MBB.addSuccessor(LoopHeadMBB);
602 
603   AtomicOrdering Ordering =
604       static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
605 
606   if (!IsMasked) {
607     // .loophead:
608     //   lr.[w|d] dest, (addr)
609     //   bne dest, cmpval, done
610     BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
611         .addReg(AddrReg);
612     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
613         .addReg(DestReg)
614         .addReg(CmpValReg)
615         .addMBB(LoopHeadBNETarget);
616     // .looptail:
617     //   sc.[w|d] scratch, newval, (addr)
618     //   bnez scratch, loophead
619     BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
620         .addReg(AddrReg)
621         .addReg(NewValReg);
622     BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
623         .addReg(ScratchReg)
624         .addReg(RISCV::X0)
625         .addMBB(LoopHeadMBB);
626   } else {
627     // .loophead:
628     //   lr.w dest, (addr)
629     //   and scratch, dest, mask
630     //   bne scratch, cmpval, done
631     Register MaskReg = MI.getOperand(5).getReg();
632     BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width)), DestReg)
633         .addReg(AddrReg);
634     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
635         .addReg(DestReg)
636         .addReg(MaskReg);
637     BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
638         .addReg(ScratchReg)
639         .addReg(CmpValReg)
640         .addMBB(LoopHeadBNETarget);
641 
642     // .looptail:
643     //   xor scratch, dest, newval
644     //   and scratch, scratch, mask
645     //   xor scratch, dest, scratch
646     //   sc.w scratch, scratch, (adrr)
647     //   bnez scratch, loophead
648     insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
649                       MaskReg, ScratchReg);
650     BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width)), ScratchReg)
651         .addReg(AddrReg)
652         .addReg(ScratchReg);
653     BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
654         .addReg(ScratchReg)
655         .addReg(RISCV::X0)
656         .addMBB(LoopHeadMBB);
657   }
658 
659   NextMBBI = MBB.end();
660   MI.eraseFromParent();
661 
662   LivePhysRegs LiveRegs;
663   computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
664   computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
665   computeAndAddLiveIns(LiveRegs, *DoneMBB);
666 
667   return true;
668 }
669 
670 } // end of anonymous namespace
671 
672 INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
673                 RISCV_EXPAND_ATOMIC_PSEUDO_NAME, false, false)
674 
675 namespace llvm {
676 
677 FunctionPass *createRISCVExpandAtomicPseudoPass() {
678   return new RISCVExpandAtomicPseudo();
679 }
680 
681 } // end of namespace llvm
682