xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp (revision 53120fbb68952b7d620c2c0e1cf05c5017fc1b27)
1 //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands pseudo instructions into target
10 // instructions to allow proper scheduling and other late optimizations.  This
11 // pass should be run after register allocation but before the post-regalloc
12 // scheduling pass.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64ExpandImm.h"
17 #include "AArch64InstrInfo.h"
18 #include "AArch64MachineFunctionInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/CodeGen/LivePhysRegs.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineConstantPool.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstr.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/CodeGen/TargetSubtargetInfo.h"
31 #include "llvm/IR/DebugLoc.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/Pass.h"
34 #include "llvm/Support/CodeGen.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Target/TargetMachine.h"
37 #include "llvm/TargetParser/Triple.h"
38 #include <cassert>
39 #include <cstdint>
40 #include <iterator>
41 #include <utility>
42 
43 using namespace llvm;
44 
45 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
46 
47 namespace {
48 
/// Machine-function pass that expands AArch64 pseudo instructions into real
/// target instructions. Per the file header it is meant to run after register
/// allocation and before the post-regalloc scheduling pass.
49 class AArch64ExpandPseudo : public MachineFunctionPass {
50 public:
  // Instruction info used by the expanders (TII->get(Opcode)) to obtain the
  // descriptions handed to BuildMI.
  // NOTE(review): not initialised in this declaration; presumably assigned in
  // runOnMachineFunction -- confirm.
51   const AArch64InstrInfo *TII;
52 
  // Pass identifier handed to the MachineFunctionPass base constructor.
53   static char ID;
54 
  // Registers the pass with the global PassRegistry on construction.
55   AArch64ExpandPseudo() : MachineFunctionPass(ID) {
56     initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
57   }
58 
59   bool runOnMachineFunction(MachineFunction &Fn) override;
60 
61   StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
62 
63 private:
  // Block-level and instruction-level drivers. Their definitions are not
  // shown in this part of the file.
64   bool expandMBB(MachineBasicBlock &MBB);
65   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
66                 MachineBasicBlock::iterator &NextMBBI);
  // NOTE(review): both register classes are taken by value; confirm that
  // copying TargetRegisterClass objects is intended.
67   bool expandMultiVecPseudo(MachineBasicBlock &MBB,
68                             MachineBasicBlock::iterator MBBI,
69                             TargetRegisterClass ContiguousClass,
70                             TargetRegisterClass StridedClass,
71                             unsigned ContiguousOpc, unsigned StridedOpc);
  // Expands a MOVi32imm/MOVi64imm pseudo; BitSize is the width of the move.
72   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
73                     unsigned BitSize);
74 
  // Expands an SVE destructive-operand pseudo, optionally prefixed by MOVPRFX.
75   bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
76                             MachineBasicBlock::iterator MBBI);
  // The following three expanders split the current block and create new
  // blocks; they report the position to continue from through NextMBBI.
77   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
78                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
79                       unsigned ExtendImm, unsigned ZeroReg,
80                       MachineBasicBlock::iterator &NextMBBI);
81   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
82                           MachineBasicBlock::iterator MBBI,
83                           MachineBasicBlock::iterator &NextMBBI);
84   bool expandSetTagLoop(MachineBasicBlock &MBB,
85                         MachineBasicBlock::iterator MBBI,
86                         MachineBasicBlock::iterator &NextMBBI);
  // Emits N copies of Opc (LDR/STR of a Z or P register), one per
  // sub-register of the pseudo's register tuple.
87   bool expandSVESpillFill(MachineBasicBlock &MBB,
88                           MachineBasicBlock::iterator MBBI, unsigned Opc,
89                           unsigned N);
90   bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
91                            MachineBasicBlock::iterator MBBI);
92   bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
93   bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
94                                     MachineBasicBlock::iterator MBBI);
  // These two return a MachineBasicBlock pointer rather than a bool.
  // NOTE(review): presumably the block in which expansion should continue --
  // confirm against the definitions.
95   MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
96                                      MachineBasicBlock::iterator MBBI);
97   MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
98                                         MachineBasicBlock::iterator MBBI);
99 };
100 
101 } // end anonymous namespace
102 
// Storage for the pass identifier that the constructor passes to
// MachineFunctionPass.
103 char AArch64ExpandPseudo::ID = 0;
104 
// Registers the pass under the name "aarch64-expand-pseudo" with the
// human-readable description AARCH64_EXPAND_PSEUDO_NAME.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags of INITIALIZE_PASS -- confirm against
// llvm/PassSupport.h.
105 INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
106                 AARCH64_EXPAND_PSEUDO_NAME, false, false)
107 
108 /// Transfer implicit operands on the pseudo instruction to the
109 /// instructions created from the expansion.
110 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
111                            MachineInstrBuilder &DefMI) {
112   const MCInstrDesc &Desc = OldMI.getDesc();
113   for (const MachineOperand &MO :
114        llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
115     assert(MO.isReg() && MO.getReg());
116     if (MO.isUse())
117       UseMI.add(MO);
118     else
119       DefMI.add(MO);
120   }
121 }
122 
123 /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
124 /// real move-immediate instructions to synthesize the immediate.
125 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
126                                        MachineBasicBlock::iterator MBBI,
127                                        unsigned BitSize) {
128   MachineInstr &MI = *MBBI;
129   Register DstReg = MI.getOperand(0).getReg();
130   uint64_t RenamableState =
131       MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
132   uint64_t Imm = MI.getOperand(1).getImm();
133 
134   if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
135     // Useless def, and we don't want to risk creating an invalid ORR (which
136     // would really write to sp).
137     MI.eraseFromParent();
138     return true;
139   }
140 
141   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
142   AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
143   assert(Insn.size() != 0);
144 
145   SmallVector<MachineInstrBuilder, 4> MIBS;
146   for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
147     bool LastItem = std::next(I) == E;
148     switch (I->Opcode)
149     {
150     default: llvm_unreachable("unhandled!"); break;
151 
152     case AArch64::ORRWri:
153     case AArch64::ORRXri:
154       if (I->Op1 == 0) {
155         MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
156                            .add(MI.getOperand(0))
157                            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
158                            .addImm(I->Op2));
159       } else {
160         Register DstReg = MI.getOperand(0).getReg();
161         bool DstIsDead = MI.getOperand(0).isDead();
162         MIBS.push_back(
163             BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
164                 .addReg(DstReg, RegState::Define |
165                                     getDeadRegState(DstIsDead && LastItem) |
166                                     RenamableState)
167                 .addReg(DstReg)
168                 .addImm(I->Op2));
169       }
170       break;
171     case AArch64::ANDXri:
172     case AArch64::EORXri:
173       if (I->Op1 == 0) {
174         MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
175                            .add(MI.getOperand(0))
176                            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
177                            .addImm(I->Op2));
178       } else {
179         Register DstReg = MI.getOperand(0).getReg();
180         bool DstIsDead = MI.getOperand(0).isDead();
181         MIBS.push_back(
182             BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
183                 .addReg(DstReg, RegState::Define |
184                                     getDeadRegState(DstIsDead && LastItem) |
185                                     RenamableState)
186                 .addReg(DstReg)
187                 .addImm(I->Op2));
188       }
189       break;
190     case AArch64::MOVNWi:
191     case AArch64::MOVNXi:
192     case AArch64::MOVZWi:
193     case AArch64::MOVZXi: {
194       bool DstIsDead = MI.getOperand(0).isDead();
195       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
196         .addReg(DstReg, RegState::Define |
197                 getDeadRegState(DstIsDead && LastItem) |
198                 RenamableState)
199         .addImm(I->Op1)
200         .addImm(I->Op2));
201       } break;
202     case AArch64::MOVKWi:
203     case AArch64::MOVKXi: {
204       Register DstReg = MI.getOperand(0).getReg();
205       bool DstIsDead = MI.getOperand(0).isDead();
206       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
207         .addReg(DstReg,
208                 RegState::Define |
209                 getDeadRegState(DstIsDead && LastItem) |
210                 RenamableState)
211         .addReg(DstReg)
212         .addImm(I->Op1)
213         .addImm(I->Op2));
214       } break;
215     }
216   }
217   transferImpOps(MI, MIBS.front(), MIBS.back());
218   MI.eraseFromParent();
219   return true;
220 }
221 
222 bool AArch64ExpandPseudo::expandCMP_SWAP(
223     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
224     unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
225     MachineBasicBlock::iterator &NextMBBI) {
226   MachineInstr &MI = *MBBI;
227   MIMetadata MIMD(MI);
228   const MachineOperand &Dest = MI.getOperand(0);
229   Register StatusReg = MI.getOperand(1).getReg();
230   bool StatusDead = MI.getOperand(1).isDead();
231   // Duplicating undef operands into 2 instructions does not guarantee the same
232   // value on both; However undef should be replaced by xzr anyway.
233   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
234   Register AddrReg = MI.getOperand(2).getReg();
235   Register DesiredReg = MI.getOperand(3).getReg();
236   Register NewReg = MI.getOperand(4).getReg();
237 
238   MachineFunction *MF = MBB.getParent();
239   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
240   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
241   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
242 
243   MF->insert(++MBB.getIterator(), LoadCmpBB);
244   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
245   MF->insert(++StoreBB->getIterator(), DoneBB);
246 
247   // .Lloadcmp:
248   //     mov wStatus, 0
249   //     ldaxr xDest, [xAddr]
250   //     cmp xDest, xDesired
251   //     b.ne .Ldone
252   if (!StatusDead)
253     BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
254       .addImm(0).addImm(0);
255   BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
256       .addReg(AddrReg);
257   BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
258       .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
259       .addReg(DesiredReg)
260       .addImm(ExtendImm);
261   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
262       .addImm(AArch64CC::NE)
263       .addMBB(DoneBB)
264       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
265   LoadCmpBB->addSuccessor(DoneBB);
266   LoadCmpBB->addSuccessor(StoreBB);
267 
268   // .Lstore:
269   //     stlxr wStatus, xNew, [xAddr]
270   //     cbnz wStatus, .Lloadcmp
271   BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
272       .addReg(NewReg)
273       .addReg(AddrReg);
274   BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
275       .addReg(StatusReg, getKillRegState(StatusDead))
276       .addMBB(LoadCmpBB);
277   StoreBB->addSuccessor(LoadCmpBB);
278   StoreBB->addSuccessor(DoneBB);
279 
280   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
281   DoneBB->transferSuccessors(&MBB);
282 
283   MBB.addSuccessor(LoadCmpBB);
284 
285   NextMBBI = MBB.end();
286   MI.eraseFromParent();
287 
288   // Recompute livein lists.
289   LivePhysRegs LiveRegs;
290   computeAndAddLiveIns(LiveRegs, *DoneBB);
291   computeAndAddLiveIns(LiveRegs, *StoreBB);
292   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
293   // Do an extra pass around the loop to get loop carried registers right.
294   StoreBB->clearLiveIns();
295   computeAndAddLiveIns(LiveRegs, *StoreBB);
296   LoadCmpBB->clearLiveIns();
297   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
298 
299   return true;
300 }
301 
/// Expand a 128-bit compare-and-swap pseudo (CMP_SWAP_128 and its
/// _MONOTONIC/_RELEASE/_ACQUIRE variants) into a loop built from an exclusive
/// load pair and an exclusive store pair.
///
/// Pseudo operands, in order: DestLo, DestHi, Status, Addr, DesiredLo,
/// DesiredHi, NewLo, NewHi. The opcode of the pseudo selects which load-pair
/// and store-pair opcodes are used. The block is split at the pseudo and four
/// blocks are inserted after \p MBB: loadcmp, store, fail and done.
///
/// \param NextMBBI Set to MBB.end().
/// \returns true.
302 bool AArch64ExpandPseudo::expandCMP_SWAP_128(
303     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
304     MachineBasicBlock::iterator &NextMBBI) {
305   MachineInstr &MI = *MBBI;
306   MIMetadata MIMD(MI);
307   MachineOperand &DestLo = MI.getOperand(0);
308   MachineOperand &DestHi = MI.getOperand(1);
309   Register StatusReg = MI.getOperand(2).getReg();
310   bool StatusDead = MI.getOperand(2).isDead();
311   // Duplicating undef operands into 2 instructions does not guarantee the same
312   // value on both; However undef should be replaced by xzr anyway.
313   assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
314   Register AddrReg = MI.getOperand(3).getReg();
315   Register DesiredLoReg = MI.getOperand(4).getReg();
316   Register DesiredHiReg = MI.getOperand(5).getReg();
317   Register NewLoReg = MI.getOperand(6).getReg();
318   Register NewHiReg = MI.getOperand(7).getReg();
319 
320   unsigned LdxpOp, StxpOp;
321 
  // Map the pseudo's opcode to the load-pair / store-pair opcodes to emit.
322   switch (MI.getOpcode()) {
323   case AArch64::CMP_SWAP_128_MONOTONIC:
324     LdxpOp = AArch64::LDXPX;
325     StxpOp = AArch64::STXPX;
326     break;
327   case AArch64::CMP_SWAP_128_RELEASE:
328     LdxpOp = AArch64::LDXPX;
329     StxpOp = AArch64::STLXPX;
330     break;
331   case AArch64::CMP_SWAP_128_ACQUIRE:
332     LdxpOp = AArch64::LDAXPX;
333     StxpOp = AArch64::STXPX;
334     break;
335   case AArch64::CMP_SWAP_128:
336     LdxpOp = AArch64::LDAXPX;
337     StxpOp = AArch64::STLXPX;
338     break;
339   default:
340     llvm_unreachable("Unexpected opcode");
341   }
342 
343   MachineFunction *MF = MBB.getParent();
344   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
345   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
346   auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
347   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
348 
  // Layout after MBB: loadcmp, store, fail, done.
349   MF->insert(++MBB.getIterator(), LoadCmpBB);
350   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
351   MF->insert(++StoreBB->getIterator(), FailBB);
352   MF->insert(++FailBB->getIterator(), DoneBB);
353 
354   // .Lloadcmp:
355   //     <LdxpOp> xDestLo, xDestHi, [xAddr]
356   //     subs xzr, xDestLo, xDesiredLo
357   //     csinc wStatus, wzr, wzr, eq
358   //     subs xzr, xDestHi, xDesiredHi
  //     csinc wStatus, wStatus, wStatus, eq
  //     cbnz wStatus, .Lfail
  // wStatus ends up non-zero when either half differs from the desired value.
359   BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
360       .addReg(DestLo.getReg(), RegState::Define)
361       .addReg(DestHi.getReg(), RegState::Define)
362       .addReg(AddrReg);
363   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
364       .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
365       .addReg(DesiredLoReg)
366       .addImm(0);
367   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
368     .addUse(AArch64::WZR)
369     .addUse(AArch64::WZR)
370     .addImm(AArch64CC::EQ);
371   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
372       .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
373       .addReg(DesiredHiReg)
374       .addImm(0);
375   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
376       .addUse(StatusReg, RegState::Kill)
377       .addUse(StatusReg, RegState::Kill)
378       .addImm(AArch64CC::EQ);
379   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
380       .addUse(StatusReg, getKillRegState(StatusDead))
381       .addMBB(FailBB);
382   LoadCmpBB->addSuccessor(FailBB);
383   LoadCmpBB->addSuccessor(StoreBB);
384 
385   // .Lstore:
386   //     <StxpOp> wStatus, xNewLo, xNewHi, [xAddr]
387   //     cbnz wStatus, .Lloadcmp
  //     b .Ldone
388   BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
389       .addReg(NewLoReg)
390       .addReg(NewHiReg)
391       .addReg(AddrReg);
392   BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
393       .addReg(StatusReg, getKillRegState(StatusDead))
394       .addMBB(LoadCmpBB);
395   BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
396   StoreBB->addSuccessor(LoadCmpBB);
397   StoreBB->addSuccessor(DoneBB);
398 
399   // .Lfail:
400   //     <StxpOp> wStatus, xDestLo, xDestHi, [xAddr]
401   //     cbnz wStatus, .Lloadcmp
  // The value that was just loaded is stored back; with no explicit branch
  // after the cbnz, control continues into .Ldone, the next block in layout.
402   BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
403       .addReg(DestLo.getReg())
404       .addReg(DestHi.getReg())
405       .addReg(AddrReg);
406   BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
407       .addReg(StatusReg, getKillRegState(StatusDead))
408       .addMBB(LoadCmpBB);
409   FailBB->addSuccessor(LoadCmpBB);
410   FailBB->addSuccessor(DoneBB);
411 
  // Move the pseudo and everything after it into the done block and give it
  // MBB's successors; MBB then only flows into the loop head.
412   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
413   DoneBB->transferSuccessors(&MBB);
414 
415   MBB.addSuccessor(LoadCmpBB);
416 
417   NextMBBI = MBB.end();
418   MI.eraseFromParent();
419 
420   // Recompute liveness bottom up.
421   LivePhysRegs LiveRegs;
422   computeAndAddLiveIns(LiveRegs, *DoneBB);
423   computeAndAddLiveIns(LiveRegs, *FailBB);
424   computeAndAddLiveIns(LiveRegs, *StoreBB);
425   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
426 
427   // Do an extra pass in the loop to get the loop carried dependencies right.
428   FailBB->clearLiveIns();
429   computeAndAddLiveIns(LiveRegs, *FailBB);
430   StoreBB->clearLiveIns();
431   computeAndAddLiveIns(LiveRegs, *StoreBB);
432   LoadCmpBB->clearLiveIns();
433   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
434 
435   return true;
436 }
437 
438 /// \brief Expand Pseudos to Instructions with destructive operands.
439 ///
440 /// This mechanism uses MOVPRFX instructions for zeroing the false lanes
441 /// or for fixing relaxed register allocation conditions to comply with
442 /// the instructions register constraints. The latter case may be cheaper
443 /// than setting the register constraints in the register allocator,
444 /// since that will insert regular MOV instructions rather than MOVPRFX.
445 ///
446 /// Example (after register allocation):
447 ///
448 ///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
449 ///
450 /// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
451 /// * We cannot map directly to FSUB_ZPmZ_B because the register
452 ///   constraints of the instruction are not met.
453 /// * Also the _ZERO specifies the false lanes need to be zeroed.
454 ///
455 /// We first try to see if the destructive operand == result operand,
456 /// if not, we try to swap the operands, e.g.
457 ///
458 ///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
459 ///
460 /// But because FSUB_ZPmZ is not commutative, this is semantically
461 /// different, so we need a reverse instruction:
462 ///
463 ///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
464 ///
465 /// Then we implement the zeroing of the false lanes of Z0 by adding
466 /// a zeroing MOVPRFX instruction:
467 ///
468 ///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
469 ///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
470 ///
471 /// Note that this can only be done for _ZERO or _UNDEF variants where
472 /// we can guarantee the false lanes to be zeroed (by implementing this)
473 /// or that they are undef (don't care / not used), otherwise the
474 /// swapping of operands is illegal because the operation is not
475 /// (or cannot be emulated to be) fully commutative.
///
/// \param MI   The pseudo to expand (presumably the instruction \p MBBI
///             points at -- confirm at the call site).
/// \param MBB  Block into which the expansion is inserted.
/// \param MBBI Insertion point; new instructions are placed before it.
/// \returns true; \p MI is always erased.
476 bool AArch64ExpandPseudo::expand_DestructiveOp(
477                             MachineInstr &MI,
478                             MachineBasicBlock &MBB,
479                             MachineBasicBlock::iterator MBBI) {
  // Opcode is the real instruction the pseudo maps to. The destructive type
  // is read from that instruction's flags, the false-lanes mode from the
  // pseudo's own flags.
480   unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
481   uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
482   uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
483   bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
484   Register DstReg = MI.getOperand(0).getReg();
485   bool DstIsDead = MI.getOperand(0).isDead();
486   bool UseRev = false;
  // Operand indices into the pseudo: predicate, destructive operand and
  // source(s). Src2Idx is only assigned and read for the ternary type.
487   unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
488 
489   switch (DType) {
490   case AArch64::DestructiveBinaryComm:
491   case AArch64::DestructiveBinaryCommWithRev:
492     if (DstReg == MI.getOperand(3).getReg()) {
493       // FSUB Zd, Pg, Zs1, Zd  ==> FSUBR   Zd, Pg/m, Zd, Zs1
494       std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
495       UseRev = true;
496       break;
497     }
498     [[fallthrough]];
499   case AArch64::DestructiveBinary:
500   case AArch64::DestructiveBinaryImm:
501     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
502     break;
503   case AArch64::DestructiveUnaryPassthru:
504     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
505     break;
506   case AArch64::DestructiveTernaryCommWithRev:
507     std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
508     if (DstReg == MI.getOperand(3).getReg()) {
509       // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
510       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
511       UseRev = true;
512     } else if (DstReg == MI.getOperand(4).getReg()) {
513       // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
514       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
515       UseRev = true;
516     }
517     break;
518   default:
519     llvm_unreachable("Unsupported Destructive Operand type");
520   }
521 
522   // MOVPRFX can only be used if the destination operand
523   // is the destructive operand, not as any other operand,
524   // so the Destructive Operand must be unique.
525   bool DOPRegIsUnique = false;
526   switch (DType) {
527   case AArch64::DestructiveBinary:
528     DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
529     break;
530   case AArch64::DestructiveBinaryComm:
531   case AArch64::DestructiveBinaryCommWithRev:
532     DOPRegIsUnique =
533       DstReg != MI.getOperand(DOPIdx).getReg() ||
534       MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
535     break;
536   case AArch64::DestructiveUnaryPassthru:
537   case AArch64::DestructiveBinaryImm:
538     DOPRegIsUnique = true;
539     break;
540   case AArch64::DestructiveTernaryCommWithRev:
541     DOPRegIsUnique =
542         DstReg != MI.getOperand(DOPIdx).getReg() ||
543         (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
544          MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
545     break;
546   }
547 
548   // Resolve the reverse opcode
  // If neither lookup finds a counterpart (both return -1), Opcode is kept.
549   if (UseRev) {
550     int NewOpcode;
551     // e.g. DIV -> DIVR
552     if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
553       Opcode = NewOpcode;
554     // e.g. DIVR -> DIV
555     else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
556       Opcode = NewOpcode;
557   }
558 
559   // Get the right MOVPRFX
  // The unpredicated prefix is the same for every element size; the zeroing
  // prefix and the LSL used below are picked per element size.
560   uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
561   unsigned MovPrfx, LSLZero, MovPrfxZero;
562   switch (ElementSize) {
563   case AArch64::ElementSizeNone:
564   case AArch64::ElementSizeB:
565     MovPrfx = AArch64::MOVPRFX_ZZ;
566     LSLZero = AArch64::LSL_ZPmI_B;
567     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
568     break;
569   case AArch64::ElementSizeH:
570     MovPrfx = AArch64::MOVPRFX_ZZ;
571     LSLZero = AArch64::LSL_ZPmI_H;
572     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
573     break;
574   case AArch64::ElementSizeS:
575     MovPrfx = AArch64::MOVPRFX_ZZ;
576     LSLZero = AArch64::LSL_ZPmI_S;
577     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
578     break;
579   case AArch64::ElementSizeD:
580     MovPrfx = AArch64::MOVPRFX_ZZ;
581     LSLZero = AArch64::LSL_ZPmI_D;
582     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
583     break;
584   default:
585     llvm_unreachable("Unsupported ElementSize");
586   }
587 
588   //
589   // Create the destructive operation (if required)
590   //
  // PRFX stays empty when no prefix instruction is needed; it is tested at
  // the end to decide whether a bundle has to be formed.
591   MachineInstrBuilder PRFX, DOP;
592   if (FalseZero) {
593     // If we cannot prefix the requested instruction we'll instead emit a
594     // prefixed_zeroing_mov for DestructiveBinary.
595     assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
596             DType == AArch64::DestructiveBinaryComm ||
597             DType == AArch64::DestructiveBinaryCommWithRev) &&
598            "The destructive operand should be unique");
599     assert(ElementSize != AArch64::ElementSizeNone &&
600            "This instruction is unpredicated");
601 
602     // Merge source operand into destination register
603     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
604                .addReg(DstReg, RegState::Define)
605                .addReg(MI.getOperand(PredIdx).getReg())
606                .addReg(MI.getOperand(DOPIdx).getReg());
607 
608     // After the movprfx, the destructive operand is same as Dst
609     DOPIdx = 0;
610 
611     // Create the additional LSL to zero the lanes when the DstReg is not
612     // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
613     // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
614     if ((DType == AArch64::DestructiveBinary ||
615          DType == AArch64::DestructiveBinaryComm ||
616          DType == AArch64::DestructiveBinaryCommWithRev) &&
617         !DOPRegIsUnique) {
618       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
619           .addReg(DstReg, RegState::Define)
620           .add(MI.getOperand(PredIdx))
621           .addReg(DstReg)
622           .addImm(0);
623     }
624   } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
    // The false lanes need no zeroing, but the destructive operand lives in a
    // different register than the result: copy it over with an unpredicated
    // MOVPRFX.
625     assert(DOPRegIsUnique && "The destructive operand should be unique");
626     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
627                .addReg(DstReg, RegState::Define)
628                .addReg(MI.getOperand(DOPIdx).getReg());
629     DOPIdx = 0;
630   }
631 
632   //
633   // Create the destructive operation
634   //
635   DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
636     .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
637 
  // Append the remaining operands in the order the destructive type expects;
  // the destructive operand's register is always added with the kill flag.
638   switch (DType) {
639   case AArch64::DestructiveUnaryPassthru:
640     DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
641         .add(MI.getOperand(PredIdx))
642         .add(MI.getOperand(SrcIdx));
643     break;
644   case AArch64::DestructiveBinary:
645   case AArch64::DestructiveBinaryImm:
646   case AArch64::DestructiveBinaryComm:
647   case AArch64::DestructiveBinaryCommWithRev:
648     DOP.add(MI.getOperand(PredIdx))
649        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
650        .add(MI.getOperand(SrcIdx));
651     break;
652   case AArch64::DestructiveTernaryCommWithRev:
653     DOP.add(MI.getOperand(PredIdx))
654         .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
655         .add(MI.getOperand(SrcIdx))
656         .add(MI.getOperand(Src2Idx));
657     break;
658   }
659 
  // With a prefix, bundle the instructions from the MOVPRFX up to MBBI
  // (presumably so nothing gets placed between the prefix and the instruction
  // it applies to -- confirm). Implicit uses of the pseudo then go on the
  // prefix and implicit defs on the destructive instruction. Without a
  // prefix, both kinds go on the destructive instruction.
660   if (PRFX) {
661     finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
662     transferImpOps(MI, PRFX, DOP);
663   } else
664     transferImpOps(MI, DOP, DOP);
665 
666   MI.eraseFromParent();
667   return true;
668 }
669 
670 bool AArch64ExpandPseudo::expandSetTagLoop(
671     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
672     MachineBasicBlock::iterator &NextMBBI) {
673   MachineInstr &MI = *MBBI;
674   DebugLoc DL = MI.getDebugLoc();
675   Register SizeReg = MI.getOperand(0).getReg();
676   Register AddressReg = MI.getOperand(1).getReg();
677 
678   MachineFunction *MF = MBB.getParent();
679 
680   bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
681   const unsigned OpCode1 =
682       ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
683   const unsigned OpCode2 =
684       ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
685 
686   unsigned Size = MI.getOperand(2).getImm();
687   assert(Size > 0 && Size % 16 == 0);
688   if (Size % (16 * 2) != 0) {
689     BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
690         .addReg(AddressReg)
691         .addReg(AddressReg)
692         .addImm(1);
693     Size -= 16;
694   }
695   MachineBasicBlock::iterator I =
696       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
697           .addImm(Size);
698   expandMOVImm(MBB, I, 64);
699 
700   auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
701   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
702 
703   MF->insert(++MBB.getIterator(), LoopBB);
704   MF->insert(++LoopBB->getIterator(), DoneBB);
705 
706   BuildMI(LoopBB, DL, TII->get(OpCode2))
707       .addDef(AddressReg)
708       .addReg(AddressReg)
709       .addReg(AddressReg)
710       .addImm(2)
711       .cloneMemRefs(MI)
712       .setMIFlags(MI.getFlags());
713   BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
714       .addDef(SizeReg)
715       .addReg(SizeReg)
716       .addImm(16 * 2)
717       .addImm(0);
718   BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
719       .addImm(AArch64CC::NE)
720       .addMBB(LoopBB)
721       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
722 
723   LoopBB->addSuccessor(LoopBB);
724   LoopBB->addSuccessor(DoneBB);
725 
726   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
727   DoneBB->transferSuccessors(&MBB);
728 
729   MBB.addSuccessor(LoopBB);
730 
731   NextMBBI = MBB.end();
732   MI.eraseFromParent();
733   // Recompute liveness bottom up.
734   LivePhysRegs LiveRegs;
735   computeAndAddLiveIns(LiveRegs, *DoneBB);
736   computeAndAddLiveIns(LiveRegs, *LoopBB);
737   // Do an extra pass in the loop to get the loop carried dependencies right.
738   // FIXME: is this necessary?
739   LoopBB->clearLiveIns();
740   computeAndAddLiveIns(LiveRegs, *LoopBB);
741   DoneBB->clearLiveIns();
742   computeAndAddLiveIns(LiveRegs, *DoneBB);
743 
744   return true;
745 }
746 
747 bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
748                                              MachineBasicBlock::iterator MBBI,
749                                              unsigned Opc, unsigned N) {
750   assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
751           Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
752          "Unexpected opcode");
753   unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)
754                         ? RegState::Define
755                         : 0;
756   unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
757                       ? AArch64::zsub0
758                       : AArch64::psub0;
759   const TargetRegisterInfo *TRI =
760       MBB.getParent()->getSubtarget().getRegisterInfo();
761   MachineInstr &MI = *MBBI;
762   for (unsigned Offset = 0; Offset < N; ++Offset) {
763     int ImmOffset = MI.getOperand(2).getImm() + Offset;
764     bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
765     assert(ImmOffset >= -256 && ImmOffset < 256 &&
766            "Immediate spill offset out of range");
767     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
768         .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
769                 RState)
770         .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
771         .addImm(ImmOffset);
772   }
773   MI.eraseFromParent();
774   return true;
775 }
776 
777 bool AArch64ExpandPseudo::expandCALL_RVMARKER(
778     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
779   // Expand CALL_RVMARKER pseudo to:
780   // - a branch to the call target, followed by
781   // - the special `mov x29, x29` marker, and
782   // - another branch, to the runtime function
783   // Mark the sequence as bundle, to avoid passes moving other code in between.
784   MachineInstr &MI = *MBBI;
785 
786   MachineInstr *OriginalCall;
787   MachineOperand &RVTarget = MI.getOperand(0);
788   MachineOperand &CallTarget = MI.getOperand(1);
789   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
790          "invalid operand for regular call");
791   assert(RVTarget.isGlobal() && "invalid operand for attached call");
792   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
793   OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
794   OriginalCall->addOperand(CallTarget);
795 
796   unsigned RegMaskStartIdx = 2;
797   // Skip register arguments. Those are added during ISel, but are not
798   // needed for the concrete branch.
799   while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
800     auto MOP = MI.getOperand(RegMaskStartIdx);
801     assert(MOP.isReg() && "can only add register operands");
802     OriginalCall->addOperand(MachineOperand::CreateReg(
803         MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
804         /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
805     RegMaskStartIdx++;
806   }
807   for (const MachineOperand &MO :
808        llvm::drop_begin(MI.operands(), RegMaskStartIdx))
809     OriginalCall->addOperand(MO);
810 
811   BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
812                      .addReg(AArch64::FP, RegState::Define)
813                      .addReg(AArch64::XZR)
814                      .addReg(AArch64::FP)
815                      .addImm(0);
816 
817   auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
818                      .add(RVTarget)
819                      .getInstr();
820 
821   if (MI.shouldUpdateCallSiteInfo())
822     MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);
823 
824   MI.eraseFromParent();
825   finalizeBundle(MBB, OriginalCall->getIterator(),
826                  std::next(RVCall->getIterator()));
827   return true;
828 }
829 
830 bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
831                                          MachineBasicBlock::iterator MBBI) {
832   // Expand CALL_BTI pseudo to:
833   // - a branch to the call target
834   // - a BTI instruction
835   // Mark the sequence as a bundle, to avoid passes moving other code in
836   // between.
837 
838   MachineInstr &MI = *MBBI;
839   MachineOperand &CallTarget = MI.getOperand(0);
840   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
841          "invalid operand for regular call");
842   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
843   MachineInstr *Call =
844       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
845   Call->addOperand(CallTarget);
846 
847   // 1 because we already added the branch target above.
848   unsigned RegMaskStartIdx = 1;
849   // The branch is BL <target>, so we cannot attach the arguments of the called
850   // function to it. Those must be added as implicitly used by the branch.
851   while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
852     auto MOP = MI.getOperand(RegMaskStartIdx);
853     assert(MOP.isReg() && "can only add register operands");
854     Call->addOperand(MachineOperand::CreateReg(
855         MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
856         /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
857     RegMaskStartIdx++;
858   }
859   for (const MachineOperand &MO :
860        llvm::drop_begin(MI.operands(), RegMaskStartIdx))
861     Call->addOperand(MO);
862 
863   Call->setCFIType(*MBB.getParent(), MI.getCFIType());
864 
865   MachineInstr *BTI =
866       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
867           // BTI J so that setjmp can to BR to this.
868           .addImm(36)
869           .getInstr();
870 
871   if (MI.shouldUpdateCallSiteInfo())
872     MBB.getParent()->moveCallSiteInfo(&MI, Call);
873 
874   MI.eraseFromParent();
875   finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
876   return true;
877 }
878 
879 bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
880     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
881   Register CtxReg = MBBI->getOperand(0).getReg();
882   Register BaseReg = MBBI->getOperand(1).getReg();
883   int Offset = MBBI->getOperand(2).getImm();
884   DebugLoc DL(MBBI->getDebugLoc());
885   auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
886 
887   if (STI.getTargetTriple().getArchName() != "arm64e") {
888     BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
889         .addUse(CtxReg)
890         .addUse(BaseReg)
891         .addImm(Offset / 8)
892         .setMIFlag(MachineInstr::FrameSetup);
893     MBBI->eraseFromParent();
894     return true;
895   }
896 
897   // We need to sign the context in an address-discriminated way. 0xc31a is a
898   // fixed random value, chosen as part of the ABI.
899   //     add x16, xBase, #Offset
900   //     movk x16, #0xc31a, lsl #48
901   //     mov x17, x22/xzr
902   //     pacdb x17, x16
903   //     str x17, [xBase, #Offset]
904   unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
905   BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
906       .addUse(BaseReg)
907       .addImm(abs(Offset))
908       .addImm(0)
909       .setMIFlag(MachineInstr::FrameSetup);
910   BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
911       .addUse(AArch64::X16)
912       .addImm(0xc31a)
913       .addImm(48)
914       .setMIFlag(MachineInstr::FrameSetup);
915   // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
916   // move it somewhere before signing.
917   BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
918       .addUse(AArch64::XZR)
919       .addUse(CtxReg)
920       .addImm(0)
921       .setMIFlag(MachineInstr::FrameSetup);
922   BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
923       .addUse(AArch64::X17)
924       .addUse(AArch64::X16)
925       .setMIFlag(MachineInstr::FrameSetup);
926   BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
927       .addUse(AArch64::X17)
928       .addUse(BaseReg)
929       .addImm(Offset / 8)
930       .setMIFlag(MachineInstr::FrameSetup);
931 
932   MBBI->eraseFromParent();
933   return true;
934 }
935 
936 MachineBasicBlock *
937 AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
938                                      MachineBasicBlock::iterator MBBI) {
939   MachineInstr &MI = *MBBI;
940   assert((std::next(MBBI) != MBB.end() ||
941           MI.getParent()->successors().begin() !=
942               MI.getParent()->successors().end()) &&
943          "Unexpected unreachable in block that restores ZA");
944 
945   // Compare TPIDR2_EL0 value against 0.
946   DebugLoc DL = MI.getDebugLoc();
947   MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
948                                 .add(MI.getOperand(0));
949 
950   // Split MBB and create two new blocks:
951   //  - MBB now contains all instructions before RestoreZAPseudo.
952   //  - SMBB contains the RestoreZAPseudo instruction only.
953   //  - EndBB contains all instructions after RestoreZAPseudo.
954   MachineInstr &PrevMI = *std::prev(MBBI);
955   MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
956   MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
957                                  ? *SMBB->successors().begin()
958                                  : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
959 
960   // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
961   Cbz.addMBB(SMBB);
962   BuildMI(&MBB, DL, TII->get(AArch64::B))
963       .addMBB(EndBB);
964   MBB.addSuccessor(EndBB);
965 
966   // Replace the pseudo with a call (BL).
967   MachineInstrBuilder MIB =
968       BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
969   MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
970   for (unsigned I = 2; I < MI.getNumOperands(); ++I)
971     MIB.add(MI.getOperand(I));
972   BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
973 
974   MI.eraseFromParent();
975   return EndBB;
976 }
977 
978 MachineBasicBlock *
979 AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
980                                         MachineBasicBlock::iterator MBBI) {
981   MachineInstr &MI = *MBBI;
982   // In the case of a smstart/smstop before a unreachable, just remove the pseudo.
983   // Exception handling code generated by Clang may introduce unreachables and it
984   // seems unnecessary to restore pstate.sm when that happens. Note that it is
985   // not just an optimisation, the code below expects a successor instruction/block
986   // in order to split the block at MBBI.
987   if (std::next(MBBI) == MBB.end() &&
988       MI.getParent()->successors().begin() ==
989           MI.getParent()->successors().end()) {
990     MI.eraseFromParent();
991     return &MBB;
992   }
993 
994   // Expand the pseudo into smstart or smstop instruction. The pseudo has the
995   // following operands:
996   //
997   //   MSRpstatePseudo <za|sm|both>, <0|1>, pstate.sm, expectedval, <regmask>
998   //
999   // The pseudo is expanded into a conditional smstart/smstop, with a
1000   // check if pstate.sm (register) equals the expected value, and if not,
1001   // invokes the smstart/smstop.
1002   //
1003   // As an example, the following block contains a normal call from a
1004   // streaming-compatible function:
1005   //
1006   // OrigBB:
1007   //   MSRpstatePseudo 3, 0, %0, 0, <regmask>             <- Conditional SMSTOP
1008   //   bl @normal_callee
1009   //   MSRpstatePseudo 3, 1, %0, 0, <regmask>             <- Conditional SMSTART
1010   //
1011   // ...which will be transformed into:
1012   //
1013   // OrigBB:
1014   //   TBNZx %0:gpr64, 0, SMBB
1015   //   b EndBB
1016   //
1017   // SMBB:
1018   //   MSRpstatesvcrImm1 3, 0, <regmask>                  <- SMSTOP
1019   //
1020   // EndBB:
1021   //   bl @normal_callee
1022   //   MSRcond_pstatesvcrImm1 3, 1, <regmask>             <- SMSTART
1023   //
1024   DebugLoc DL = MI.getDebugLoc();
1025 
1026   // Create the conditional branch based on the third operand of the
1027   // instruction, which tells us if we are wrapping a normal or streaming
1028   // function.
1029   // We test the live value of pstate.sm and toggle pstate.sm if this is not the
1030   // expected value for the callee (0 for a normal callee and 1 for a streaming
1031   // callee).
1032   auto PStateSM = MI.getOperand(2).getReg();
1033   auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1034   unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
1035   bool IsStreamingCallee = MI.getOperand(3).getImm();
1036   unsigned Opc = IsStreamingCallee ? AArch64::TBZW : AArch64::TBNZW;
1037   MachineInstrBuilder Tbx =
1038       BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);
1039 
1040   // Split MBB and create two new blocks:
1041   //  - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
1042   //  - SMBB contains the MSRcond_pstatesvcrImm1 instruction only.
1043   //  - EndBB contains all instructions after MSRcond_pstatesvcrImm1.
1044   MachineInstr &PrevMI = *std::prev(MBBI);
1045   MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
1046   MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
1047                                  ? *SMBB->successors().begin()
1048                                  : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
1049 
1050   // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
1051   Tbx.addMBB(SMBB);
1052   BuildMI(&MBB, DL, TII->get(AArch64::B))
1053       .addMBB(EndBB);
1054   MBB.addSuccessor(EndBB);
1055 
1056   // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
1057   MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
1058                                     TII->get(AArch64::MSRpstatesvcrImm1));
1059   // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
1060   // these contain the CopyFromReg for the first argument and the flag to
1061   // indicate whether the callee is streaming or normal).
1062   MIB.add(MI.getOperand(0));
1063   MIB.add(MI.getOperand(1));
1064   for (unsigned i = 4; i < MI.getNumOperands(); ++i)
1065     MIB.add(MI.getOperand(i));
1066 
1067   BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1068 
1069   MI.eraseFromParent();
1070   return EndBB;
1071 }
1072 
1073 bool AArch64ExpandPseudo::expandMultiVecPseudo(
1074     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1075     TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
1076     unsigned ContiguousOp, unsigned StridedOpc) {
1077   MachineInstr &MI = *MBBI;
1078   Register Tuple = MI.getOperand(0).getReg();
1079 
1080   auto ContiguousRange = ContiguousClass.getRegisters();
1081   auto StridedRange = StridedClass.getRegisters();
1082   unsigned Opc;
1083   if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
1084     Opc = ContiguousOp;
1085   } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
1086     Opc = StridedOpc;
1087   } else
1088     llvm_unreachable("Cannot expand Multi-Vector pseudo");
1089 
1090   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
1091                                 .add(MI.getOperand(0))
1092                                 .add(MI.getOperand(1))
1093                                 .add(MI.getOperand(2))
1094                                 .add(MI.getOperand(3));
1095   transferImpOps(MI, MIB, MIB);
1096   MI.eraseFromParent();
1097   return true;
1098 }
1099 
1100 /// If MBBI references a pseudo instruction that should be expanded here,
1101 /// do the expansion and return true.  Otherwise return false.
1102 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
1103                                    MachineBasicBlock::iterator MBBI,
1104                                    MachineBasicBlock::iterator &NextMBBI) {
1105   MachineInstr &MI = *MBBI;
1106   unsigned Opcode = MI.getOpcode();
1107 
1108   // Check if we can expand the destructive op
1109   int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
1110   if (OrigInstr != -1) {
1111     auto &Orig = TII->get(OrigInstr);
1112     if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
1113         AArch64::NotDestructive) {
1114       return expand_DestructiveOp(MI, MBB, MBBI);
1115     }
1116   }
1117 
1118   switch (Opcode) {
1119   default:
1120     break;
1121 
1122   case AArch64::BSPv8i8:
1123   case AArch64::BSPv16i8: {
1124     Register DstReg = MI.getOperand(0).getReg();
1125     if (DstReg == MI.getOperand(3).getReg()) {
1126       // Expand to BIT
1127       BuildMI(MBB, MBBI, MI.getDebugLoc(),
1128               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
1129                                                   : AArch64::BITv16i8))
1130           .add(MI.getOperand(0))
1131           .add(MI.getOperand(3))
1132           .add(MI.getOperand(2))
1133           .add(MI.getOperand(1));
1134     } else if (DstReg == MI.getOperand(2).getReg()) {
1135       // Expand to BIF
1136       BuildMI(MBB, MBBI, MI.getDebugLoc(),
1137               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
1138                                                   : AArch64::BIFv16i8))
1139           .add(MI.getOperand(0))
1140           .add(MI.getOperand(2))
1141           .add(MI.getOperand(3))
1142           .add(MI.getOperand(1));
1143     } else {
1144       // Expand to BSL, use additional move if required
1145       if (DstReg == MI.getOperand(1).getReg()) {
1146         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1147                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1148                                                     : AArch64::BSLv16i8))
1149             .add(MI.getOperand(0))
1150             .add(MI.getOperand(1))
1151             .add(MI.getOperand(2))
1152             .add(MI.getOperand(3));
1153       } else {
1154         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1155                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
1156                                                     : AArch64::ORRv16i8))
1157             .addReg(DstReg,
1158                     RegState::Define |
1159                         getRenamableRegState(MI.getOperand(0).isRenamable()))
1160             .add(MI.getOperand(1))
1161             .add(MI.getOperand(1));
1162         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1163                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1164                                                     : AArch64::BSLv16i8))
1165             .add(MI.getOperand(0))
1166             .addReg(DstReg,
1167                     RegState::Kill |
1168                         getRenamableRegState(MI.getOperand(0).isRenamable()))
1169             .add(MI.getOperand(2))
1170             .add(MI.getOperand(3));
1171       }
1172     }
1173     MI.eraseFromParent();
1174     return true;
1175   }
1176 
1177   case AArch64::ADDWrr:
1178   case AArch64::SUBWrr:
1179   case AArch64::ADDXrr:
1180   case AArch64::SUBXrr:
1181   case AArch64::ADDSWrr:
1182   case AArch64::SUBSWrr:
1183   case AArch64::ADDSXrr:
1184   case AArch64::SUBSXrr:
1185   case AArch64::ANDWrr:
1186   case AArch64::ANDXrr:
1187   case AArch64::BICWrr:
1188   case AArch64::BICXrr:
1189   case AArch64::ANDSWrr:
1190   case AArch64::ANDSXrr:
1191   case AArch64::BICSWrr:
1192   case AArch64::BICSXrr:
1193   case AArch64::EONWrr:
1194   case AArch64::EONXrr:
1195   case AArch64::EORWrr:
1196   case AArch64::EORXrr:
1197   case AArch64::ORNWrr:
1198   case AArch64::ORNXrr:
1199   case AArch64::ORRWrr:
1200   case AArch64::ORRXrr: {
1201     unsigned Opcode;
1202     switch (MI.getOpcode()) {
1203     default:
1204       return false;
1205     case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
1206     case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
1207     case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
1208     case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
1209     case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
1210     case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
1211     case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
1212     case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
1213     case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
1214     case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
1215     case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
1216     case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
1217     case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
1218     case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
1219     case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
1220     case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
1221     case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
1222     case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
1223     case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
1224     case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
1225     case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
1226     case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
1227     case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
1228     case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
1229     }
1230     MachineFunction &MF = *MBB.getParent();
1231     // Try to create new inst without implicit operands added.
1232     MachineInstr *NewMI = MF.CreateMachineInstr(
1233         TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
1234     MBB.insert(MBBI, NewMI);
1235     MachineInstrBuilder MIB1(MF, NewMI);
1236     MIB1->setPCSections(MF, MI.getPCSections());
1237     MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
1238         .add(MI.getOperand(1))
1239         .add(MI.getOperand(2))
1240         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1241     transferImpOps(MI, MIB1, MIB1);
1242     if (auto DebugNumber = MI.peekDebugInstrNum())
1243       NewMI->setDebugInstrNum(DebugNumber);
1244     MI.eraseFromParent();
1245     return true;
1246   }
1247 
1248   case AArch64::LOADgot: {
1249     MachineFunction *MF = MBB.getParent();
1250     Register DstReg = MI.getOperand(0).getReg();
1251     const MachineOperand &MO1 = MI.getOperand(1);
1252     unsigned Flags = MO1.getTargetFlags();
1253 
1254     if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
1255       // Tiny codemodel expand to LDR
1256       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1257                                         TII->get(AArch64::LDRXl), DstReg);
1258 
1259       if (MO1.isGlobal()) {
1260         MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
1261       } else if (MO1.isSymbol()) {
1262         MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
1263       } else {
1264         assert(MO1.isCPI() &&
1265                "Only expect globals, externalsymbols, or constant pools");
1266         MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
1267       }
1268     } else {
1269       // Small codemodel expand into ADRP + LDR.
1270       MachineFunction &MF = *MI.getParent()->getParent();
1271       DebugLoc DL = MI.getDebugLoc();
1272       MachineInstrBuilder MIB1 =
1273           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
1274 
1275       MachineInstrBuilder MIB2;
1276       if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
1277         auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1278         unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
1279         unsigned DstFlags = MI.getOperand(0).getTargetFlags();
1280         MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
1281                    .addDef(Reg32)
1282                    .addReg(DstReg, RegState::Kill)
1283                    .addReg(DstReg, DstFlags | RegState::Implicit);
1284       } else {
1285         Register DstReg = MI.getOperand(0).getReg();
1286         MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
1287                    .add(MI.getOperand(0))
1288                    .addUse(DstReg, RegState::Kill);
1289       }
1290 
1291       if (MO1.isGlobal()) {
1292         MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
1293         MIB2.addGlobalAddress(MO1.getGlobal(), 0,
1294                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1295       } else if (MO1.isSymbol()) {
1296         MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
1297         MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
1298                                                         AArch64II::MO_PAGEOFF |
1299                                                         AArch64II::MO_NC);
1300       } else {
1301         assert(MO1.isCPI() &&
1302                "Only expect globals, externalsymbols, or constant pools");
1303         MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1304                                   Flags | AArch64II::MO_PAGE);
1305         MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1306                                   Flags | AArch64II::MO_PAGEOFF |
1307                                       AArch64II::MO_NC);
1308       }
1309 
1310       transferImpOps(MI, MIB1, MIB2);
1311     }
1312     MI.eraseFromParent();
1313     return true;
1314   }
1315   case AArch64::MOVaddrBA: {
1316     MachineFunction &MF = *MI.getParent()->getParent();
1317     if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1318       // blockaddress expressions have to come from a constant pool because the
1319       // largest addend (and hence offset within a function) allowed for ADRP is
1320       // only 8MB.
1321       const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
1322       assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1323 
1324       MachineConstantPool *MCP = MF.getConstantPool();
1325       unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));
1326 
1327       Register DstReg = MI.getOperand(0).getReg();
1328       auto MIB1 =
1329           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1330               .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
1331       auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1332                           TII->get(AArch64::LDRXui), DstReg)
1333                       .addUse(DstReg)
1334                       .addConstantPoolIndex(
1335                           CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1336       transferImpOps(MI, MIB1, MIB2);
1337       MI.eraseFromParent();
1338       return true;
1339     }
1340   }
1341     [[fallthrough]];
1342   case AArch64::MOVaddr:
1343   case AArch64::MOVaddrJT:
1344   case AArch64::MOVaddrCP:
1345   case AArch64::MOVaddrTLS:
1346   case AArch64::MOVaddrEXT: {
1347     // Expand into ADRP + ADD.
1348     Register DstReg = MI.getOperand(0).getReg();
1349     assert(DstReg != AArch64::XZR);
1350     MachineInstrBuilder MIB1 =
1351         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1352             .add(MI.getOperand(1));
1353 
1354     if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
1355       // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1356       // We do so by creating a MOVK that sets bits 48-63 of the register to
1357       // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1358       // the small code model so we can assume a binary size of <= 4GB, which
1359       // makes the untagged PC relative offset positive. The binary must also be
1360       // loaded into address range [0, 2^48). Both of these properties need to
1361       // be ensured at runtime when using tagged addresses.
1362       auto Tag = MI.getOperand(1);
1363       Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1364       Tag.setOffset(0x100000000);
1365       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
1366           .addReg(DstReg)
1367           .add(Tag)
1368           .addImm(48);
1369     }
1370 
1371     MachineInstrBuilder MIB2 =
1372         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1373             .add(MI.getOperand(0))
1374             .addReg(DstReg)
1375             .add(MI.getOperand(2))
1376             .addImm(0);
1377 
1378     transferImpOps(MI, MIB1, MIB2);
1379     MI.eraseFromParent();
1380     return true;
1381   }
1382   case AArch64::ADDlowTLS:
1383     // Produce a plain ADD
1384     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1385         .add(MI.getOperand(0))
1386         .add(MI.getOperand(1))
1387         .add(MI.getOperand(2))
1388         .addImm(0);
1389     MI.eraseFromParent();
1390     return true;
1391 
1392   case AArch64::MOVbaseTLS: {
1393     Register DstReg = MI.getOperand(0).getReg();
1394     auto SysReg = AArch64SysReg::TPIDR_EL0;
1395     MachineFunction *MF = MBB.getParent();
1396     if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1397       SysReg = AArch64SysReg::TPIDR_EL3;
1398     else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1399       SysReg = AArch64SysReg::TPIDR_EL2;
1400     else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1401       SysReg = AArch64SysReg::TPIDR_EL1;
1402     else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
1403       SysReg = AArch64SysReg::TPIDRRO_EL0;
1404     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
1405         .addImm(SysReg);
1406     MI.eraseFromParent();
1407     return true;
1408   }
1409 
1410   case AArch64::MOVi32imm:
1411     return expandMOVImm(MBB, MBBI, 32);
1412   case AArch64::MOVi64imm:
1413     return expandMOVImm(MBB, MBBI, 64);
1414   case AArch64::RET_ReallyLR: {
1415     // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1416     // function and missing live-ins. We are fine in practice because callee
1417     // saved register handling ensures the register value is restored before
1418     // RET, but we need the undef flag here to appease the MachineVerifier
1419     // liveness checks.
1420     MachineInstrBuilder MIB =
1421         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
1422           .addReg(AArch64::LR, RegState::Undef);
1423     transferImpOps(MI, MIB, MIB);
1424     MI.eraseFromParent();
1425     return true;
1426   }
1427   case AArch64::CMP_SWAP_8:
1428     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1429                           AArch64::SUBSWrx,
1430                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
1431                           AArch64::WZR, NextMBBI);
1432   case AArch64::CMP_SWAP_16:
1433     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1434                           AArch64::SUBSWrx,
1435                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
1436                           AArch64::WZR, NextMBBI);
1437   case AArch64::CMP_SWAP_32:
1438     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1439                           AArch64::SUBSWrs,
1440                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1441                           AArch64::WZR, NextMBBI);
1442   case AArch64::CMP_SWAP_64:
1443     return expandCMP_SWAP(MBB, MBBI,
1444                           AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1445                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1446                           AArch64::XZR, NextMBBI);
1447   case AArch64::CMP_SWAP_128:
1448   case AArch64::CMP_SWAP_128_RELEASE:
1449   case AArch64::CMP_SWAP_128_ACQUIRE:
1450   case AArch64::CMP_SWAP_128_MONOTONIC:
1451     return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1452 
1453   case AArch64::AESMCrrTied:
1454   case AArch64::AESIMCrrTied: {
1455     MachineInstrBuilder MIB =
1456     BuildMI(MBB, MBBI, MI.getDebugLoc(),
1457             TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1458                                                       AArch64::AESIMCrr))
1459       .add(MI.getOperand(0))
1460       .add(MI.getOperand(1));
1461     transferImpOps(MI, MIB, MIB);
1462     MI.eraseFromParent();
1463     return true;
1464    }
1465    case AArch64::IRGstack: {
1466      MachineFunction &MF = *MBB.getParent();
1467      const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1468      const AArch64FrameLowering *TFI =
1469          MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1470 
1471      // IRG does not allow immediate offset. getTaggedBasePointerOffset should
1472      // almost always point to SP-after-prologue; if not, emit a longer
1473      // instruction sequence.
1474      int BaseOffset = -AFI->getTaggedBasePointerOffset();
1475      Register FrameReg;
1476      StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1477          MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
1478          /*PreferFP=*/false,
1479          /*ForSimm=*/true);
1480      Register SrcReg = FrameReg;
1481      if (FrameRegOffset) {
1482        // Use output register as temporary.
1483        SrcReg = MI.getOperand(0).getReg();
1484        emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
1485                        FrameRegOffset, TII);
1486      }
1487      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
1488          .add(MI.getOperand(0))
1489          .addUse(SrcReg)
1490          .add(MI.getOperand(2));
1491      MI.eraseFromParent();
1492      return true;
1493    }
1494    case AArch64::TAGPstack: {
1495      int64_t Offset = MI.getOperand(2).getImm();
1496      BuildMI(MBB, MBBI, MI.getDebugLoc(),
1497              TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1498          .add(MI.getOperand(0))
1499          .add(MI.getOperand(1))
1500          .addImm(std::abs(Offset))
1501          .add(MI.getOperand(4));
1502      MI.eraseFromParent();
1503      return true;
1504    }
1505    case AArch64::STGloop_wback:
1506    case AArch64::STZGloop_wback:
1507      return expandSetTagLoop(MBB, MBBI, NextMBBI);
1508    case AArch64::STGloop:
1509    case AArch64::STZGloop:
1510      report_fatal_error(
1511          "Non-writeback variants of STGloop / STZGloop should not "
1512          "survive past PrologEpilogInserter.");
1513    case AArch64::STR_ZZZZXI:
1514      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
1515    case AArch64::STR_ZZZXI:
1516      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
1517    case AArch64::STR_ZZXI:
1518      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
1519    case AArch64::STR_PPXI:
1520      return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
1521    case AArch64::LDR_ZZZZXI:
1522      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
1523    case AArch64::LDR_ZZZXI:
1524      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
1525    case AArch64::LDR_ZZXI:
1526      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
1527    case AArch64::LDR_PPXI:
1528      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
1529    case AArch64::BLR_RVMARKER:
1530      return expandCALL_RVMARKER(MBB, MBBI);
1531    case AArch64::BLR_BTI:
1532      return expandCALL_BTI(MBB, MBBI);
1533    case AArch64::StoreSwiftAsyncContext:
1534      return expandStoreSwiftAsyncContext(MBB, MBBI);
1535    case AArch64::RestoreZAPseudo: {
1536      auto *NewMBB = expandRestoreZA(MBB, MBBI);
1537      if (NewMBB != &MBB)
1538        NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1539      return true;
1540    }
1541    case AArch64::MSRpstatePseudo: {
1542      auto *NewMBB = expandCondSMToggle(MBB, MBBI);
1543      if (NewMBB != &MBB)
1544        NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1545      return true;
1546    }
1547    case AArch64::COALESCER_BARRIER_FPR16:
1548    case AArch64::COALESCER_BARRIER_FPR32:
1549    case AArch64::COALESCER_BARRIER_FPR64:
1550    case AArch64::COALESCER_BARRIER_FPR128:
1551      MI.eraseFromParent();
1552      return true;
1553    case AArch64::LD1B_2Z_IMM_PSEUDO:
1554      return expandMultiVecPseudo(
1555          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1556          AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
1557    case AArch64::LD1H_2Z_IMM_PSEUDO:
1558      return expandMultiVecPseudo(
1559          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1560          AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
1561    case AArch64::LD1W_2Z_IMM_PSEUDO:
1562      return expandMultiVecPseudo(
1563          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1564          AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
1565    case AArch64::LD1D_2Z_IMM_PSEUDO:
1566      return expandMultiVecPseudo(
1567          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1568          AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
1569    case AArch64::LDNT1B_2Z_IMM_PSEUDO:
1570      return expandMultiVecPseudo(
1571          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1572          AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
1573    case AArch64::LDNT1H_2Z_IMM_PSEUDO:
1574      return expandMultiVecPseudo(
1575          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1576          AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
1577    case AArch64::LDNT1W_2Z_IMM_PSEUDO:
1578      return expandMultiVecPseudo(
1579          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1580          AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
1581    case AArch64::LDNT1D_2Z_IMM_PSEUDO:
1582      return expandMultiVecPseudo(
1583          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1584          AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
1585    case AArch64::LD1B_2Z_PSEUDO:
1586      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1587                                  AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
1588                                  AArch64::LD1B_2Z_STRIDED);
1589    case AArch64::LD1H_2Z_PSEUDO:
1590      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1591                                  AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
1592                                  AArch64::LD1H_2Z_STRIDED);
1593    case AArch64::LD1W_2Z_PSEUDO:
1594      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1595                                  AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
1596                                  AArch64::LD1W_2Z_STRIDED);
1597    case AArch64::LD1D_2Z_PSEUDO:
1598      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1599                                  AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
1600                                  AArch64::LD1D_2Z_STRIDED);
1601    case AArch64::LDNT1B_2Z_PSEUDO:
1602      return expandMultiVecPseudo(
1603          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1604          AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
1605    case AArch64::LDNT1H_2Z_PSEUDO:
1606      return expandMultiVecPseudo(
1607          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1608          AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
1609    case AArch64::LDNT1W_2Z_PSEUDO:
1610      return expandMultiVecPseudo(
1611          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1612          AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
1613    case AArch64::LDNT1D_2Z_PSEUDO:
1614      return expandMultiVecPseudo(
1615          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1616          AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
1617    case AArch64::LD1B_4Z_IMM_PSEUDO:
1618      return expandMultiVecPseudo(
1619          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1620          AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
1621    case AArch64::LD1H_4Z_IMM_PSEUDO:
1622      return expandMultiVecPseudo(
1623          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1624          AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
1625    case AArch64::LD1W_4Z_IMM_PSEUDO:
1626      return expandMultiVecPseudo(
1627          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1628          AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
1629    case AArch64::LD1D_4Z_IMM_PSEUDO:
1630      return expandMultiVecPseudo(
1631          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1632          AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
1633    case AArch64::LDNT1B_4Z_IMM_PSEUDO:
1634      return expandMultiVecPseudo(
1635          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1636          AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
1637    case AArch64::LDNT1H_4Z_IMM_PSEUDO:
1638      return expandMultiVecPseudo(
1639          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1640          AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
1641    case AArch64::LDNT1W_4Z_IMM_PSEUDO:
1642      return expandMultiVecPseudo(
1643          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1644          AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
1645    case AArch64::LDNT1D_4Z_IMM_PSEUDO:
1646      return expandMultiVecPseudo(
1647          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1648          AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
1649    case AArch64::LD1B_4Z_PSEUDO:
1650      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1651                                  AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
1652                                  AArch64::LD1B_4Z_STRIDED);
1653    case AArch64::LD1H_4Z_PSEUDO:
1654      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1655                                  AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
1656                                  AArch64::LD1H_4Z_STRIDED);
1657    case AArch64::LD1W_4Z_PSEUDO:
1658      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1659                                  AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
1660                                  AArch64::LD1W_4Z_STRIDED);
1661    case AArch64::LD1D_4Z_PSEUDO:
1662      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1663                                  AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
1664                                  AArch64::LD1D_4Z_STRIDED);
1665    case AArch64::LDNT1B_4Z_PSEUDO:
1666      return expandMultiVecPseudo(
1667          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1668          AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
1669    case AArch64::LDNT1H_4Z_PSEUDO:
1670      return expandMultiVecPseudo(
1671          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1672          AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
1673    case AArch64::LDNT1W_4Z_PSEUDO:
1674      return expandMultiVecPseudo(
1675          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1676          AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
1677    case AArch64::LDNT1D_4Z_PSEUDO:
1678      return expandMultiVecPseudo(
1679          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1680          AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
1681   }
1682   return false;
1683 }
1684 
1685 /// Iterate over the instructions in basic block MBB and expand any
1686 /// pseudo instructions.  Return true if anything was modified.
1687 bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
1688   bool Modified = false;
1689 
1690   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1691   while (MBBI != E) {
1692     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
1693     Modified |= expandMI(MBB, MBBI, NMBBI);
1694     MBBI = NMBBI;
1695   }
1696 
1697   return Modified;
1698 }
1699 
1700 bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
1701   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
1702 
1703   bool Modified = false;
1704   for (auto &MBB : MF)
1705     Modified |= expandMBB(MBB);
1706   return Modified;
1707 }
1708 
1709 /// Returns an instance of the pseudo instruction expansion pass.
1710 FunctionPass *llvm::createAArch64ExpandPseudoPass() {
1711   return new AArch64ExpandPseudo();
1712 }
1713