xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp (revision a03411e84728e9b267056fd31c7d1d9d1dc1b01e)
1 //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands pseudo instructions into target
10 // instructions to allow proper scheduling and other late optimizations.  This
11 // pass should be run after register allocation but before the post-regalloc
12 // scheduling pass.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64ExpandImm.h"
17 #include "AArch64InstrInfo.h"
18 #include "AArch64MachineFunctionInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/CodeGen/LivePhysRegs.h"
24 #include "llvm/CodeGen/MachineBasicBlock.h"
25 #include "llvm/CodeGen/MachineConstantPool.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineFunctionPass.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineOperand.h"
31 #include "llvm/CodeGen/TargetSubtargetInfo.h"
32 #include "llvm/IR/DebugLoc.h"
33 #include "llvm/MC/MCInstrDesc.h"
34 #include "llvm/Pass.h"
35 #include "llvm/Support/CodeGen.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Target/TargetMachine.h"
38 #include "llvm/TargetParser/Triple.h"
39 #include <cassert>
40 #include <cstdint>
41 #include <iterator>
42 #include <limits>
43 #include <utility>
44 
45 using namespace llvm;
46 
47 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
48 
49 namespace {
50 
51 class AArch64ExpandPseudo : public MachineFunctionPass {
52 public:
53   const AArch64InstrInfo *TII;
54 
55   static char ID;
56 
57   AArch64ExpandPseudo() : MachineFunctionPass(ID) {
58     initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
59   }
60 
61   bool runOnMachineFunction(MachineFunction &Fn) override;
62 
63   StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
64 
65 private:
66   bool expandMBB(MachineBasicBlock &MBB);
67   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
68                 MachineBasicBlock::iterator &NextMBBI);
69   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
70                     unsigned BitSize);
71 
72   bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
73                             MachineBasicBlock::iterator MBBI);
74   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
75                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
76                       unsigned ExtendImm, unsigned ZeroReg,
77                       MachineBasicBlock::iterator &NextMBBI);
78   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
79                           MachineBasicBlock::iterator MBBI,
80                           MachineBasicBlock::iterator &NextMBBI);
81   bool expandSetTagLoop(MachineBasicBlock &MBB,
82                         MachineBasicBlock::iterator MBBI,
83                         MachineBasicBlock::iterator &NextMBBI);
84   bool expandSVESpillFill(MachineBasicBlock &MBB,
85                           MachineBasicBlock::iterator MBBI, unsigned Opc,
86                           unsigned N);
87   bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
88                            MachineBasicBlock::iterator MBBI);
89   bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
90   bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
91                                     MachineBasicBlock::iterator MBBI);
92   MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
93                                      MachineBasicBlock::iterator MBBI);
94   MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
95                                         MachineBasicBlock::iterator MBBI);
96 };
97 
98 } // end anonymous namespace
99 
100 char AArch64ExpandPseudo::ID = 0;
101 
102 INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
103                 AARCH64_EXPAND_PSEUDO_NAME, false, false)
104 
105 /// Transfer implicit operands on the pseudo instruction to the
106 /// instructions created from the expansion.
107 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
108                            MachineInstrBuilder &DefMI) {
109   const MCInstrDesc &Desc = OldMI.getDesc();
110   for (const MachineOperand &MO :
111        llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
112     assert(MO.isReg() && MO.getReg());
113     if (MO.isUse())
114       UseMI.add(MO);
115     else
116       DefMI.add(MO);
117   }
118 }
119 
120 /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
121 /// real move-immediate instructions to synthesize the immediate.
122 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
123                                        MachineBasicBlock::iterator MBBI,
124                                        unsigned BitSize) {
125   MachineInstr &MI = *MBBI;
126   Register DstReg = MI.getOperand(0).getReg();
127   uint64_t RenamableState =
128       MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
129   uint64_t Imm = MI.getOperand(1).getImm();
130 
131   if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
132     // Useless def, and we don't want to risk creating an invalid ORR (which
133     // would really write to sp).
134     MI.eraseFromParent();
135     return true;
136   }
137 
138   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
139   AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
140   assert(Insn.size() != 0);
141 
142   SmallVector<MachineInstrBuilder, 4> MIBS;
143   for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
144     bool LastItem = std::next(I) == E;
145     switch (I->Opcode)
146     {
147     default: llvm_unreachable("unhandled!"); break;
148 
149     case AArch64::ORRWri:
150     case AArch64::ORRXri:
151       if (I->Op1 == 0) {
152         MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
153                            .add(MI.getOperand(0))
154                            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
155                            .addImm(I->Op2));
156       } else {
157         Register DstReg = MI.getOperand(0).getReg();
158         bool DstIsDead = MI.getOperand(0).isDead();
159         MIBS.push_back(
160             BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
161                 .addReg(DstReg, RegState::Define |
162                                     getDeadRegState(DstIsDead && LastItem) |
163                                     RenamableState)
164                 .addReg(DstReg)
165                 .addImm(I->Op2));
166       }
167       break;
168     case AArch64::ANDXri:
169       if (I->Op1 == 0) {
170         MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
171                            .add(MI.getOperand(0))
172                            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
173                            .addImm(I->Op2));
174       } else {
175         Register DstReg = MI.getOperand(0).getReg();
176         bool DstIsDead = MI.getOperand(0).isDead();
177         MIBS.push_back(
178             BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
179                 .addReg(DstReg, RegState::Define |
180                                     getDeadRegState(DstIsDead && LastItem) |
181                                     RenamableState)
182                 .addReg(DstReg)
183                 .addImm(I->Op2));
184       }
185       break;
186     case AArch64::MOVNWi:
187     case AArch64::MOVNXi:
188     case AArch64::MOVZWi:
189     case AArch64::MOVZXi: {
190       bool DstIsDead = MI.getOperand(0).isDead();
191       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
192         .addReg(DstReg, RegState::Define |
193                 getDeadRegState(DstIsDead && LastItem) |
194                 RenamableState)
195         .addImm(I->Op1)
196         .addImm(I->Op2));
197       } break;
198     case AArch64::MOVKWi:
199     case AArch64::MOVKXi: {
200       Register DstReg = MI.getOperand(0).getReg();
201       bool DstIsDead = MI.getOperand(0).isDead();
202       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
203         .addReg(DstReg,
204                 RegState::Define |
205                 getDeadRegState(DstIsDead && LastItem) |
206                 RenamableState)
207         .addReg(DstReg)
208         .addImm(I->Op1)
209         .addImm(I->Op2));
210       } break;
211     }
212   }
213   transferImpOps(MI, MIBS.front(), MIBS.back());
214   MI.eraseFromParent();
215   return true;
216 }
217 
218 bool AArch64ExpandPseudo::expandCMP_SWAP(
219     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
220     unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
221     MachineBasicBlock::iterator &NextMBBI) {
222   MachineInstr &MI = *MBBI;
223   MIMetadata MIMD(MI);
224   const MachineOperand &Dest = MI.getOperand(0);
225   Register StatusReg = MI.getOperand(1).getReg();
226   bool StatusDead = MI.getOperand(1).isDead();
227   // Duplicating undef operands into 2 instructions does not guarantee the same
228   // value on both; However undef should be replaced by xzr anyway.
229   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
230   Register AddrReg = MI.getOperand(2).getReg();
231   Register DesiredReg = MI.getOperand(3).getReg();
232   Register NewReg = MI.getOperand(4).getReg();
233 
234   MachineFunction *MF = MBB.getParent();
235   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
236   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
237   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
238 
239   MF->insert(++MBB.getIterator(), LoadCmpBB);
240   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
241   MF->insert(++StoreBB->getIterator(), DoneBB);
242 
243   // .Lloadcmp:
244   //     mov wStatus, 0
245   //     ldaxr xDest, [xAddr]
246   //     cmp xDest, xDesired
247   //     b.ne .Ldone
248   if (!StatusDead)
249     BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
250       .addImm(0).addImm(0);
251   BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
252       .addReg(AddrReg);
253   BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
254       .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
255       .addReg(DesiredReg)
256       .addImm(ExtendImm);
257   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
258       .addImm(AArch64CC::NE)
259       .addMBB(DoneBB)
260       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
261   LoadCmpBB->addSuccessor(DoneBB);
262   LoadCmpBB->addSuccessor(StoreBB);
263 
264   // .Lstore:
265   //     stlxr wStatus, xNew, [xAddr]
266   //     cbnz wStatus, .Lloadcmp
267   BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
268       .addReg(NewReg)
269       .addReg(AddrReg);
270   BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
271       .addReg(StatusReg, getKillRegState(StatusDead))
272       .addMBB(LoadCmpBB);
273   StoreBB->addSuccessor(LoadCmpBB);
274   StoreBB->addSuccessor(DoneBB);
275 
276   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
277   DoneBB->transferSuccessors(&MBB);
278 
279   MBB.addSuccessor(LoadCmpBB);
280 
281   NextMBBI = MBB.end();
282   MI.eraseFromParent();
283 
284   // Recompute livein lists.
285   LivePhysRegs LiveRegs;
286   computeAndAddLiveIns(LiveRegs, *DoneBB);
287   computeAndAddLiveIns(LiveRegs, *StoreBB);
288   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
289   // Do an extra pass around the loop to get loop carried registers right.
290   StoreBB->clearLiveIns();
291   computeAndAddLiveIns(LiveRegs, *StoreBB);
292   LoadCmpBB->clearLiveIns();
293   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
294 
295   return true;
296 }
297 
298 bool AArch64ExpandPseudo::expandCMP_SWAP_128(
299     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
300     MachineBasicBlock::iterator &NextMBBI) {
301   MachineInstr &MI = *MBBI;
302   MIMetadata MIMD(MI);
303   MachineOperand &DestLo = MI.getOperand(0);
304   MachineOperand &DestHi = MI.getOperand(1);
305   Register StatusReg = MI.getOperand(2).getReg();
306   bool StatusDead = MI.getOperand(2).isDead();
307   // Duplicating undef operands into 2 instructions does not guarantee the same
308   // value on both; However undef should be replaced by xzr anyway.
309   assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
310   Register AddrReg = MI.getOperand(3).getReg();
311   Register DesiredLoReg = MI.getOperand(4).getReg();
312   Register DesiredHiReg = MI.getOperand(5).getReg();
313   Register NewLoReg = MI.getOperand(6).getReg();
314   Register NewHiReg = MI.getOperand(7).getReg();
315 
316   unsigned LdxpOp, StxpOp;
317 
318   switch (MI.getOpcode()) {
319   case AArch64::CMP_SWAP_128_MONOTONIC:
320     LdxpOp = AArch64::LDXPX;
321     StxpOp = AArch64::STXPX;
322     break;
323   case AArch64::CMP_SWAP_128_RELEASE:
324     LdxpOp = AArch64::LDXPX;
325     StxpOp = AArch64::STLXPX;
326     break;
327   case AArch64::CMP_SWAP_128_ACQUIRE:
328     LdxpOp = AArch64::LDAXPX;
329     StxpOp = AArch64::STXPX;
330     break;
331   case AArch64::CMP_SWAP_128:
332     LdxpOp = AArch64::LDAXPX;
333     StxpOp = AArch64::STLXPX;
334     break;
335   default:
336     llvm_unreachable("Unexpected opcode");
337   }
338 
339   MachineFunction *MF = MBB.getParent();
340   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
341   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
342   auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
343   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
344 
345   MF->insert(++MBB.getIterator(), LoadCmpBB);
346   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
347   MF->insert(++StoreBB->getIterator(), FailBB);
348   MF->insert(++FailBB->getIterator(), DoneBB);
349 
350   // .Lloadcmp:
351   //     ldaxp xDestLo, xDestHi, [xAddr]
352   //     cmp xDestLo, xDesiredLo
353   //     sbcs xDestHi, xDesiredHi
354   //     b.ne .Ldone
355   BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
356       .addReg(DestLo.getReg(), RegState::Define)
357       .addReg(DestHi.getReg(), RegState::Define)
358       .addReg(AddrReg);
359   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
360       .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
361       .addReg(DesiredLoReg)
362       .addImm(0);
363   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
364     .addUse(AArch64::WZR)
365     .addUse(AArch64::WZR)
366     .addImm(AArch64CC::EQ);
367   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
368       .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
369       .addReg(DesiredHiReg)
370       .addImm(0);
371   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
372       .addUse(StatusReg, RegState::Kill)
373       .addUse(StatusReg, RegState::Kill)
374       .addImm(AArch64CC::EQ);
375   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
376       .addUse(StatusReg, getKillRegState(StatusDead))
377       .addMBB(FailBB);
378   LoadCmpBB->addSuccessor(FailBB);
379   LoadCmpBB->addSuccessor(StoreBB);
380 
381   // .Lstore:
382   //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
383   //     cbnz wStatus, .Lloadcmp
384   BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
385       .addReg(NewLoReg)
386       .addReg(NewHiReg)
387       .addReg(AddrReg);
388   BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
389       .addReg(StatusReg, getKillRegState(StatusDead))
390       .addMBB(LoadCmpBB);
391   BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
392   StoreBB->addSuccessor(LoadCmpBB);
393   StoreBB->addSuccessor(DoneBB);
394 
395   // .Lfail:
396   //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
397   //     cbnz wStatus, .Lloadcmp
398   BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
399       .addReg(DestLo.getReg())
400       .addReg(DestHi.getReg())
401       .addReg(AddrReg);
402   BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
403       .addReg(StatusReg, getKillRegState(StatusDead))
404       .addMBB(LoadCmpBB);
405   FailBB->addSuccessor(LoadCmpBB);
406   FailBB->addSuccessor(DoneBB);
407 
408   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
409   DoneBB->transferSuccessors(&MBB);
410 
411   MBB.addSuccessor(LoadCmpBB);
412 
413   NextMBBI = MBB.end();
414   MI.eraseFromParent();
415 
416   // Recompute liveness bottom up.
417   LivePhysRegs LiveRegs;
418   computeAndAddLiveIns(LiveRegs, *DoneBB);
419   computeAndAddLiveIns(LiveRegs, *FailBB);
420   computeAndAddLiveIns(LiveRegs, *StoreBB);
421   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
422 
423   // Do an extra pass in the loop to get the loop carried dependencies right.
424   FailBB->clearLiveIns();
425   computeAndAddLiveIns(LiveRegs, *FailBB);
426   StoreBB->clearLiveIns();
427   computeAndAddLiveIns(LiveRegs, *StoreBB);
428   LoadCmpBB->clearLiveIns();
429   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
430 
431   return true;
432 }
433 
434 /// \brief Expand Pseudos to Instructions with destructive operands.
435 ///
436 /// This mechanism uses MOVPRFX instructions for zeroing the false lanes
437 /// or for fixing relaxed register allocation conditions to comply with
438 /// the instructions register constraints. The latter case may be cheaper
439 /// than setting the register constraints in the register allocator,
440 /// since that will insert regular MOV instructions rather than MOVPRFX.
441 ///
442 /// Example (after register allocation):
443 ///
444 ///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
445 ///
446 /// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
447 /// * We cannot map directly to FSUB_ZPmZ_B because the register
448 ///   constraints of the instruction are not met.
449 /// * Also the _ZERO specifies the false lanes need to be zeroed.
450 ///
451 /// We first try to see if the destructive operand == result operand,
452 /// if not, we try to swap the operands, e.g.
453 ///
454 ///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
455 ///
456 /// But because FSUB_ZPmZ is not commutative, this is semantically
457 /// different, so we need a reverse instruction:
458 ///
459 ///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
460 ///
461 /// Then we implement the zeroing of the false lanes of Z0 by adding
462 /// a zeroing MOVPRFX instruction:
463 ///
464 ///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
465 ///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
466 ///
467 /// Note that this can only be done for _ZERO or _UNDEF variants where
468 /// we can guarantee the false lanes to be zeroed (by implementing this)
469 /// or that they are undef (don't care / not used), otherwise the
470 /// swapping of operands is illegal because the operation is not
471 /// (or cannot be emulated to be) fully commutative.
472 bool AArch64ExpandPseudo::expand_DestructiveOp(
473                             MachineInstr &MI,
474                             MachineBasicBlock &MBB,
475                             MachineBasicBlock::iterator MBBI) {
476   unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
477   uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
478   uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
479   bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
480   Register DstReg = MI.getOperand(0).getReg();
481   bool DstIsDead = MI.getOperand(0).isDead();
482   bool UseRev = false;
483   unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
484 
485   switch (DType) {
486   case AArch64::DestructiveBinaryComm:
487   case AArch64::DestructiveBinaryCommWithRev:
488     if (DstReg == MI.getOperand(3).getReg()) {
489       // FSUB Zd, Pg, Zs1, Zd  ==> FSUBR   Zd, Pg/m, Zd, Zs1
490       std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
491       UseRev = true;
492       break;
493     }
494     [[fallthrough]];
495   case AArch64::DestructiveBinary:
496   case AArch64::DestructiveBinaryImm:
497     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
498     break;
499   case AArch64::DestructiveUnaryPassthru:
500     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
501     break;
502   case AArch64::DestructiveTernaryCommWithRev:
503     std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
504     if (DstReg == MI.getOperand(3).getReg()) {
505       // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
506       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
507       UseRev = true;
508     } else if (DstReg == MI.getOperand(4).getReg()) {
509       // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
510       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
511       UseRev = true;
512     }
513     break;
514   default:
515     llvm_unreachable("Unsupported Destructive Operand type");
516   }
517 
518   // MOVPRFX can only be used if the destination operand
519   // is the destructive operand, not as any other operand,
520   // so the Destructive Operand must be unique.
521   bool DOPRegIsUnique = false;
522   switch (DType) {
523   case AArch64::DestructiveBinary:
524     DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
525     break;
526   case AArch64::DestructiveBinaryComm:
527   case AArch64::DestructiveBinaryCommWithRev:
528     DOPRegIsUnique =
529       DstReg != MI.getOperand(DOPIdx).getReg() ||
530       MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
531     break;
532   case AArch64::DestructiveUnaryPassthru:
533   case AArch64::DestructiveBinaryImm:
534     DOPRegIsUnique = true;
535     break;
536   case AArch64::DestructiveTernaryCommWithRev:
537     DOPRegIsUnique =
538         DstReg != MI.getOperand(DOPIdx).getReg() ||
539         (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
540          MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
541     break;
542   }
543 
544   // Resolve the reverse opcode
545   if (UseRev) {
546     int NewOpcode;
547     // e.g. DIV -> DIVR
548     if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
549       Opcode = NewOpcode;
550     // e.g. DIVR -> DIV
551     else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
552       Opcode = NewOpcode;
553   }
554 
555   // Get the right MOVPRFX
556   uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
557   unsigned MovPrfx, LSLZero, MovPrfxZero;
558   switch (ElementSize) {
559   case AArch64::ElementSizeNone:
560   case AArch64::ElementSizeB:
561     MovPrfx = AArch64::MOVPRFX_ZZ;
562     LSLZero = AArch64::LSL_ZPmI_B;
563     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
564     break;
565   case AArch64::ElementSizeH:
566     MovPrfx = AArch64::MOVPRFX_ZZ;
567     LSLZero = AArch64::LSL_ZPmI_H;
568     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
569     break;
570   case AArch64::ElementSizeS:
571     MovPrfx = AArch64::MOVPRFX_ZZ;
572     LSLZero = AArch64::LSL_ZPmI_S;
573     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
574     break;
575   case AArch64::ElementSizeD:
576     MovPrfx = AArch64::MOVPRFX_ZZ;
577     LSLZero = AArch64::LSL_ZPmI_D;
578     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
579     break;
580   default:
581     llvm_unreachable("Unsupported ElementSize");
582   }
583 
584   //
585   // Create the destructive operation (if required)
586   //
587   MachineInstrBuilder PRFX, DOP;
588   if (FalseZero) {
589     // If we cannot prefix the requested instruction we'll instead emit a
590     // prefixed_zeroing_mov for DestructiveBinary.
591     assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
592             DType == AArch64::DestructiveBinaryComm ||
593             DType == AArch64::DestructiveBinaryCommWithRev) &&
594            "The destructive operand should be unique");
595     assert(ElementSize != AArch64::ElementSizeNone &&
596            "This instruction is unpredicated");
597 
598     // Merge source operand into destination register
599     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
600                .addReg(DstReg, RegState::Define)
601                .addReg(MI.getOperand(PredIdx).getReg())
602                .addReg(MI.getOperand(DOPIdx).getReg());
603 
604     // After the movprfx, the destructive operand is same as Dst
605     DOPIdx = 0;
606 
607     // Create the additional LSL to zero the lanes when the DstReg is not
608     // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
609     // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
610     if ((DType == AArch64::DestructiveBinary ||
611          DType == AArch64::DestructiveBinaryComm ||
612          DType == AArch64::DestructiveBinaryCommWithRev) &&
613         !DOPRegIsUnique) {
614       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
615           .addReg(DstReg, RegState::Define)
616           .add(MI.getOperand(PredIdx))
617           .addReg(DstReg)
618           .addImm(0);
619     }
620   } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
621     assert(DOPRegIsUnique && "The destructive operand should be unique");
622     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
623                .addReg(DstReg, RegState::Define)
624                .addReg(MI.getOperand(DOPIdx).getReg());
625     DOPIdx = 0;
626   }
627 
628   //
629   // Create the destructive operation
630   //
631   DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
632     .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
633 
634   switch (DType) {
635   case AArch64::DestructiveUnaryPassthru:
636     DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
637         .add(MI.getOperand(PredIdx))
638         .add(MI.getOperand(SrcIdx));
639     break;
640   case AArch64::DestructiveBinary:
641   case AArch64::DestructiveBinaryImm:
642   case AArch64::DestructiveBinaryComm:
643   case AArch64::DestructiveBinaryCommWithRev:
644     DOP.add(MI.getOperand(PredIdx))
645        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
646        .add(MI.getOperand(SrcIdx));
647     break;
648   case AArch64::DestructiveTernaryCommWithRev:
649     DOP.add(MI.getOperand(PredIdx))
650         .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
651         .add(MI.getOperand(SrcIdx))
652         .add(MI.getOperand(Src2Idx));
653     break;
654   }
655 
656   if (PRFX) {
657     finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
658     transferImpOps(MI, PRFX, DOP);
659   } else
660     transferImpOps(MI, DOP, DOP);
661 
662   MI.eraseFromParent();
663   return true;
664 }
665 
666 bool AArch64ExpandPseudo::expandSetTagLoop(
667     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
668     MachineBasicBlock::iterator &NextMBBI) {
669   MachineInstr &MI = *MBBI;
670   DebugLoc DL = MI.getDebugLoc();
671   Register SizeReg = MI.getOperand(0).getReg();
672   Register AddressReg = MI.getOperand(1).getReg();
673 
674   MachineFunction *MF = MBB.getParent();
675 
676   bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
677   const unsigned OpCode1 =
678       ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
679   const unsigned OpCode2 =
680       ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
681 
682   unsigned Size = MI.getOperand(2).getImm();
683   assert(Size > 0 && Size % 16 == 0);
684   if (Size % (16 * 2) != 0) {
685     BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
686         .addReg(AddressReg)
687         .addReg(AddressReg)
688         .addImm(1);
689     Size -= 16;
690   }
691   MachineBasicBlock::iterator I =
692       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
693           .addImm(Size);
694   expandMOVImm(MBB, I, 64);
695 
696   auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
697   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
698 
699   MF->insert(++MBB.getIterator(), LoopBB);
700   MF->insert(++LoopBB->getIterator(), DoneBB);
701 
702   BuildMI(LoopBB, DL, TII->get(OpCode2))
703       .addDef(AddressReg)
704       .addReg(AddressReg)
705       .addReg(AddressReg)
706       .addImm(2)
707       .cloneMemRefs(MI)
708       .setMIFlags(MI.getFlags());
709   BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
710       .addDef(SizeReg)
711       .addReg(SizeReg)
712       .addImm(16 * 2)
713       .addImm(0);
714   BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
715       .addImm(AArch64CC::NE)
716       .addMBB(LoopBB)
717       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
718 
719   LoopBB->addSuccessor(LoopBB);
720   LoopBB->addSuccessor(DoneBB);
721 
722   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
723   DoneBB->transferSuccessors(&MBB);
724 
725   MBB.addSuccessor(LoopBB);
726 
727   NextMBBI = MBB.end();
728   MI.eraseFromParent();
729   // Recompute liveness bottom up.
730   LivePhysRegs LiveRegs;
731   computeAndAddLiveIns(LiveRegs, *DoneBB);
732   computeAndAddLiveIns(LiveRegs, *LoopBB);
733   // Do an extra pass in the loop to get the loop carried dependencies right.
734   // FIXME: is this necessary?
735   LoopBB->clearLiveIns();
736   computeAndAddLiveIns(LiveRegs, *LoopBB);
737   DoneBB->clearLiveIns();
738   computeAndAddLiveIns(LiveRegs, *DoneBB);
739 
740   return true;
741 }
742 
743 bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
744                                              MachineBasicBlock::iterator MBBI,
745                                              unsigned Opc, unsigned N) {
746   const TargetRegisterInfo *TRI =
747       MBB.getParent()->getSubtarget().getRegisterInfo();
748   MachineInstr &MI = *MBBI;
749   for (unsigned Offset = 0; Offset < N; ++Offset) {
750     int ImmOffset = MI.getOperand(2).getImm() + Offset;
751     bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
752     assert(ImmOffset >= -256 && ImmOffset < 256 &&
753            "Immediate spill offset out of range");
754     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
755         .addReg(
756             TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
757             Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
758         .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
759         .addImm(ImmOffset);
760   }
761   MI.eraseFromParent();
762   return true;
763 }
764 
/// Expand a call-with-marker pseudo into a bundle of three instructions: the
/// original call, the marker move and a call to the attached runtime function.
///
/// Operand 0 of the pseudo is the attached (runtime) call target and must be
/// a global; operand 1 is the regular call target and is either a global (BL)
/// or a register (BLR). Operands from index 2 up to the register mask must be
/// registers; the register mask and everything after it are copied onto the
/// emitted call unchanged.
///
/// Always returns true.
765 bool AArch64ExpandPseudo::expandCALL_RVMARKER(
766     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
767   // Expand CALL_RVMARKER pseudo to:
768   // - a branch to the call target, followed by
769   // - the special `mov x29, x29` marker, and
770   // - another branch, to the runtime function
771   // Mark the sequence as bundle, to avoid passes moving other code in between.
772   MachineInstr &MI = *MBBI;
773 
774   MachineInstr *OriginalCall;
775   MachineOperand &RVTarget = MI.getOperand(0);
776   MachineOperand &CallTarget = MI.getOperand(1);
777   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
778          "invalid operand for regular call");
779   assert(RVTarget.isGlobal() && "invalid operand for attached call");
780   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
781   OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
782   OriginalCall->addOperand(CallTarget);
783 
784   unsigned RegMaskStartIdx = 2;
785   // Walk the register arguments that precede the register mask. Those are
786   // added during ISel; they are re-added to the call as implicit uses.
787   while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
788     auto MOP = MI.getOperand(RegMaskStartIdx);
789     assert(MOP.isReg() && "can only add register operands");
790     OriginalCall->addOperand(MachineOperand::CreateReg(
791         MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
792     RegMaskStartIdx++;
793   }
      // Copy the register mask and all operands that follow it.
794   for (const MachineOperand &MO :
795        llvm::drop_begin(MI.operands(), RegMaskStartIdx))
796     OriginalCall->addOperand(MO);
797 
      // The marker: ORR FP, XZR, FP, i.e. the `mov x29, x29` described above.
798   BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
799                      .addReg(AArch64::FP, RegState::Define)
800                      .addReg(AArch64::XZR)
801                      .addReg(AArch64::FP)
802                      .addImm(0);
803 
      // The call to the attached runtime function.
804   auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
805                      .add(RVTarget)
806                      .getInstr();
807 
      // Hand the pseudo's call site info over to the real call before the
      // pseudo is erased.
808   if (MI.shouldUpdateCallSiteInfo())
809     MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);
810 
811   MI.eraseFromParent();
      // Bundle everything from the original call up to and including the
      // runtime call.
812   finalizeBundle(MBB, OriginalCall->getIterator(),
813                  std::next(RVCall->getIterator()));
814   return true;
815 }
816 
/// Expand a call-followed-by-BTI pseudo into a bundle holding the call and a
/// HINT #36 instruction.
///
/// Operand 0 of the pseudo is the call target: a global selects BL, a register
/// selects BLR. The CFI type and, when required, the call site info of the
/// pseudo are transferred to the emitted call.
///
/// Always returns true.
817 bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
818                                          MachineBasicBlock::iterator MBBI) {
819   // Expand CALL_BTI pseudo to:
820   // - a branch to the call target
821   // - a BTI instruction
822   // Mark the sequence as a bundle, to avoid passes moving other code in
823   // between.
824 
825   MachineInstr &MI = *MBBI;
826   MachineOperand &CallTarget = MI.getOperand(0);
827   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
828          "invalid operand for regular call");
829   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
830   MachineInstr *Call =
831       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
832   Call->addOperand(CallTarget);
833   Call->setCFIType(*MBB.getParent(), MI.getCFIType());
834 
835   MachineInstr *BTI =
836       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
837           // BTI J (HINT #36), so that setjmp can BR to this.
838           .addImm(36)
839           .getInstr();
840 
      // Hand the pseudo's call site info over to the real call before the
      // pseudo is erased.
841   if (MI.shouldUpdateCallSiteInfo())
842     MBB.getParent()->moveCallSiteInfo(&MI, Call);
843 
844   MI.eraseFromParent();
845   finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
846   return true;
847 }
848 
/// Expand the StoreSwiftAsyncContext pseudo.
///
/// Operand 0 is the context register, operand 1 the base register and operand
/// 2 the immediate offset. When the target triple's architecture name is not
/// "arm64e" a single STRXui of the context register is emitted. Otherwise the
/// context is copied to X17, signed with PACDB using an address-derived
/// discriminator built in X16, and X17 is stored instead. Every emitted
/// instruction carries the FrameSetup flag.
///
/// Always returns true.
849 bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
850     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
851   Register CtxReg = MBBI->getOperand(0).getReg();
852   Register BaseReg = MBBI->getOperand(1).getReg();
853   int Offset = MBBI->getOperand(2).getImm();
854   DebugLoc DL(MBBI->getDebugLoc());
855   auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
856 
      // No pointer signing outside arm64e: store the context register directly.
      // NOTE(review): Offset is divided by 8 for STRXui, presumably because its
      // immediate is scaled by the 8-byte access size -- confirm.
857   if (STI.getTargetTriple().getArchName() != "arm64e") {
858     BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
859         .addUse(CtxReg)
860         .addUse(BaseReg)
861         .addImm(Offset / 8)
862         .setMIFlag(MachineInstr::FrameSetup);
863     MBBI->eraseFromParent();
864     return true;
865   }
866 
867   // We need to sign the context in an address-discriminated way. 0xc31a is a
868   // fixed random value, chosen as part of the ABI.
869   //     add x16, xBase, #Offset
870   //     movk x16, #0xc31a, lsl #48
871   //     mov x17, x22/xzr
872   //     pacdb x17, x16
873   //     str x17, [xBase, #Offset]
      // A negative offset is materialised as a SUB of its absolute value.
874   unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
875   BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
876       .addUse(BaseReg)
877       .addImm(abs(Offset))
878       .addImm(0)
879       .setMIFlag(MachineInstr::FrameSetup);
880   BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
881       .addUse(AArch64::X16)
882       .addImm(0xc31a)
883       .addImm(48)
884       .setMIFlag(MachineInstr::FrameSetup);
885   // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
886   // move it somewhere before signing.
887   BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
888       .addUse(AArch64::XZR)
889       .addUse(CtxReg)
890       .addImm(0)
891       .setMIFlag(MachineInstr::FrameSetup);
892   BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
893       .addUse(AArch64::X17)
894       .addUse(AArch64::X16)
895       .setMIFlag(MachineInstr::FrameSetup);
      // Store the signed copy held in X17 rather than the original register.
896   BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
897       .addUse(AArch64::X17)
898       .addUse(BaseReg)
899       .addImm(Offset / 8)
900       .setMIFlag(MachineInstr::FrameSetup);
901 
902   MBBI->eraseFromParent();
903   return true;
904 }
905 
/// Expand RestoreZAPseudo into a conditionally executed call.
///
/// A CBZX on operand 0 of the pseudo is inserted in front of it and the block
/// is split so that the pseudo ends up alone in a new block (SMBB), which is
/// the CBZX target. MBB ends with an unconditional branch to EndBB. Inside
/// SMBB the pseudo is replaced by a BL that takes operand 1 as an implicit
/// register operand plus all operands from index 2 onwards, followed by a
/// branch to EndBB.
///
/// The pseudo must be followed by another instruction or the block must have
/// a successor (asserted).
///
/// \returns EndBB, the block holding whatever followed the pseudo.
906 MachineBasicBlock *
907 AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
908                                      MachineBasicBlock::iterator MBBI) {
909   MachineInstr &MI = *MBBI;
910   assert((std::next(MBBI) != MBB.end() ||
911           MI.getParent()->successors().begin() !=
912               MI.getParent()->successors().end()) &&
913          "Unexpected unreachable in block that restores ZA");
914 
915   // Compare TPIDR2_EL0 value against 0.
916   DebugLoc DL = MI.getDebugLoc();
917   MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
918                                 .add(MI.getOperand(0));
919 
920   // Split MBB and create two new blocks:
921   //  - MBB now contains all instructions before RestoreZAPseudo.
922   //  - SMBB contains the RestoreZAPseudo instruction only.
923   //  - EndBB contains all instructions after RestoreZAPseudo.
      // PrevMI is the CBZX that was just inserted in front of the pseudo.
924   MachineInstr &PrevMI = *std::prev(MBBI);
925   MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
      // If the pseudo is the last instruction of SMBB there is nothing left to
      // split off; the first successor of SMBB is used as EndBB instead.
926   MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
927                                  ? *SMBB->successors().begin()
928                                  : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
929 
930   // Add the SMBB label to the CBZ instruction & create a branch to EndBB.
931   Cbz.addMBB(SMBB);
932   BuildMI(&MBB, DL, TII->get(AArch64::B))
933       .addMBB(EndBB);
934   MBB.addSuccessor(EndBB);
935 
936   // Replace the pseudo with a call (BL).
937   MachineInstrBuilder MIB =
938       BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
939   MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
940   for (unsigned I = 2; I < MI.getNumOperands(); ++I)
941     MIB.add(MI.getOperand(I));
942   BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
943 
944   MI.eraseFromParent();
945   return EndBB;
946 }
947 
/// Expand the conditional streaming-mode toggle pseudo (MSRpstatePseudo) into
/// a test-and-branch around an unconditional MSRpstatesvcrImm1.
///
/// If the pseudo is the last instruction of a block without successors it is
/// simply erased and \p MBB is returned. Otherwise the block is split around
/// the pseudo as described in the comments below.
///
/// \returns &MBB when the pseudo was only removed, otherwise EndBB, the block
/// holding whatever followed the pseudo.
948 MachineBasicBlock *
949 AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
950                                         MachineBasicBlock::iterator MBBI) {
951   MachineInstr &MI = *MBBI;
952   // In the case of a smstart/smstop before a unreachable, just remove the pseudo.
953   // Exception handling code generated by Clang may introduce unreachables and it
954   // seems unnecessary to restore pstate.sm when that happens. Note that it is
955   // not just an optimisation, the code below expects a successor instruction/block
956   // in order to split the block at MBBI.
957   if (std::next(MBBI) == MBB.end() &&
958       MI.getParent()->successors().begin() ==
959           MI.getParent()->successors().end()) {
960     MI.eraseFromParent();
961     return &MBB;
962   }
963 
964   // Expand the pseudo into smstart or smstop instruction. The pseudo has the
965   // following operands:
966   //
967   //   MSRpstatePseudo <za|sm|both>, <0|1>, pstate.sm, expectedval, <regmask>
968   //
969   // The pseudo is expanded into a conditional smstart/smstop, with a
970   // check if pstate.sm (register) equals the expected value, and if not,
971   // invokes the smstart/smstop.
972   //
973   // As an example, the following block contains a normal call from a
974   // streaming-compatible function:
975   //
976   // OrigBB:
977   //   MSRpstatePseudo 3, 0, %0, 0, <regmask>             <- Conditional SMSTOP
978   //   bl @normal_callee
979   //   MSRpstatePseudo 3, 1, %0, 0, <regmask>             <- Conditional SMSTART
980   //
981   // ...which will be transformed into:
982   //
983   // OrigBB:
984   //   TBNZx %0:gpr64, 0, SMBB
985   //   b EndBB
986   //
987   // SMBB:
988   //   MSRpstatesvcrImm1 3, 0, <regmask>                  <- SMSTOP
989   //
990   // EndBB:
991   //   bl @normal_callee
992   //   MSRcond_pstatesvcrImm1 3, 1, <regmask>             <- SMSTART
993   //
994   DebugLoc DL = MI.getDebugLoc();
995 
996   // Create the conditional branch based on the third operand of the
997   // instruction (operand index 3, counting from 0), which tells us if we are
998   // wrapping a normal or streaming function.
999   // We test the live value of pstate.sm and toggle pstate.sm if this is not the
1000   // expected value for the callee (0 for a normal callee and 1 for a streaming
1001   // callee).
1002   auto PStateSM = MI.getOperand(2).getReg();
1003   bool IsStreamingCallee = MI.getOperand(3).getImm();
       // Streaming callee: branch to SMBB when bit 0 of pstate.sm is clear
       // (TBZX); normal callee: branch when it is set (TBNZX).
1004   unsigned Opc = IsStreamingCallee ? AArch64::TBZX : AArch64::TBNZX;
1005   MachineInstrBuilder Tbx =
1006       BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(PStateSM).addImm(0);
1007 
1008   // Split MBB and create two new blocks:
1009   //  - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
1010   //  - SMBB contains the MSRcond_pstatesvcrImm1 instruction only.
1011   //  - EndBB contains all instructions after MSRcond_pstatesvcrImm1.
       // PrevMI is the TB[N]Z that was just inserted in front of the pseudo.
1012   MachineInstr &PrevMI = *std::prev(MBBI);
1013   MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
       // If the pseudo is the last instruction of SMBB there is nothing left to
       // split off; the first successor of SMBB is used as EndBB instead.
1014   MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
1015                                  ? *SMBB->successors().begin()
1016                                  : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
1017 
1018   // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
1019   Tbx.addMBB(SMBB);
1020   BuildMI(&MBB, DL, TII->get(AArch64::B))
1021       .addMBB(EndBB);
1022   MBB.addSuccessor(EndBB);
1023 
1024   // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
1025   MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
1026                                     TII->get(AArch64::MSRpstatesvcrImm1));
1027   // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
1028   // these contain the CopyFromReg for the first argument and the flag to
1029   // indicate whether the callee is streaming or normal). Counting from 0,
       // these are operand indices 2 and 3: operands 0, 1 and 4.. are copied.
1030   MIB.add(MI.getOperand(0));
1031   MIB.add(MI.getOperand(1));
1032   for (unsigned i = 4; i < MI.getNumOperands(); ++i)
1033     MIB.add(MI.getOperand(i));
1034 
1035   BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1036 
1037   MI.eraseFromParent();
1038   return EndBB;
1039 }
1040 
1041 /// If MBBI references a pseudo instruction that should be expanded here,
1042 /// do the expansion and return true.  Otherwise return false.
///
/// NextMBBI is the caller's iterator to the instruction after MBBI. It is
/// handed to the expansions that take it, and it is set to MBB.end() when an
/// expansion returned a block other than MBB, because the caller's iterator
/// is invalidated in that case.
1043 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
1044                                    MachineBasicBlock::iterator MBBI,
1045                                    MachineBasicBlock::iterator &NextMBBI) {
1046   MachineInstr &MI = *MBBI;
1047   unsigned Opcode = MI.getOpcode();
1048 
1049   // Check if we can expand the destructive op
1050   int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
1051   if (OrigInstr != -1) {
1052     auto &Orig = TII->get(OrigInstr);
1053     if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
1054         AArch64::NotDestructive) {
1055       return expand_DestructiveOp(MI, MBB, MBBI);
1056     }
1057   }
1058 
1059   switch (Opcode) {
1060   default:
1061     break;
1062 
       // BSP pseudo: operand 0 is the destination. Which instruction is emitted
       // depends on which source operand (3, 2 or 1, checked in that order)
       // shares the destination register.
1063   case AArch64::BSPv8i8:
1064   case AArch64::BSPv16i8: {
1065     Register DstReg = MI.getOperand(0).getReg();
1066     if (DstReg == MI.getOperand(3).getReg()) {
1067       // Expand to BIT
1068       BuildMI(MBB, MBBI, MI.getDebugLoc(),
1069               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
1070                                                   : AArch64::BITv16i8))
1071           .add(MI.getOperand(0))
1072           .add(MI.getOperand(3))
1073           .add(MI.getOperand(2))
1074           .add(MI.getOperand(1));
1075     } else if (DstReg == MI.getOperand(2).getReg()) {
1076       // Expand to BIF
1077       BuildMI(MBB, MBBI, MI.getDebugLoc(),
1078               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
1079                                                   : AArch64::BIFv16i8))
1080           .add(MI.getOperand(0))
1081           .add(MI.getOperand(2))
1082           .add(MI.getOperand(3))
1083           .add(MI.getOperand(1));
1084     } else {
1085       // Expand to BSL, use additional move if required
1086       if (DstReg == MI.getOperand(1).getReg()) {
1087         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1088                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1089                                                     : AArch64::BSLv16i8))
1090             .add(MI.getOperand(0))
1091             .add(MI.getOperand(1))
1092             .add(MI.getOperand(2))
1093             .add(MI.getOperand(3));
1094       } else {
               // The destination matches none of the sources: first copy operand
               // 1 into it (ORR with both inputs equal), then emit BSL using the
               // destination as its first source.
1095         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1096                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
1097                                                     : AArch64::ORRv16i8))
1098             .addReg(DstReg,
1099                     RegState::Define |
1100                         getRenamableRegState(MI.getOperand(0).isRenamable()))
1101             .add(MI.getOperand(1))
1102             .add(MI.getOperand(1));
1103         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1104                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1105                                                     : AArch64::BSLv16i8))
1106             .add(MI.getOperand(0))
1107             .addReg(DstReg,
1108                     RegState::Kill |
1109                         getRenamableRegState(MI.getOperand(0).isRenamable()))
1110             .add(MI.getOperand(2))
1111             .add(MI.getOperand(3));
1112       }
1113     }
1114     MI.eraseFromParent();
1115     return true;
1116   }
1117 
       // Register-register (rr) forms are rewritten to the matching
       // shifted-register (rs) opcode with an LSL #0 shifter immediate.
1118   case AArch64::ADDWrr:
1119   case AArch64::SUBWrr:
1120   case AArch64::ADDXrr:
1121   case AArch64::SUBXrr:
1122   case AArch64::ADDSWrr:
1123   case AArch64::SUBSWrr:
1124   case AArch64::ADDSXrr:
1125   case AArch64::SUBSXrr:
1126   case AArch64::ANDWrr:
1127   case AArch64::ANDXrr:
1128   case AArch64::BICWrr:
1129   case AArch64::BICXrr:
1130   case AArch64::ANDSWrr:
1131   case AArch64::ANDSXrr:
1132   case AArch64::BICSWrr:
1133   case AArch64::BICSXrr:
1134   case AArch64::EONWrr:
1135   case AArch64::EONXrr:
1136   case AArch64::EORWrr:
1137   case AArch64::EORXrr:
1138   case AArch64::ORNWrr:
1139   case AArch64::ORNXrr:
1140   case AArch64::ORRWrr:
1141   case AArch64::ORRXrr: {
         // Note: this local shadows the outer Opcode and holds the replacement
         // opcode.
1142     unsigned Opcode;
1143     switch (MI.getOpcode()) {
1144     default:
1145       return false;
1146     case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
1147     case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
1148     case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
1149     case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
1150     case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
1151     case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
1152     case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
1153     case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
1154     case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
1155     case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
1156     case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
1157     case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
1158     case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
1159     case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
1160     case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
1161     case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
1162     case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
1163     case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
1164     case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
1165     case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
1166     case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
1167     case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
1168     case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
1169     case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
1170     }
1171     MachineFunction &MF = *MBB.getParent();
1172     // Try to create new inst without implicit operands added.
1173     MachineInstr *NewMI = MF.CreateMachineInstr(
1174         TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
1175     MBB.insert(MBBI, NewMI);
1176     MachineInstrBuilder MIB1(MF, NewMI);
1177     MIB1->setPCSections(MF, MI.getPCSections());
1178     MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
1179         .add(MI.getOperand(1))
1180         .add(MI.getOperand(2))
1181         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1182     transferImpOps(MI, MIB1, MIB1);
         // Carry over the debug instruction number if the original had one.
1183     if (auto DebugNumber = MI.peekDebugInstrNum())
1184       NewMI->setDebugInstrNum(DebugNumber);
1185     MI.eraseFromParent();
1186     return true;
1187   }
1188 
1189   case AArch64::LOADgot: {
1190     MachineFunction *MF = MBB.getParent();
1191     Register DstReg = MI.getOperand(0).getReg();
1192     const MachineOperand &MO1 = MI.getOperand(1);
1193     unsigned Flags = MO1.getTargetFlags();
1194 
1195     if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
1196       // Tiny codemodel expand to LDR
1197       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1198                                         TII->get(AArch64::LDRXl), DstReg);
1199 
1200       if (MO1.isGlobal()) {
1201         MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
1202       } else if (MO1.isSymbol()) {
1203         MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
1204       } else {
1205         assert(MO1.isCPI() &&
1206                "Only expect globals, externalsymbols, or constant pools");
1207         MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
1208       }
1209     } else {
1210       // Small codemodel expand into ADRP + LDR.
           // Note: this MF (a reference) shadows the pointer declared above.
1211       MachineFunction &MF = *MI.getParent()->getParent();
1212       DebugLoc DL = MI.getDebugLoc();
1213       MachineInstrBuilder MIB1 =
1214           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
1215 
1216       MachineInstrBuilder MIB2;
1217       if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
             // ILP32: load into the 32-bit sub-register only; the full DstReg is
             // added as an additional implicit operand.
1218         auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1219         unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
1220         unsigned DstFlags = MI.getOperand(0).getTargetFlags();
1221         MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
1222                    .addDef(Reg32)
1223                    .addReg(DstReg, RegState::Kill)
1224                    .addReg(DstReg, DstFlags | RegState::Implicit);
1225       } else {
             // Note: shadows the outer DstReg; it is read from the same operand.
1226         Register DstReg = MI.getOperand(0).getReg();
1227         MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
1228                    .add(MI.getOperand(0))
1229                    .addUse(DstReg, RegState::Kill);
1230       }
1231 
           // The ADRP gets the MO_PAGE form of the address operand, the load gets
           // the MO_PAGEOFF | MO_NC form.
1232       if (MO1.isGlobal()) {
1233         MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
1234         MIB2.addGlobalAddress(MO1.getGlobal(), 0,
1235                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1236       } else if (MO1.isSymbol()) {
1237         MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
1238         MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
1239                                                         AArch64II::MO_PAGEOFF |
1240                                                         AArch64II::MO_NC);
1241       } else {
1242         assert(MO1.isCPI() &&
1243                "Only expect globals, externalsymbols, or constant pools");
1244         MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1245                                   Flags | AArch64II::MO_PAGE);
1246         MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1247                                   Flags | AArch64II::MO_PAGEOFF |
1248                                       AArch64II::MO_NC);
1249       }
1250 
1251       transferImpOps(MI, MIB1, MIB2);
1252     }
1253     MI.eraseFromParent();
1254     return true;
1255   }
1256   case AArch64::MOVaddrBA: {
1257     MachineFunction &MF = *MI.getParent()->getParent();
1258     if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1259       // blockaddress expressions have to come from a constant pool because the
1260       // largest addend (and hence offset within a function) allowed for ADRP is
1261       // only 8MB.
1262       const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
1263       assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1264 
1265       MachineConstantPool *MCP = MF.getConstantPool();
1266       unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));
1267 
1268       Register DstReg = MI.getOperand(0).getReg();
1269       auto MIB1 =
1270           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1271               .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
1272       auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1273                           TII->get(AArch64::LDRXui), DstReg)
1274                       .addUse(DstReg)
1275                       .addConstantPoolIndex(
1276                           CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1277       transferImpOps(MI, MIB1, MIB2);
1278       MI.eraseFromParent();
1279       return true;
1280     }
1281   }
         // Non-MachO targets use the generic ADRP + ADD expansion below.
1282     [[fallthrough]];
1283   case AArch64::MOVaddr:
1284   case AArch64::MOVaddrJT:
1285   case AArch64::MOVaddrCP:
1286   case AArch64::MOVaddrTLS:
1287   case AArch64::MOVaddrEXT: {
1288     // Expand into ADRP + ADD.
1289     Register DstReg = MI.getOperand(0).getReg();
1290     assert(DstReg != AArch64::XZR);
1291     MachineInstrBuilder MIB1 =
1292         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1293             .add(MI.getOperand(1));
1294 
1295     if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
1296       // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1297       // We do so by creating a MOVK that sets bits 48-63 of the register to
1298       // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1299       // the small code model so we can assume a binary size of <= 4GB, which
1300       // makes the untagged PC relative offset positive. The binary must also be
1301       // loaded into address range [0, 2^48). Both of these properties need to
1302       // be ensured at runtime when using tagged addresses.
1303       auto Tag = MI.getOperand(1);
1304       Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1305       Tag.setOffset(0x100000000);
1306       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
1307           .addReg(DstReg)
1308           .add(Tag)
1309           .addImm(48);
1310     }
1311 
1312     MachineInstrBuilder MIB2 =
1313         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1314             .add(MI.getOperand(0))
1315             .addReg(DstReg)
1316             .add(MI.getOperand(2))
1317             .addImm(0);
1318 
1319     transferImpOps(MI, MIB1, MIB2);
1320     MI.eraseFromParent();
1321     return true;
1322   }
1323   case AArch64::ADDlowTLS:
1324     // Produce a plain ADD
1325     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1326         .add(MI.getOperand(0))
1327         .add(MI.getOperand(1))
1328         .add(MI.getOperand(2))
1329         .addImm(0);
1330     MI.eraseFromParent();
1331     return true;
1332 
1333   case AArch64::MOVbaseTLS: {
1334     Register DstReg = MI.getOperand(0).getReg();
         // Pick the thread-pointer system register from the subtarget settings
         // (EL3, EL2, EL1, read-only EL0 are checked in that order); TPIDR_EL0
         // is the default. The register is then read with MRS.
1335     auto SysReg = AArch64SysReg::TPIDR_EL0;
1336     MachineFunction *MF = MBB.getParent();
1337     if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1338       SysReg = AArch64SysReg::TPIDR_EL3;
1339     else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1340       SysReg = AArch64SysReg::TPIDR_EL2;
1341     else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1342       SysReg = AArch64SysReg::TPIDR_EL1;
1343     else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
1344       SysReg = AArch64SysReg::TPIDRRO_EL0;
1345     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
1346         .addImm(SysReg);
1347     MI.eraseFromParent();
1348     return true;
1349   }
1350 
1351   case AArch64::MOVi32imm:
1352     return expandMOVImm(MBB, MBBI, 32);
1353   case AArch64::MOVi64imm:
1354     return expandMOVImm(MBB, MBBI, 64);
1355   case AArch64::RET_ReallyLR: {
1356     // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1357     // function and missing live-ins. We are fine in practice because callee
1358     // saved register handling ensures the register value is restored before
1359     // RET, but we need the undef flag here to appease the MachineVerifier
1360     // liveness checks.
1361     MachineInstrBuilder MIB =
1362         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
1363           .addReg(AArch64::LR, RegState::Undef);
1364     transferImpOps(MI, MIB, MIB);
1365     MI.eraseFromParent();
1366     return true;
1367   }
       // Compare-and-swap pseudos: each size passes its own load/store-exclusive
       // opcodes, compare opcode, extend/shift immediate and zero register.
1368   case AArch64::CMP_SWAP_8:
1369     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1370                           AArch64::SUBSWrx,
1371                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
1372                           AArch64::WZR, NextMBBI);
1373   case AArch64::CMP_SWAP_16:
1374     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1375                           AArch64::SUBSWrx,
1376                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
1377                           AArch64::WZR, NextMBBI);
1378   case AArch64::CMP_SWAP_32:
1379     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1380                           AArch64::SUBSWrs,
1381                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1382                           AArch64::WZR, NextMBBI);
1383   case AArch64::CMP_SWAP_64:
1384     return expandCMP_SWAP(MBB, MBBI,
1385                           AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1386                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1387                           AArch64::XZR, NextMBBI);
1388   case AArch64::CMP_SWAP_128:
1389   case AArch64::CMP_SWAP_128_RELEASE:
1390   case AArch64::CMP_SWAP_128_ACQUIRE:
1391   case AArch64::CMP_SWAP_128_MONOTONIC:
1392     return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1393 
       // The "Tied" AES pseudos become the plain instruction with the same two
       // operands.
1394   case AArch64::AESMCrrTied:
1395   case AArch64::AESIMCrrTied: {
1396     MachineInstrBuilder MIB =
1397     BuildMI(MBB, MBBI, MI.getDebugLoc(),
1398             TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1399                                                       AArch64::AESIMCrr))
1400       .add(MI.getOperand(0))
1401       .add(MI.getOperand(1));
1402     transferImpOps(MI, MIB, MIB);
1403     MI.eraseFromParent();
1404     return true;
1405    }
1406    case AArch64::IRGstack: {
1407      MachineFunction &MF = *MBB.getParent();
1408      const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1409      const AArch64FrameLowering *TFI =
1410          MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1411 
1412      // IRG does not allow immediate offset. getTaggedBasePointerOffset should
1413      // almost always point to SP-after-prologue; if not, emit a longer
1414      // instruction sequence.
1415      int BaseOffset = -AFI->getTaggedBasePointerOffset();
1416      Register FrameReg;
1417      StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1418          MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
1419          /*PreferFP=*/false,
1420          /*ForSimm=*/true);
1421      Register SrcReg = FrameReg;
1422      if (FrameRegOffset) {
1423        // Use output register as temporary.
1424        SrcReg = MI.getOperand(0).getReg();
1425        emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
1426                        FrameRegOffset, TII);
1427      }
1428      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
1429          .add(MI.getOperand(0))
1430          .addUse(SrcReg)
1431          .add(MI.getOperand(2));
1432      MI.eraseFromParent();
1433      return true;
1434    }
1435    case AArch64::TAGPstack: {
          // A non-negative offset selects ADDG, a negative one SUBG with the
          // absolute value of the offset.
1436      int64_t Offset = MI.getOperand(2).getImm();
1437      BuildMI(MBB, MBBI, MI.getDebugLoc(),
1438              TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1439          .add(MI.getOperand(0))
1440          .add(MI.getOperand(1))
1441          .addImm(std::abs(Offset))
1442          .add(MI.getOperand(4));
1443      MI.eraseFromParent();
1444      return true;
1445    }
1446    case AArch64::STGloop_wback:
1447    case AArch64::STZGloop_wback:
1448      return expandSetTagLoop(MBB, MBBI, NextMBBI);
1449    case AArch64::STGloop:
1450    case AArch64::STZGloop:
1451      report_fatal_error(
1452          "Non-writeback variants of STGloop / STZGloop should not "
1453          "survive past PrologEpilogInserter.");
        // Multi-register SVE spill/fill pseudos: the last argument is the number
        // of single-register instructions to emit.
1454    case AArch64::STR_ZZZZXI:
1455      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
1456    case AArch64::STR_ZZZXI:
1457      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
1458    case AArch64::STR_ZZXI:
1459      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
1460    case AArch64::LDR_ZZZZXI:
1461      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
1462    case AArch64::LDR_ZZZXI:
1463      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
1464    case AArch64::LDR_ZZXI:
1465      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
1466    case AArch64::BLR_RVMARKER:
1467      return expandCALL_RVMARKER(MBB, MBBI);
1468    case AArch64::BLR_BTI:
1469      return expandCALL_BTI(MBB, MBBI);
1470    case AArch64::StoreSwiftAsyncContext:
1471      return expandStoreSwiftAsyncContext(MBB, MBBI);
1472    case AArch64::RestoreZAPseudo: {
1473      auto *NewMBB = expandRestoreZA(MBB, MBBI);
1474      if (NewMBB != &MBB)
1475        NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1476      return true;
1477    }
1478    case AArch64::MSRpstatePseudo: {
1479      auto *NewMBB = expandCondSMToggle(MBB, MBBI);
1480      if (NewMBB != &MBB)
1481        NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1482      return true;
1483    }
1484    case AArch64::OBSCURE_COPY: {
          // Emit a register move (ORR dst, XZR, src) unless source and
          // destination are the same register; the pseudo is removed either way.
1485      if (MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) {
1486        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
1487            .add(MI.getOperand(0))
1488            .addReg(AArch64::XZR)
1489            .add(MI.getOperand(1))
1490            .addImm(0);
1491      }
1492      MI.eraseFromParent();
1493      return true;
1494    }
1495   }
       // Not a pseudo handled by this pass.
1496   return false;
1497 }
1498 
1499 /// Iterate over the instructions in basic block MBB and expand any
1500 /// pseudo instructions.  Return true if anything was modified.
1501 bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
1502   bool Modified = false;
1503 
1504   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1505   while (MBBI != E) {
         // Take the successor before expanding: expandMI may erase *MBBI, and it
         // receives NMBBI by reference so that it can redirect the iteration
         // (for example to MBB.end() after the block has been split).
1506     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
1507     Modified |= expandMI(MBB, MBBI, NMBBI);
1508     MBBI = NMBBI;
1509   }
1510 
1511   return Modified;
1512 }
1513 
/// Pass entry point: caches the instruction info of MF's subtarget in TII and
/// runs expandMBB on every basic block of MF. Returns true if any block was
/// modified.
1514 bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
1515   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
1516 
1517   bool Modified = false;
1518   for (auto &MBB : MF)
1519     Modified |= expandMBB(MBB);
1520   return Modified;
1521 }
1522 
1523 /// Returns an instance of the pseudo instruction expansion pass.
/// Each call allocates a new AArch64ExpandPseudo object with `new`.
1524 FunctionPass *llvm::createAArch64ExpandPseudoPass() {
1525   return new AArch64ExpandPseudo();
1526 }
1527