1 //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands pseudo instructions into target
10 // instructions to allow proper scheduling and other late optimizations.  This
11 // pass should be run after register allocation but before the post-regalloc
12 // scheduling pass.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64ExpandImm.h"
17 #include "AArch64InstrInfo.h"
18 #include "AArch64MachineFunctionInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/CodeGen/LivePhysRegs.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineConstantPool.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstr.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/CodeGen/TargetSubtargetInfo.h"
31 #include "llvm/IR/DebugLoc.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/Pass.h"
34 #include "llvm/Support/CodeGen.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Target/TargetMachine.h"
37 #include "llvm/TargetParser/Triple.h"
38 #include <cassert>
39 #include <cstdint>
40 #include <iterator>
41 #include <utility>
42 
43 using namespace llvm;
44 
45 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
46 
47 namespace {
48 
49 class AArch64ExpandPseudo : public MachineFunctionPass {
50 public:
51   const AArch64InstrInfo *TII;
52 
53   static char ID;
54 
55   AArch64ExpandPseudo() : MachineFunctionPass(ID) {
56     initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
57   }
58 
59   bool runOnMachineFunction(MachineFunction &Fn) override;
60 
61   StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
62 
63 private:
64   bool expandMBB(MachineBasicBlock &MBB);
65   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
66                 MachineBasicBlock::iterator &NextMBBI);
67   bool expandMultiVecPseudo(MachineBasicBlock &MBB,
68                             MachineBasicBlock::iterator MBBI,
69                             TargetRegisterClass ContiguousClass,
70                             TargetRegisterClass StridedClass,
71                             unsigned ContiguousOpc, unsigned StridedOpc);
72   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
73                     unsigned BitSize);
74 
75   bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
76                             MachineBasicBlock::iterator MBBI);
77   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
78                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
79                       unsigned ExtendImm, unsigned ZeroReg,
80                       MachineBasicBlock::iterator &NextMBBI);
81   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
82                           MachineBasicBlock::iterator MBBI,
83                           MachineBasicBlock::iterator &NextMBBI);
84   bool expandSetTagLoop(MachineBasicBlock &MBB,
85                         MachineBasicBlock::iterator MBBI,
86                         MachineBasicBlock::iterator &NextMBBI);
87   bool expandSVESpillFill(MachineBasicBlock &MBB,
88                           MachineBasicBlock::iterator MBBI, unsigned Opc,
89                           unsigned N);
90   bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
91                            MachineBasicBlock::iterator MBBI);
92   bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
93   bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
94                                     MachineBasicBlock::iterator MBBI);
95   MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
96                                      MachineBasicBlock::iterator MBBI);
97   MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
98                                         MachineBasicBlock::iterator MBBI);
99 };
100 
101 } // end anonymous namespace
102 
103 char AArch64ExpandPseudo::ID = 0;
104 
105 INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
106                 AARCH64_EXPAND_PSEUDO_NAME, false, false)
107 
108 /// Transfer implicit operands on the pseudo instruction to the
109 /// instructions created from the expansion.
110 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
111                            MachineInstrBuilder &DefMI) {
112   const MCInstrDesc &Desc = OldMI.getDesc();
113   for (const MachineOperand &MO :
114        llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
115     assert(MO.isReg() && MO.getReg());
116     if (MO.isUse())
117       UseMI.add(MO);
118     else
119       DefMI.add(MO);
120   }
121 }
122 
123 /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
124 /// real move-immediate instructions to synthesize the immediate.
125 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
126                                        MachineBasicBlock::iterator MBBI,
127                                        unsigned BitSize) {
128   MachineInstr &MI = *MBBI;
129   Register DstReg = MI.getOperand(0).getReg();
130   uint64_t RenamableState =
131       MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
132   uint64_t Imm = MI.getOperand(1).getImm();
133 
134   if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
135     // Useless def, and we don't want to risk creating an invalid ORR (which
136     // would really write to sp).
137     MI.eraseFromParent();
138     return true;
139   }
140 
141   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
142   AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
143   assert(Insn.size() != 0);
144 
145   SmallVector<MachineInstrBuilder, 4> MIBS;
146   for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
147     bool LastItem = std::next(I) == E;
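    // Only the last instruction of the expansion may mark DstReg as dead; the
    // intermediate results still feed the following instructions.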
148     switch (I->Opcode)
149     {
150     default: llvm_unreachable("unhandled!"); break;
151 
152     case AArch64::ORRWri:
153     case AArch64::ORRXri:
154       if (I->Op1 == 0) {
155         MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
156                            .add(MI.getOperand(0))
157                            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
158                            .addImm(I->Op2));
159       } else {
160         Register DstReg = MI.getOperand(0).getReg();
161         bool DstIsDead = MI.getOperand(0).isDead();
162         MIBS.push_back(
163             BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
164                 .addReg(DstReg, RegState::Define |
165                                     getDeadRegState(DstIsDead && LastItem) |
166                                     RenamableState)
167                 .addReg(DstReg)
168                 .addImm(I->Op2));
169       }
170       break;
171     case AArch64::ANDXri:
172     case AArch64::EORXri:
173       if (I->Op1 == 0) {
174         MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
175                            .add(MI.getOperand(0))
176                            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
177                            .addImm(I->Op2));
178       } else {
179         Register DstReg = MI.getOperand(0).getReg();
180         bool DstIsDead = MI.getOperand(0).isDead();
181         MIBS.push_back(
182             BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
183                 .addReg(DstReg, RegState::Define |
184                                     getDeadRegState(DstIsDead && LastItem) |
185                                     RenamableState)
186                 .addReg(DstReg)
187                 .addImm(I->Op2));
188       }
189       break;
190     case AArch64::MOVNWi:
191     case AArch64::MOVNXi:
192     case AArch64::MOVZWi:
193     case AArch64::MOVZXi: {
194       bool DstIsDead = MI.getOperand(0).isDead();
195       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
196         .addReg(DstReg, RegState::Define |
197                 getDeadRegState(DstIsDead && LastItem) |
198                 RenamableState)
199         .addImm(I->Op1)
200         .addImm(I->Op2));
201       } break;
202     case AArch64::MOVKWi:
203     case AArch64::MOVKXi: {
204       Register DstReg = MI.getOperand(0).getReg();
205       bool DstIsDead = MI.getOperand(0).isDead();
206       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
207         .addReg(DstReg,
208                 RegState::Define |
209                 getDeadRegState(DstIsDead && LastItem) |
210                 RenamableState)
211         .addReg(DstReg)
212         .addImm(I->Op1)
213         .addImm(I->Op2));
214       } break;
215     }
216   }
217   transferImpOps(MI, MIBS.front(), MIBS.back());
218   MI.eraseFromParent();
219   return true;
220 }
221 
222 bool AArch64ExpandPseudo::expandCMP_SWAP(
223     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
224     unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
225     MachineBasicBlock::iterator &NextMBBI) {
226   MachineInstr &MI = *MBBI;
227   MIMetadata MIMD(MI);
228   const MachineOperand &Dest = MI.getOperand(0);
229   Register StatusReg = MI.getOperand(1).getReg();
230   bool StatusDead = MI.getOperand(1).isDead();
231   // Duplicating undef operands into 2 instructions does not guarantee the same
232   // value on both; however, undef should be replaced by xzr anyway.
233   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
234   Register AddrReg = MI.getOperand(2).getReg();
235   Register DesiredReg = MI.getOperand(3).getReg();
236   Register NewReg = MI.getOperand(4).getReg();
237 
238   MachineFunction *MF = MBB.getParent();
239   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
240   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
241   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
242 
243   MF->insert(++MBB.getIterator(), LoadCmpBB);
244   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
245   MF->insert(++StoreBB->getIterator(), DoneBB);
246 
247   // .Lloadcmp:
248   //     mov wStatus, 0
249   //     ldaxr xDest, [xAddr]
250   //     cmp xDest, xDesired
251   //     b.ne .Ldone
252   if (!StatusDead)
253     BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
254       .addImm(0).addImm(0);
255   BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
256       .addReg(AddrReg);
257   BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
258       .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
259       .addReg(DesiredReg)
260       .addImm(ExtendImm);
261   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
262       .addImm(AArch64CC::NE)
263       .addMBB(DoneBB)
264       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
265   LoadCmpBB->addSuccessor(DoneBB);
266   LoadCmpBB->addSuccessor(StoreBB);
267 
268   // .Lstore:
269   //     stlxr wStatus, xNew, [xAddr]
270   //     cbnz wStatus, .Lloadcmp
271   BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
272       .addReg(NewReg)
273       .addReg(AddrReg);
274   BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
275       .addReg(StatusReg, getKillRegState(StatusDead))
276       .addMBB(LoadCmpBB);
277   StoreBB->addSuccessor(LoadCmpBB);
278   StoreBB->addSuccessor(DoneBB);
279 
280   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
281   DoneBB->transferSuccessors(&MBB);
282 
283   MBB.addSuccessor(LoadCmpBB);
284 
285   NextMBBI = MBB.end();
286   MI.eraseFromParent();
287 
288   // Recompute livein lists.
289   LivePhysRegs LiveRegs;
290   computeAndAddLiveIns(LiveRegs, *DoneBB);
291   computeAndAddLiveIns(LiveRegs, *StoreBB);
292   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
293   // Do an extra pass around the loop to get loop carried registers right.
294   StoreBB->clearLiveIns();
295   computeAndAddLiveIns(LiveRegs, *StoreBB);
296   LoadCmpBB->clearLiveIns();
297   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
298 
299   return true;
300 }
301 
302 bool AArch64ExpandPseudo::expandCMP_SWAP_128(
303     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
304     MachineBasicBlock::iterator &NextMBBI) {
305   MachineInstr &MI = *MBBI;
306   MIMetadata MIMD(MI);
307   MachineOperand &DestLo = MI.getOperand(0);
308   MachineOperand &DestHi = MI.getOperand(1);
309   Register StatusReg = MI.getOperand(2).getReg();
310   bool StatusDead = MI.getOperand(2).isDead();
311   // Duplicating undef operands into 2 instructions does not guarantee the same
312   // value on both; however, undef should be replaced by xzr anyway.
313   assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
314   Register AddrReg = MI.getOperand(3).getReg();
315   Register DesiredLoReg = MI.getOperand(4).getReg();
316   Register DesiredHiReg = MI.getOperand(5).getReg();
317   Register NewLoReg = MI.getOperand(6).getReg();
318   Register NewHiReg = MI.getOperand(7).getReg();
319 
320   unsigned LdxpOp, StxpOp;
321 
322   switch (MI.getOpcode()) {
323   case AArch64::CMP_SWAP_128_MONOTONIC:
324     LdxpOp = AArch64::LDXPX;
325     StxpOp = AArch64::STXPX;
326     break;
327   case AArch64::CMP_SWAP_128_RELEASE:
328     LdxpOp = AArch64::LDXPX;
329     StxpOp = AArch64::STLXPX;
330     break;
331   case AArch64::CMP_SWAP_128_ACQUIRE:
332     LdxpOp = AArch64::LDAXPX;
333     StxpOp = AArch64::STXPX;
334     break;
335   case AArch64::CMP_SWAP_128:
336     LdxpOp = AArch64::LDAXPX;
337     StxpOp = AArch64::STLXPX;
338     break;
339   default:
340     llvm_unreachable("Unexpected opcode");
341   }
342 
343   MachineFunction *MF = MBB.getParent();
344   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
345   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
346   auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
347   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
348 
349   MF->insert(++MBB.getIterator(), LoadCmpBB);
350   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
351   MF->insert(++StoreBB->getIterator(), FailBB);
352   MF->insert(++FailBB->getIterator(), DoneBB);
353 
354   // .Lloadcmp:
355   //     ldaxp xDestLo, xDestHi, [xAddr]
356   //     cmp xDestLo, xDesiredLo; csinc wStatus, wzr, wzr, eq
357   //     cmp xDestHi, xDesiredHi; csinc wStatus, wStatus, wStatus, eq
358   //     cbnz wStatus, .Lfail
359   BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
360       .addReg(DestLo.getReg(), RegState::Define)
361       .addReg(DestHi.getReg(), RegState::Define)
362       .addReg(AddrReg);
363   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
364       .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
365       .addReg(DesiredLoReg)
366       .addImm(0);
367   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
368     .addUse(AArch64::WZR)
369     .addUse(AArch64::WZR)
370     .addImm(AArch64CC::EQ);
371   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
372       .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
373       .addReg(DesiredHiReg)
374       .addImm(0);
375   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
376       .addUse(StatusReg, RegState::Kill)
377       .addUse(StatusReg, RegState::Kill)
378       .addImm(AArch64CC::EQ);
379   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
380       .addUse(StatusReg, getKillRegState(StatusDead))
381       .addMBB(FailBB);
382   LoadCmpBB->addSuccessor(FailBB);
383   LoadCmpBB->addSuccessor(StoreBB);
384 
385   // .Lstore:
386   //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
387   //     cbnz wStatus, .Lloadcmp
388   BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
389       .addReg(NewLoReg)
390       .addReg(NewHiReg)
391       .addReg(AddrReg);
392   BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
393       .addReg(StatusReg, getKillRegState(StatusDead))
394       .addMBB(LoadCmpBB);
395   BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
396   StoreBB->addSuccessor(LoadCmpBB);
397   StoreBB->addSuccessor(DoneBB);
398 
399   // .Lfail:
400   //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
401   //     cbnz wStatus, .Lloadcmp
402   BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
403       .addReg(DestLo.getReg())
404       .addReg(DestHi.getReg())
405       .addReg(AddrReg);
406   BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
407       .addReg(StatusReg, getKillRegState(StatusDead))
408       .addMBB(LoadCmpBB);
409   FailBB->addSuccessor(LoadCmpBB);
410   FailBB->addSuccessor(DoneBB);
411 
412   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
413   DoneBB->transferSuccessors(&MBB);
414 
415   MBB.addSuccessor(LoadCmpBB);
416 
417   NextMBBI = MBB.end();
418   MI.eraseFromParent();
419 
420   // Recompute liveness bottom up.
421   LivePhysRegs LiveRegs;
422   computeAndAddLiveIns(LiveRegs, *DoneBB);
423   computeAndAddLiveIns(LiveRegs, *FailBB);
424   computeAndAddLiveIns(LiveRegs, *StoreBB);
425   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
426 
427   // Do an extra pass in the loop to get the loop carried dependencies right.
428   FailBB->clearLiveIns();
429   computeAndAddLiveIns(LiveRegs, *FailBB);
430   StoreBB->clearLiveIns();
431   computeAndAddLiveIns(LiveRegs, *StoreBB);
432   LoadCmpBB->clearLiveIns();
433   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
434 
435   return true;
436 }
437 
438 /// \brief Expand Pseudos to Instructions with destructive operands.
439 ///
440 /// This mechanism uses MOVPRFX instructions for zeroing the false lanes
441 /// or for fixing relaxed register allocation conditions to comply with
442 /// the instruction's register constraints. The latter case may be cheaper
443 /// than setting the register constraints in the register allocator,
444 /// since that will insert regular MOV instructions rather than MOVPRFX.
445 ///
446 /// Example (after register allocation):
447 ///
448 ///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
449 ///
450 /// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
451 /// * We cannot map directly to FSUB_ZPmZ_B because the register
452 ///   constraints of the instruction are not met.
453 /// * Also, the _ZERO suffix specifies that the false lanes need to be zeroed.
454 ///
455 /// We first check whether the destructive operand == the result operand;
456 /// if not, we try to swap the operands, e.g.
457 ///
458 ///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
459 ///
460 /// But because FSUB_ZPmZ is not commutative, this is semantically
461 /// different, so we need a reverse instruction:
462 ///
463 ///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
464 ///
465 /// Then we implement the zeroing of the false lanes of Z0 by adding
466 /// a zeroing MOVPRFX instruction:
467 ///
468 ///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
469 ///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
470 ///
471 /// Note that this can only be done for the _ZERO or _UNDEF variants, where
472 /// we can either guarantee that the false lanes are zeroed (by implementing
473 /// this) or know that they are undef (don't care / not used); otherwise
474 /// swapping the operands is illegal because the operation is not (and cannot
475 /// be emulated to be) fully commutative.
476 bool AArch64ExpandPseudo::expand_DestructiveOp(
477                             MachineInstr &MI,
478                             MachineBasicBlock &MBB,
479                             MachineBasicBlock::iterator MBBI) {
480   unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
481   uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
482   uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
483   bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
484   Register DstReg = MI.getOperand(0).getReg();
485   bool DstIsDead = MI.getOperand(0).isDead();
486   bool UseRev = false;
487   unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
488 
489   switch (DType) {
490   case AArch64::DestructiveBinaryComm:
491   case AArch64::DestructiveBinaryCommWithRev:
492     if (DstReg == MI.getOperand(3).getReg()) {
493       // FSUB Zd, Pg, Zs1, Zd  ==> FSUBR   Zd, Pg/m, Zd, Zs1
494       std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
495       UseRev = true;
496       break;
497     }
498     [[fallthrough]];
499   case AArch64::DestructiveBinary:
500   case AArch64::DestructiveBinaryImm:
501     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
502     break;
503   case AArch64::DestructiveUnaryPassthru:
504     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
505     break;
506   case AArch64::DestructiveTernaryCommWithRev:
507     std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
508     if (DstReg == MI.getOperand(3).getReg()) {
509       // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
510       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
511       UseRev = true;
512     } else if (DstReg == MI.getOperand(4).getReg()) {
513       // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
514       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
515       UseRev = true;
516     }
517     break;
518   default:
519     llvm_unreachable("Unsupported Destructive Operand type");
520   }
521 
522   // MOVPRFX can only be used if the destination operand is the destructive
523   // operand and is not used as any other (source) operand, so the
524   // destructive operand must be unique.
525   bool DOPRegIsUnique = false;
526   switch (DType) {
527   case AArch64::DestructiveBinary:
528     DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
529     break;
530   case AArch64::DestructiveBinaryComm:
531   case AArch64::DestructiveBinaryCommWithRev:
532     DOPRegIsUnique =
533       DstReg != MI.getOperand(DOPIdx).getReg() ||
534       MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
535     break;
536   case AArch64::DestructiveUnaryPassthru:
537   case AArch64::DestructiveBinaryImm:
538     DOPRegIsUnique = true;
539     break;
540   case AArch64::DestructiveTernaryCommWithRev:
541     DOPRegIsUnique =
542         DstReg != MI.getOperand(DOPIdx).getReg() ||
543         (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
544          MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
545     break;
546   }
547 
548   // Resolve the reverse opcode
549   if (UseRev) {
550     int NewOpcode;
551     // e.g. DIV -> DIVR
552     if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
553       Opcode = NewOpcode;
554     // e.g. DIVR -> DIV
555     else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
556       Opcode = NewOpcode;
557   }
558 
559   // Get the right MOVPRFX
560   uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
561   unsigned MovPrfx, LSLZero, MovPrfxZero;
562   switch (ElementSize) {
563   case AArch64::ElementSizeNone:
564   case AArch64::ElementSizeB:
565     MovPrfx = AArch64::MOVPRFX_ZZ;
566     LSLZero = AArch64::LSL_ZPmI_B;
567     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
568     break;
569   case AArch64::ElementSizeH:
570     MovPrfx = AArch64::MOVPRFX_ZZ;
571     LSLZero = AArch64::LSL_ZPmI_H;
572     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
573     break;
574   case AArch64::ElementSizeS:
575     MovPrfx = AArch64::MOVPRFX_ZZ;
576     LSLZero = AArch64::LSL_ZPmI_S;
577     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
578     break;
579   case AArch64::ElementSizeD:
580     MovPrfx = AArch64::MOVPRFX_ZZ;
581     LSLZero = AArch64::LSL_ZPmI_D;
582     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
583     break;
584   default:
585     llvm_unreachable("Unsupported ElementSize");
586   }
587 
588   //
589   // Create the MOVPRFX prefix for the destructive operation (if required)
590   //
591   MachineInstrBuilder PRFX, DOP;
592   if (FalseZero) {
593     // If we cannot prefix the requested instruction we'll instead emit a
594     // prefixed_zeroing_mov for DestructiveBinary.
595     assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
596             DType == AArch64::DestructiveBinaryComm ||
597             DType == AArch64::DestructiveBinaryCommWithRev) &&
598            "The destructive operand should be unique");
599     assert(ElementSize != AArch64::ElementSizeNone &&
600            "This instruction is unpredicated");
601 
602     // Copy the source operand into the destination, zeroing the inactive lanes
603     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
604                .addReg(DstReg, RegState::Define)
605                .addReg(MI.getOperand(PredIdx).getReg())
606                .addReg(MI.getOperand(DOPIdx).getReg());
607 
608     // After the movprfx, the destructive operand is the same as Dst
609     DOPIdx = 0;
610 
611     // Create the additional LSL to zero the lanes when the DstReg is not
612     // unique. This zeroes the lanes in z0 that aren't active in p0 with the
613     // sequence: movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0
614     if ((DType == AArch64::DestructiveBinary ||
615          DType == AArch64::DestructiveBinaryComm ||
616          DType == AArch64::DestructiveBinaryCommWithRev) &&
617         !DOPRegIsUnique) {
618       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
619           .addReg(DstReg, RegState::Define)
620           .add(MI.getOperand(PredIdx))
621           .addReg(DstReg)
622           .addImm(0);
623     }
624   } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
625     assert(DOPRegIsUnique && "The destructive operand should be unique");
626     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
627                .addReg(DstReg, RegState::Define)
628                .addReg(MI.getOperand(DOPIdx).getReg());
629     DOPIdx = 0;
630   }
631 
632   //
633   // Create the destructive operation
634   //
635   DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
636     .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
637 
638   switch (DType) {
639   case AArch64::DestructiveUnaryPassthru:
640     DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
641         .add(MI.getOperand(PredIdx))
642         .add(MI.getOperand(SrcIdx));
643     break;
644   case AArch64::DestructiveBinary:
645   case AArch64::DestructiveBinaryImm:
646   case AArch64::DestructiveBinaryComm:
647   case AArch64::DestructiveBinaryCommWithRev:
648     DOP.add(MI.getOperand(PredIdx))
649        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
650        .add(MI.getOperand(SrcIdx));
651     break;
652   case AArch64::DestructiveTernaryCommWithRev:
653     DOP.add(MI.getOperand(PredIdx))
654         .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
655         .add(MI.getOperand(SrcIdx))
656         .add(MI.getOperand(Src2Idx));
657     break;
658   }
659 
660   if (PRFX) {
661     finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
662     transferImpOps(MI, PRFX, DOP);
663   } else
664     transferImpOps(MI, DOP, DOP);
665 
666   MI.eraseFromParent();
667   return true;
668 }
669 
670 bool AArch64ExpandPseudo::expandSetTagLoop(
671     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
672     MachineBasicBlock::iterator &NextMBBI) {
673   MachineInstr &MI = *MBBI;
674   DebugLoc DL = MI.getDebugLoc();
675   Register SizeReg = MI.getOperand(0).getReg();
676   Register AddressReg = MI.getOperand(1).getReg();
677 
678   MachineFunction *MF = MBB.getParent();
679 
680   bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
681   const unsigned OpCode1 =
682       ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
683   const unsigned OpCode2 =
684       ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
685 
686   unsigned Size = MI.getOperand(2).getImm();
687   assert(Size > 0 && Size % 16 == 0);
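  // If the size is not a multiple of 32 bytes, emit one single-granule store
  // first so the loop below can operate on 32-byte (paired) granules.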
688   if (Size % (16 * 2) != 0) {
689     BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
690         .addReg(AddressReg)
691         .addReg(AddressReg)
692         .addImm(1);
693     Size -= 16;
694   }
695   MachineBasicBlock::iterator I =
696       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
697           .addImm(Size);
698   expandMOVImm(MBB, I, 64);
699 
700   auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
701   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
702 
703   MF->insert(++MBB.getIterator(), LoopBB);
704   MF->insert(++LoopBB->getIterator(), DoneBB);
705 
706   BuildMI(LoopBB, DL, TII->get(OpCode2))
707       .addDef(AddressReg)
708       .addReg(AddressReg)
709       .addReg(AddressReg)
710       .addImm(2)
711       .cloneMemRefs(MI)
712       .setMIFlags(MI.getFlags());
713   BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
714       .addDef(SizeReg)
715       .addReg(SizeReg)
716       .addImm(16 * 2)
717       .addImm(0);
718   BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
719       .addImm(AArch64CC::NE)
720       .addMBB(LoopBB)
721       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
722 
723   LoopBB->addSuccessor(LoopBB);
724   LoopBB->addSuccessor(DoneBB);
725 
726   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
727   DoneBB->transferSuccessors(&MBB);
728 
729   MBB.addSuccessor(LoopBB);
730 
731   NextMBBI = MBB.end();
732   MI.eraseFromParent();
733   // Recompute liveness bottom up.
734   LivePhysRegs LiveRegs;
735   computeAndAddLiveIns(LiveRegs, *DoneBB);
736   computeAndAddLiveIns(LiveRegs, *LoopBB);
737   // Do an extra pass in the loop to get the loop carried dependencies right.
738   // FIXME: is this necessary?
739   LoopBB->clearLiveIns();
740   computeAndAddLiveIns(LiveRegs, *LoopBB);
741   DoneBB->clearLiveIns();
742   computeAndAddLiveIns(LiveRegs, *DoneBB);
743 
744   return true;
745 }
746 
747 bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
748                                              MachineBasicBlock::iterator MBBI,
749                                              unsigned Opc, unsigned N) {
750   assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
751           Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
752          "Unexpected opcode");
753   unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)
754                         ? RegState::Define
755                         : 0;
756   unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
757                       ? AArch64::zsub0
758                       : AArch64::psub0;
759   const TargetRegisterInfo *TRI =
760       MBB.getParent()->getSubtarget().getRegisterInfo();
761   MachineInstr &MI = *MBBI;
762   for (unsigned Offset = 0; Offset < N; ++Offset) {
763     int ImmOffset = MI.getOperand(2).getImm() + Offset;
764     bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
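    // Only the last of the N expanded instructions may kill the base register.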
765     assert(ImmOffset >= -256 && ImmOffset < 256 &&
766            "Immediate spill offset out of range");
767     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
768         .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
769                 RState)
770         .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
771         .addImm(ImmOffset);
772   }
773   MI.eraseFromParent();
774   return true;
775 }
776 
777 bool AArch64ExpandPseudo::expandCALL_RVMARKER(
778     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
779   // Expand CALL_RVMARKER pseudo to:
780   // - a branch to the call target, followed by
781   // - the special `mov x29, x29` marker, and
782   // - another branch, to the runtime function
783   // Mark the sequence as a bundle, to prevent passes from moving other code in between.
784   MachineInstr &MI = *MBBI;
785 
786   MachineInstr *OriginalCall;
787   MachineOperand &RVTarget = MI.getOperand(0);
788   MachineOperand &CallTarget = MI.getOperand(1);
789   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
790          "invalid operand for regular call");
791   assert(RVTarget.isGlobal() && "invalid operand for attached call");
792   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
793   OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
794   OriginalCall->addOperand(CallTarget);
795 
796   unsigned RegMaskStartIdx = 2;
797   // Skip register arguments. Those are added during ISel, but are not
798   // needed for the concrete branch.
799   while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
800     auto MOP = MI.getOperand(RegMaskStartIdx);
801     assert(MOP.isReg() && "can only add register operands");
802     OriginalCall->addOperand(MachineOperand::CreateReg(
803         MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
804         /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
805     RegMaskStartIdx++;
806   }
807   for (const MachineOperand &MO :
808        llvm::drop_begin(MI.operands(), RegMaskStartIdx))
809     OriginalCall->addOperand(MO);
810 
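  // Emit the special marker between the call and the runtime call:
  // "mov x29, x29", encoded here as ORR x29, xzr, x29.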
811   BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
812                      .addReg(AArch64::FP, RegState::Define)
813                      .addReg(AArch64::XZR)
814                      .addReg(AArch64::FP)
815                      .addImm(0);
816 
817   auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
818                      .add(RVTarget)
819                      .getInstr();
820 
821   if (MI.shouldUpdateCallSiteInfo())
822     MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);
823 
824   MI.eraseFromParent();
825   finalizeBundle(MBB, OriginalCall->getIterator(),
826                  std::next(RVCall->getIterator()));
827   return true;
828 }
829 
830 bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
831                                          MachineBasicBlock::iterator MBBI) {
832   // Expand CALL_BTI pseudo to:
833   // - a branch to the call target
834   // - a BTI instruction
835   // Mark the sequence as a bundle, to avoid passes moving other code in
836   // between.
837 
838   MachineInstr &MI = *MBBI;
839   MachineOperand &CallTarget = MI.getOperand(0);
840   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
841          "invalid operand for regular call");
842   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
843   MachineInstr *Call =
844       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
845   Call->addOperand(CallTarget);
846   Call->setCFIType(*MBB.getParent(), MI.getCFIType());
847   Call->copyImplicitOps(*MBB.getParent(), MI);
848 
849   MachineInstr *BTI =
850       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
851           // BTI J so that setjmp can BR to this.
852           .addImm(36)
853           .getInstr();
854 
855   if (MI.shouldUpdateCallSiteInfo())
856     MBB.getParent()->moveCallSiteInfo(&MI, Call);
857 
858   MI.eraseFromParent();
859   finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
860   return true;
861 }
862 
863 bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
864     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
865   Register CtxReg = MBBI->getOperand(0).getReg();
866   Register BaseReg = MBBI->getOperand(1).getReg();
867   int Offset = MBBI->getOperand(2).getImm();
868   DebugLoc DL(MBBI->getDebugLoc());
869   auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
870 
871   if (STI.getTargetTriple().getArchName() != "arm64e") {
872     BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
873         .addUse(CtxReg)
874         .addUse(BaseReg)
875         .addImm(Offset / 8)
876         .setMIFlag(MachineInstr::FrameSetup);
877     MBBI->eraseFromParent();
878     return true;
879   }
880 
881   // We need to sign the context in an address-discriminated way. 0xc31a is a
882   // fixed random value, chosen as part of the ABI.
883   //     add x16, xBase, #Offset
884   //     movk x16, #0xc31a, lsl #48
885   //     mov x17, x22/xzr
886   //     pacdb x17, x16
887   //     str x17, [xBase, #Offset]
888   unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
889   BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
890       .addUse(BaseReg)
891       .addImm(abs(Offset))
892       .addImm(0)
893       .setMIFlag(MachineInstr::FrameSetup);
894   BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
895       .addUse(AArch64::X16)
896       .addImm(0xc31a)
897       .addImm(48)
898       .setMIFlag(MachineInstr::FrameSetup);
899   // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
900   // move it somewhere before signing.
901   BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
902       .addUse(AArch64::XZR)
903       .addUse(CtxReg)
904       .addImm(0)
905       .setMIFlag(MachineInstr::FrameSetup);
906   BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
907       .addUse(AArch64::X17)
908       .addUse(AArch64::X16)
909       .setMIFlag(MachineInstr::FrameSetup);
910   BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
911       .addUse(AArch64::X17)
912       .addUse(BaseReg)
913       .addImm(Offset / 8)
914       .setMIFlag(MachineInstr::FrameSetup);
915 
916   MBBI->eraseFromParent();
917   return true;
918 }
919 
920 MachineBasicBlock *
921 AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
922                                      MachineBasicBlock::iterator MBBI) {
923   MachineInstr &MI = *MBBI;
924   assert((std::next(MBBI) != MBB.end() ||
925           MI.getParent()->successors().begin() !=
926               MI.getParent()->successors().end()) &&
927          "Unexpected unreachable in block that restores ZA");
928 
929   // Compare TPIDR2_EL0 value against 0.
930   DebugLoc DL = MI.getDebugLoc();
931   MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
932                                 .add(MI.getOperand(0));
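  // Branch to SMBB (which performs the restore call) when the TPIDR2_EL0 value
  // is zero; otherwise the restore is skipped via the branch to EndBB below.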
933 
934   // Split MBB and create two new blocks:
935   //  - MBB now contains all instructions before RestoreZAPseudo.
936   //  - SMBB contains the RestoreZAPseudo instruction only.
937   //  - EndBB contains all instructions after RestoreZAPseudo.
938   MachineInstr &PrevMI = *std::prev(MBBI);
939   MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
940   MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
941                                  ? *SMBB->successors().begin()
942                                  : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
943 
944   // Add the SMBB label to the CBZ instruction & create a branch to EndBB.
945   Cbz.addMBB(SMBB);
946   BuildMI(&MBB, DL, TII->get(AArch64::B))
947       .addMBB(EndBB);
948   MBB.addSuccessor(EndBB);
949 
950   // Replace the pseudo with a call (BL).
951   MachineInstrBuilder MIB =
952       BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
953   MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
954   for (unsigned I = 2; I < MI.getNumOperands(); ++I)
955     MIB.add(MI.getOperand(I));
956   BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
957 
958   MI.eraseFromParent();
959   return EndBB;
960 }
961 
962 MachineBasicBlock *
963 AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
964                                         MachineBasicBlock::iterator MBBI) {
965   MachineInstr &MI = *MBBI;
966   // In the case of a smstart/smstop before an unreachable, just remove the
967   // pseudo. Exception handling code generated by Clang may introduce
968   // unreachables, and it seems unnecessary to restore pstate.sm when that
969   // happens. Note that this is not just an optimisation: the code below
970   // expects a successor instruction/block in order to split the block at MBBI.
971   if (std::next(MBBI) == MBB.end() &&
972       MI.getParent()->successors().begin() ==
973           MI.getParent()->successors().end()) {
974     MI.eraseFromParent();
975     return &MBB;
976   }
977 
978   // Expand the pseudo into smstart or smstop instruction. The pseudo has the
979   // following operands:
980   //
981   //   MSRpstatePseudo <za|sm|both>, <0|1>, pstate.sm, expectedval, <regmask>
982   //
983   // The pseudo is expanded into a conditional smstart/smstop, with a
984   // check if pstate.sm (register) equals the expected value, and if not,
985   // invokes the smstart/smstop.
986   //
987   // As an example, the following block contains a normal call from a
988   // streaming-compatible function:
989   //
990   // OrigBB:
991   //   MSRpstatePseudo 3, 0, %0, 0, <regmask>             <- Conditional SMSTOP
992   //   bl @normal_callee
993   //   MSRpstatePseudo 3, 1, %0, 0, <regmask>             <- Conditional SMSTART
994   //
995   // ...which will be transformed into:
996   //
997   // OrigBB:
998   //   TBNZx %0:gpr64, 0, SMBB
999   //   b EndBB
1000   //
1001   // SMBB:
1002   //   MSRpstatesvcrImm1 3, 0, <regmask>                  <- SMSTOP
1003   //
1004   // EndBB:
1005   //   bl @normal_callee
1006   //   MSRcond_pstatesvcrImm1 3, 1, <regmask>             <- SMSTART
1007   //
1008   DebugLoc DL = MI.getDebugLoc();
1009 
1010   // Create the conditional branch based on the third operand of the
1011   // instruction, which tells us if we are wrapping a normal or streaming
1012   // function.
1013   // We test the live value of pstate.sm and toggle pstate.sm if this is not the
1014   // expected value for the callee (0 for a normal callee and 1 for a streaming
1015   // callee).
1016   auto PStateSM = MI.getOperand(2).getReg();
1017   auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1018   unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
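  // PStateSM holds the live pstate.sm value (0 or 1), so it is enough to test
  // bit 0 of its 32-bit sub-register with TBZ/TBNZ.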
1019   bool IsStreamingCallee = MI.getOperand(3).getImm();
1020   unsigned Opc = IsStreamingCallee ? AArch64::TBZW : AArch64::TBNZW;
1021   MachineInstrBuilder Tbx =
1022       BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);
1023 
1024   // Split MBB and create two new blocks:
1025   //  - MBB now contains all instructions before the MSRpstatePseudo.
1026   //  - SMBB contains the MSRpstatePseudo instruction only.
1027   //  - EndBB contains all instructions after the MSRpstatePseudo.
1028   MachineInstr &PrevMI = *std::prev(MBBI);
1029   MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
1030   MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
1031                                  ? *SMBB->successors().begin()
1032                                  : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
1033 
1034   // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
1035   Tbx.addMBB(SMBB);
1036   BuildMI(&MBB, DL, TII->get(AArch64::B))
1037       .addMBB(EndBB);
1038   MBB.addSuccessor(EndBB);
1039 
1040   // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
1041   MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
1042                                     TII->get(AArch64::MSRpstatesvcrImm1));
1043   // Copy all operands of the MSRpstatePseudo except operands 2 and 3, as
1044   // these contain the CopyFromReg holding pstate.sm and the flag that
1045   // indicates whether the callee is streaming or normal.
1046   MIB.add(MI.getOperand(0));
1047   MIB.add(MI.getOperand(1));
1048   for (unsigned i = 4; i < MI.getNumOperands(); ++i)
1049     MIB.add(MI.getOperand(i));
1050 
1051   BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1052 
1053   MI.eraseFromParent();
1054   return EndBB;
1055 }
1056 
1057 bool AArch64ExpandPseudo::expandMultiVecPseudo(
1058     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1059     TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
1060     unsigned ContiguousOp, unsigned StridedOpc) {
1061   MachineInstr &MI = *MBBI;
1062   Register Tuple = MI.getOperand(0).getReg();
1063 
1064   auto ContiguousRange = ContiguousClass.getRegisters();
1065   auto StridedRange = StridedClass.getRegisters();
1066   unsigned Opc;
1067   if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
1068     Opc = ContiguousOp;
1069   } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
1070     Opc = StridedOpc;
1071   } else
1072     llvm_unreachable("Cannot expand Multi-Vector pseudo");
1073 
1074   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
1075                                 .add(MI.getOperand(0))
1076                                 .add(MI.getOperand(1))
1077                                 .add(MI.getOperand(2))
1078                                 .add(MI.getOperand(3));
1079   transferImpOps(MI, MIB, MIB);
1080   MI.eraseFromParent();
1081   return true;
1082 }
1083 
1084 /// If MBBI references a pseudo instruction that should be expanded here,
1085 /// do the expansion and return true.  Otherwise return false.
1086 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
1087                                    MachineBasicBlock::iterator MBBI,
1088                                    MachineBasicBlock::iterator &NextMBBI) {
1089   MachineInstr &MI = *MBBI;
1090   unsigned Opcode = MI.getOpcode();
1091 
1092   // Check if we can expand the destructive op
1093   int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
1094   if (OrigInstr != -1) {
1095     auto &Orig = TII->get(OrigInstr);
1096     if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
1097         AArch64::NotDestructive) {
1098       return expand_DestructiveOp(MI, MBB, MBBI);
1099     }
1100   }
1101 
1102   switch (Opcode) {
1103   default:
1104     break;
1105 
1106   case AArch64::BSPv8i8:
1107   case AArch64::BSPv16i8: {
1108     Register DstReg = MI.getOperand(0).getReg();
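    // BSP has no hardware encoding; expand to BIT, BIF or BSL depending on
    // which source operand the destination register already matches (these
    // instructions all read their destination), inserting an ORR copy first
    // when it matches none of them.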
1109     if (DstReg == MI.getOperand(3).getReg()) {
1110       // Expand to BIT
1111       BuildMI(MBB, MBBI, MI.getDebugLoc(),
1112               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
1113                                                   : AArch64::BITv16i8))
1114           .add(MI.getOperand(0))
1115           .add(MI.getOperand(3))
1116           .add(MI.getOperand(2))
1117           .add(MI.getOperand(1));
1118     } else if (DstReg == MI.getOperand(2).getReg()) {
1119       // Expand to BIF
1120       BuildMI(MBB, MBBI, MI.getDebugLoc(),
1121               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
1122                                                   : AArch64::BIFv16i8))
1123           .add(MI.getOperand(0))
1124           .add(MI.getOperand(2))
1125           .add(MI.getOperand(3))
1126           .add(MI.getOperand(1));
1127     } else {
1128       // Expand to BSL, use additional move if required
1129       if (DstReg == MI.getOperand(1).getReg()) {
1130         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1131                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1132                                                     : AArch64::BSLv16i8))
1133             .add(MI.getOperand(0))
1134             .add(MI.getOperand(1))
1135             .add(MI.getOperand(2))
1136             .add(MI.getOperand(3));
1137       } else {
1138         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1139                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
1140                                                     : AArch64::ORRv16i8))
1141             .addReg(DstReg,
1142                     RegState::Define |
1143                         getRenamableRegState(MI.getOperand(0).isRenamable()))
1144             .add(MI.getOperand(1))
1145             .add(MI.getOperand(1));
1146         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1147                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1148                                                     : AArch64::BSLv16i8))
1149             .add(MI.getOperand(0))
1150             .addReg(DstReg,
1151                     RegState::Kill |
1152                         getRenamableRegState(MI.getOperand(0).isRenamable()))
1153             .add(MI.getOperand(2))
1154             .add(MI.getOperand(3));
1155       }
1156     }
1157     MI.eraseFromParent();
1158     return true;
1159   }
1160 
1161   case AArch64::ADDWrr:
1162   case AArch64::SUBWrr:
1163   case AArch64::ADDXrr:
1164   case AArch64::SUBXrr:
1165   case AArch64::ADDSWrr:
1166   case AArch64::SUBSWrr:
1167   case AArch64::ADDSXrr:
1168   case AArch64::SUBSXrr:
1169   case AArch64::ANDWrr:
1170   case AArch64::ANDXrr:
1171   case AArch64::BICWrr:
1172   case AArch64::BICXrr:
1173   case AArch64::ANDSWrr:
1174   case AArch64::ANDSXrr:
1175   case AArch64::BICSWrr:
1176   case AArch64::BICSXrr:
1177   case AArch64::EONWrr:
1178   case AArch64::EONXrr:
1179   case AArch64::EORWrr:
1180   case AArch64::EORXrr:
1181   case AArch64::ORNWrr:
1182   case AArch64::ORNXrr:
1183   case AArch64::ORRWrr:
1184   case AArch64::ORRXrr: {
1185     unsigned Opcode;
1186     switch (MI.getOpcode()) {
1187     default:
1188       return false;
1189     case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
1190     case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
1191     case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
1192     case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
1193     case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
1194     case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
1195     case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
1196     case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
1197     case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
1198     case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
1199     case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
1200     case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
1201     case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
1202     case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
1203     case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
1204     case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
1205     case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
1206     case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
1207     case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
1208     case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
1209     case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
1210     case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
1211     case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
1212     case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
1213     }
1214     MachineFunction &MF = *MBB.getParent();
1215     // Create the new instruction without implicit operands added.
1216     MachineInstr *NewMI = MF.CreateMachineInstr(
1217         TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
1218     MBB.insert(MBBI, NewMI);
1219     MachineInstrBuilder MIB1(MF, NewMI);
1220     MIB1->setPCSections(MF, MI.getPCSections());
1221     MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
1222         .add(MI.getOperand(1))
1223         .add(MI.getOperand(2))
1224         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
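    // With a shift amount of LSL #0 the shifted-register form behaves exactly
    // like the plain register-register pseudo it replaces.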
1225     transferImpOps(MI, MIB1, MIB1);
1226     if (auto DebugNumber = MI.peekDebugInstrNum())
1227       NewMI->setDebugInstrNum(DebugNumber);
1228     MI.eraseFromParent();
1229     return true;
1230   }
1231 
1232   case AArch64::LOADgot: {
1233     MachineFunction *MF = MBB.getParent();
1234     Register DstReg = MI.getOperand(0).getReg();
1235     const MachineOperand &MO1 = MI.getOperand(1);
1236     unsigned Flags = MO1.getTargetFlags();
1237 
1238     if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
1239       // Tiny code model: expand to a literal LDR.
1240       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1241                                         TII->get(AArch64::LDRXl), DstReg);
1242 
1243       if (MO1.isGlobal()) {
1244         MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
1245       } else if (MO1.isSymbol()) {
1246         MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
1247       } else {
1248         assert(MO1.isCPI() &&
1249                "Only expect globals, externalsymbols, or constant pools");
1250         MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
1251       }
1252     } else {
1253       // Small code model: expand into ADRP + LDR.
1254       MachineFunction &MF = *MI.getParent()->getParent();
1255       DebugLoc DL = MI.getDebugLoc();
1256       MachineInstrBuilder MIB1 =
1257           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
1258 
1259       MachineInstrBuilder MIB2;
1260       if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
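        // ILP32: pointers are 32 bits, so perform a 32-bit load into the W
        // sub-register of the destination.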
1261         auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1262         unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
1263         unsigned DstFlags = MI.getOperand(0).getTargetFlags();
1264         MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
1265                    .addDef(Reg32)
1266                    .addReg(DstReg, RegState::Kill)
1267                    .addReg(DstReg, DstFlags | RegState::Implicit);
1268       } else {
1269         Register DstReg = MI.getOperand(0).getReg();
1270         MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
1271                    .add(MI.getOperand(0))
1272                    .addUse(DstReg, RegState::Kill);
1273       }
1274 
1275       if (MO1.isGlobal()) {
1276         MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
1277         MIB2.addGlobalAddress(MO1.getGlobal(), 0,
1278                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1279       } else if (MO1.isSymbol()) {
1280         MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
1281         MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
1282                                                         AArch64II::MO_PAGEOFF |
1283                                                         AArch64II::MO_NC);
1284       } else {
1285         assert(MO1.isCPI() &&
1286                "Only expect globals, externalsymbols, or constant pools");
1287         MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1288                                   Flags | AArch64II::MO_PAGE);
1289         MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1290                                   Flags | AArch64II::MO_PAGEOFF |
1291                                       AArch64II::MO_NC);
1292       }
1293 
1294       transferImpOps(MI, MIB1, MIB2);
1295     }
1296     MI.eraseFromParent();
1297     return true;
1298   }
1299   case AArch64::MOVaddrBA: {
1300     MachineFunction &MF = *MI.getParent()->getParent();
1301     if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1302       // blockaddress expressions have to come from a constant pool because the
1303       // largest addend (and hence offset within a function) allowed for ADRP is
1304       // only 8MB.
1305       const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
1306       assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1307 
1308       MachineConstantPool *MCP = MF.getConstantPool();
1309       unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));
1310 
1311       Register DstReg = MI.getOperand(0).getReg();
1312       auto MIB1 =
1313           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1314               .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
1315       auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1316                           TII->get(AArch64::LDRXui), DstReg)
1317                       .addUse(DstReg)
1318                       .addConstantPoolIndex(
1319                           CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1320       transferImpOps(MI, MIB1, MIB2);
1321       MI.eraseFromParent();
1322       return true;
1323     }
1324   }
1325     [[fallthrough]];
1326   case AArch64::MOVaddr:
1327   case AArch64::MOVaddrJT:
1328   case AArch64::MOVaddrCP:
1329   case AArch64::MOVaddrTLS:
1330   case AArch64::MOVaddrEXT: {
1331     // Expand into ADRP + ADD.
1332     Register DstReg = MI.getOperand(0).getReg();
1333     assert(DstReg != AArch64::XZR);
1334     MachineInstrBuilder MIB1 =
1335         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1336             .add(MI.getOperand(1));
1337 
1338     if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
1339       // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1340       // We do so by creating a MOVK that sets bits 48-63 of the register to
1341       // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1342       // the small code model, so we can assume a binary size of <= 4GB, which
1343       // makes the untagged PC-relative offset positive. The binary must also
1344       // be loaded into the address range [0, 2^48). Both properties need to
1345       // be ensured at runtime when using tagged addresses.
1346       auto Tag = MI.getOperand(1);
1347       Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1348       Tag.setOffset(0x100000000);
1349       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
1350           .addReg(DstReg)
1351           .add(Tag)
1352           .addImm(48);
1353     }
1354 
1355     MachineInstrBuilder MIB2 =
1356         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1357             .add(MI.getOperand(0))
1358             .addReg(DstReg)
1359             .add(MI.getOperand(2))
1360             .addImm(0);
1361 
1362     transferImpOps(MI, MIB1, MIB2);
1363     MI.eraseFromParent();
1364     return true;
1365   }
1366   case AArch64::ADDlowTLS:
1367     // Produce a plain ADD.
1368     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1369         .add(MI.getOperand(0))
1370         .add(MI.getOperand(1))
1371         .add(MI.getOperand(2))
1372         .addImm(0);
1373     MI.eraseFromParent();
1374     return true;
1375 
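  // Read the thread pointer: TPIDR_EL0 by default, or the EL1/EL2/EL3 or
  // read-only EL0 variant when the subtarget requests it.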
1376   case AArch64::MOVbaseTLS: {
1377     Register DstReg = MI.getOperand(0).getReg();
1378     auto SysReg = AArch64SysReg::TPIDR_EL0;
1379     MachineFunction *MF = MBB.getParent();
1380     if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1381       SysReg = AArch64SysReg::TPIDR_EL3;
1382     else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1383       SysReg = AArch64SysReg::TPIDR_EL2;
1384     else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1385       SysReg = AArch64SysReg::TPIDR_EL1;
1386     else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
1387       SysReg = AArch64SysReg::TPIDRRO_EL0;
1388     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
1389         .addImm(SysReg);
1390     MI.eraseFromParent();
1391     return true;
1392   }
1393 
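  // Immediate materialization: expandMOVImm emits a short sequence for the
  // 32- or 64-bit value, typically MOVZ/MOVN plus MOVKs or a single
  // logical-immediate ORR.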
1394   case AArch64::MOVi32imm:
1395     return expandMOVImm(MBB, MBBI, 32);
1396   case AArch64::MOVi64imm:
1397     return expandMOVImm(MBB, MBBI, 64);
1398   case AArch64::RET_ReallyLR: {
1399     // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1400     // function and missing live-ins. We are fine in practice because callee
1401     // saved register handling ensures the register value is restored before
1402     // RET, but we need the undef flag here to appease the MachineVerifier
1403     // liveness checks.
1404     MachineInstrBuilder MIB =
1405         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
1406           .addReg(AArch64::LR, RegState::Undef);
1407     transferImpOps(MI, MIB, MIB);
1408     MI.eraseFromParent();
1409     return true;
1410   }
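  // Compare-and-swap pseudos become load-exclusive / store-exclusive retry
  // loops; the sub-word variants compare the loaded value with a
  // zero-extending SUBS.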
1411   case AArch64::CMP_SWAP_8:
1412     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1413                           AArch64::SUBSWrx,
1414                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
1415                           AArch64::WZR, NextMBBI);
1416   case AArch64::CMP_SWAP_16:
1417     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1418                           AArch64::SUBSWrx,
1419                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
1420                           AArch64::WZR, NextMBBI);
1421   case AArch64::CMP_SWAP_32:
1422     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1423                           AArch64::SUBSWrs,
1424                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1425                           AArch64::WZR, NextMBBI);
1426   case AArch64::CMP_SWAP_64:
1427     return expandCMP_SWAP(MBB, MBBI,
1428                           AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1429                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1430                           AArch64::XZR, NextMBBI);
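  // The 128-bit variants use paired exclusives; the memory-ordering suffix on
  // the pseudo selects the acquire/release flavour of the pair.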
1431   case AArch64::CMP_SWAP_128:
1432   case AArch64::CMP_SWAP_128_RELEASE:
1433   case AArch64::CMP_SWAP_128_ACQUIRE:
1434   case AArch64::CMP_SWAP_128_MONOTONIC:
1435     return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1436 
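  // The tied AES pseudos constrain the destination to match the source
  // (keeping AESE/AESMC-style pairs adjacent for fusion); drop the tie and
  // emit the plain instruction.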
1437   case AArch64::AESMCrrTied:
1438   case AArch64::AESIMCrrTied: {
1439     MachineInstrBuilder MIB =
1440         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1441                 TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
1442                                                         : AArch64::AESIMCrr))
1443             .add(MI.getOperand(0))
1444             .add(MI.getOperand(1));
1445     transferImpOps(MI, MIB, MIB);
1446     MI.eraseFromParent();
1447     return true;
1448    }
1449    case AArch64::IRGstack: {
1450      MachineFunction &MF = *MBB.getParent();
1451      const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1452      const AArch64FrameLowering *TFI =
1453          MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1454 
1455      // IRG does not allow an immediate offset. getTaggedBasePointerOffset should
1456      // almost always point to SP-after-prologue; if not, materialize the base
1457      // with a longer instruction sequence first.
1458      int BaseOffset = -AFI->getTaggedBasePointerOffset();
1459      Register FrameReg;
1460      StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1461          MF, BaseOffset, /*isFixed=*/false, /*isSVE=*/false, FrameReg,
1462          /*PreferFP=*/false,
1463          /*ForSimm=*/true);
1464      Register SrcReg = FrameReg;
1465      if (FrameRegOffset) {
1466        // Use output register as temporary.
1467        SrcReg = MI.getOperand(0).getReg();
1468        emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
1469                        FrameRegOffset, TII);
1470      }
1471      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
1472          .add(MI.getOperand(0))
1473          .addUse(SrcReg)
1474          .add(MI.getOperand(2));
1475      MI.eraseFromParent();
1476      return true;
1477    }
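   // TAGPstack carries a signed offset: select ADDG or SUBG by its sign and
   // encode the magnitude.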
1478    case AArch64::TAGPstack: {
1479      int64_t Offset = MI.getOperand(2).getImm();
1480      BuildMI(MBB, MBBI, MI.getDebugLoc(),
1481              TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1482          .add(MI.getOperand(0))
1483          .add(MI.getOperand(1))
1484          .addImm(std::abs(Offset))
1485          .add(MI.getOperand(4));
1486      MI.eraseFromParent();
1487      return true;
1488    }
1489    case AArch64::STGloop_wback:
1490    case AArch64::STZGloop_wback:
1491      return expandSetTagLoop(MBB, MBBI, NextMBBI);
1492    case AArch64::STGloop:
1493    case AArch64::STZGloop:
1494      report_fatal_error(
1495          "Non-writeback variants of STGloop / STZGloop should not "
1496          "survive past PrologEpilogInserter.");
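   // SVE tuple spill/fill pseudos are split into consecutive single-vector
   // (or single-predicate) stores and loads.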
1497    case AArch64::STR_ZZZZXI:
1498      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
1499    case AArch64::STR_ZZZXI:
1500      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
1501    case AArch64::STR_ZZXI:
1502      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
1503    case AArch64::STR_PPXI:
1504      return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
1505    case AArch64::LDR_ZZZZXI:
1506      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
1507    case AArch64::LDR_ZZZXI:
1508      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
1509    case AArch64::LDR_ZZXI:
1510      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
1511    case AArch64::LDR_PPXI:
1512      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
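   // Call pseudos that expand to a call plus something that must stay
   // immediately after it: the attached return-value marker sequence for
   // BLR_RVMARKER, a BTI landing pad for BLR_BTI.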
1513    case AArch64::BLR_RVMARKER:
1514      return expandCALL_RVMARKER(MBB, MBBI);
1515    case AArch64::BLR_BTI:
1516      return expandCALL_BTI(MBB, MBBI);
1517    case AArch64::StoreSwiftAsyncContext:
1518      return expandStoreSwiftAsyncContext(MBB, MBBI);
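   // The SME lazy-save restore and the conditional streaming-mode toggle may
   // split the current block with a conditional branch; the caller's iterator
   // is reset when that happens.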
1519    case AArch64::RestoreZAPseudo: {
1520      auto *NewMBB = expandRestoreZA(MBB, MBBI);
1521      if (NewMBB != &MBB)
1522        NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1523      return true;
1524    }
1525    case AArch64::MSRpstatePseudo: {
1526      auto *NewMBB = expandCondSMToggle(MBB, MBBI);
1527      if (NewMBB != &MBB)
1528        NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1529      return true;
1530    }
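   // SME2 multi-vector load pseudos: the register class assigned to the
   // destination tuple (contiguous or strided) selects the real opcode.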
1531    case AArch64::LD1B_2Z_IMM_PSEUDO:
1532      return expandMultiVecPseudo(
1533          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1534          AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
1535    case AArch64::LD1H_2Z_IMM_PSEUDO:
1536      return expandMultiVecPseudo(
1537          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1538          AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
1539    case AArch64::LD1W_2Z_IMM_PSEUDO:
1540      return expandMultiVecPseudo(
1541          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1542          AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
1543    case AArch64::LD1D_2Z_IMM_PSEUDO:
1544      return expandMultiVecPseudo(
1545          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1546          AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
1547    case AArch64::LDNT1B_2Z_IMM_PSEUDO:
1548      return expandMultiVecPseudo(
1549          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1550          AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
1551    case AArch64::LDNT1H_2Z_IMM_PSEUDO:
1552      return expandMultiVecPseudo(
1553          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1554          AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
1555    case AArch64::LDNT1W_2Z_IMM_PSEUDO:
1556      return expandMultiVecPseudo(
1557          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1558          AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
1559    case AArch64::LDNT1D_2Z_IMM_PSEUDO:
1560      return expandMultiVecPseudo(
1561          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1562          AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
1563    case AArch64::LD1B_2Z_PSEUDO:
1564      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1565                                  AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
1566                                  AArch64::LD1B_2Z_STRIDED);
1567    case AArch64::LD1H_2Z_PSEUDO:
1568      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1569                                  AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
1570                                  AArch64::LD1H_2Z_STRIDED);
1571    case AArch64::LD1W_2Z_PSEUDO:
1572      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1573                                  AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
1574                                  AArch64::LD1W_2Z_STRIDED);
1575    case AArch64::LD1D_2Z_PSEUDO:
1576      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1577                                  AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
1578                                  AArch64::LD1D_2Z_STRIDED);
1579    case AArch64::LDNT1B_2Z_PSEUDO:
1580      return expandMultiVecPseudo(
1581          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1582          AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
1583    case AArch64::LDNT1H_2Z_PSEUDO:
1584      return expandMultiVecPseudo(
1585          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1586          AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
1587    case AArch64::LDNT1W_2Z_PSEUDO:
1588      return expandMultiVecPseudo(
1589          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1590          AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
1591    case AArch64::LDNT1D_2Z_PSEUDO:
1592      return expandMultiVecPseudo(
1593          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1594          AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
1595    case AArch64::LD1B_4Z_IMM_PSEUDO:
1596      return expandMultiVecPseudo(
1597          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1598          AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
1599    case AArch64::LD1H_4Z_IMM_PSEUDO:
1600      return expandMultiVecPseudo(
1601          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1602          AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
1603    case AArch64::LD1W_4Z_IMM_PSEUDO:
1604      return expandMultiVecPseudo(
1605          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1606          AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
1607    case AArch64::LD1D_4Z_IMM_PSEUDO:
1608      return expandMultiVecPseudo(
1609          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1610          AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
1611    case AArch64::LDNT1B_4Z_IMM_PSEUDO:
1612      return expandMultiVecPseudo(
1613          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1614          AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
1615    case AArch64::LDNT1H_4Z_IMM_PSEUDO:
1616      return expandMultiVecPseudo(
1617          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1618          AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
1619    case AArch64::LDNT1W_4Z_IMM_PSEUDO:
1620      return expandMultiVecPseudo(
1621          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1622          AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
1623    case AArch64::LDNT1D_4Z_IMM_PSEUDO:
1624      return expandMultiVecPseudo(
1625          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1626          AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
1627    case AArch64::LD1B_4Z_PSEUDO:
1628      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1629                                  AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
1630                                  AArch64::LD1B_4Z_STRIDED);
1631    case AArch64::LD1H_4Z_PSEUDO:
1632      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1633                                  AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
1634                                  AArch64::LD1H_4Z_STRIDED);
1635    case AArch64::LD1W_4Z_PSEUDO:
1636      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1637                                  AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
1638                                  AArch64::LD1W_4Z_STRIDED);
1639    case AArch64::LD1D_4Z_PSEUDO:
1640      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1641                                  AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
1642                                  AArch64::LD1D_4Z_STRIDED);
1643    case AArch64::LDNT1B_4Z_PSEUDO:
1644      return expandMultiVecPseudo(
1645          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1646          AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
1647    case AArch64::LDNT1H_4Z_PSEUDO:
1648      return expandMultiVecPseudo(
1649          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1650          AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
1651    case AArch64::LDNT1W_4Z_PSEUDO:
1652      return expandMultiVecPseudo(
1653          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1654          AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
1655    case AArch64::LDNT1D_4Z_PSEUDO:
1656      return expandMultiVecPseudo(
1657          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1658          AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
1659   }
1660   return false;
1661 }
1662 
1663 /// Iterate over the instructions in basic block MBB and expand any
1664 /// pseudo instructions.  Return true if anything was modified.
1665 bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
1666   bool Modified = false;
1667 
1668   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1669   while (MBBI != E) {
1670     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
1671     Modified |= expandMI(MBB, MBBI, NMBBI);
1672     MBBI = NMBBI;
1673   }
1674 
1675   return Modified;
1676 }
1677 
1678 bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
1679   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
1680 
1681   bool Modified = false;
1682   for (auto &MBB : MF)
1683     Modified |= expandMBB(MBB);
1684   return Modified;
1685 }
1686 
1687 /// Returns an instance of the pseudo instruction expansion pass.
1688 FunctionPass *llvm::createAArch64ExpandPseudoPass() {
1689   return new AArch64ExpandPseudo();
1690 }
1691