xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp (revision c66a499e037efd268a744e487e7d0c45a4944a9b)
1 //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands pseudo instructions into target
10 // instructions to allow proper scheduling and other late optimizations.  This
11 // pass should be run after register allocation but before the post-regalloc
12 // scheduling pass.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64ExpandImm.h"
17 #include "AArch64InstrInfo.h"
18 #include "AArch64MachineFunctionInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/Triple.h"
24 #include "llvm/CodeGen/LivePhysRegs.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstr.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/CodeGen/TargetSubtargetInfo.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/MC/MCInstrDesc.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/CodeGen.h"
37 #include "llvm/Support/MathExtras.h"
38 #include "llvm/Target/TargetMachine.h"
39 #include <cassert>
40 #include <cstdint>
41 #include <iterator>
42 #include <limits>
43 #include <utility>
44 
45 using namespace llvm;
46 
47 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
48 
49 namespace {
50 
51 class AArch64ExpandPseudo : public MachineFunctionPass {
52 public:
53   const AArch64InstrInfo *TII;
54 
55   static char ID;
56 
57   AArch64ExpandPseudo() : MachineFunctionPass(ID) {
58     initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
59   }
60 
61   bool runOnMachineFunction(MachineFunction &Fn) override;
62 
63   StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
64 
65 private:
66   bool expandMBB(MachineBasicBlock &MBB);
67   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
68                 MachineBasicBlock::iterator &NextMBBI);
69   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
70                     unsigned BitSize);
71 
72   bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
73                             MachineBasicBlock::iterator MBBI);
74   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
75                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
76                       unsigned ExtendImm, unsigned ZeroReg,
77                       MachineBasicBlock::iterator &NextMBBI);
78   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
79                           MachineBasicBlock::iterator MBBI,
80                           MachineBasicBlock::iterator &NextMBBI);
81   bool expandSetTagLoop(MachineBasicBlock &MBB,
82                         MachineBasicBlock::iterator MBBI,
83                         MachineBasicBlock::iterator &NextMBBI);
84   bool expandSVESpillFill(MachineBasicBlock &MBB,
85                           MachineBasicBlock::iterator MBBI, unsigned Opc,
86                           unsigned N);
87   bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
88                            MachineBasicBlock::iterator MBBI);
89   bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
90   bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
91                                     MachineBasicBlock::iterator MBBI);
92   MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
93                                      MachineBasicBlock::iterator MBBI);
94   MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
95                                         MachineBasicBlock::iterator MBBI);
96 };
97 
98 } // end anonymous namespace
99 
100 char AArch64ExpandPseudo::ID = 0;
101 
102 INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
103                 AARCH64_EXPAND_PSEUDO_NAME, false, false)
104 
105 /// Transfer implicit operands on the pseudo instruction to the
106 /// instructions created from the expansion.
107 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
108                            MachineInstrBuilder &DefMI) {
109   const MCInstrDesc &Desc = OldMI.getDesc();
110   for (const MachineOperand &MO :
111        llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
112     assert(MO.isReg() && MO.getReg());
113     if (MO.isUse())
114       UseMI.add(MO);
115     else
116       DefMI.add(MO);
117   }
118 }
119 
120 /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
121 /// real move-immediate instructions to synthesize the immediate.
122 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
123                                        MachineBasicBlock::iterator MBBI,
124                                        unsigned BitSize) {
125   MachineInstr &MI = *MBBI;
126   Register DstReg = MI.getOperand(0).getReg();
127   uint64_t RenamableState =
128       MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
129   uint64_t Imm = MI.getOperand(1).getImm();
130 
131   if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
132     // Useless def, and we don't want to risk creating an invalid ORR (which
133     // would really write to sp).
134     MI.eraseFromParent();
135     return true;
136   }
137 
138   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
139   AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
140   assert(Insn.size() != 0);
141 
142   SmallVector<MachineInstrBuilder, 4> MIBS;
143   for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
144     bool LastItem = std::next(I) == E;
145     switch (I->Opcode)
146     {
147     default: llvm_unreachable("unhandled!"); break;
148 
149     case AArch64::ORRWri:
150     case AArch64::ORRXri:
151       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
152         .add(MI.getOperand(0))
153         .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
154         .addImm(I->Op2));
155       break;
156     case AArch64::MOVNWi:
157     case AArch64::MOVNXi:
158     case AArch64::MOVZWi:
159     case AArch64::MOVZXi: {
160       bool DstIsDead = MI.getOperand(0).isDead();
161       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
162         .addReg(DstReg, RegState::Define |
163                 getDeadRegState(DstIsDead && LastItem) |
164                 RenamableState)
165         .addImm(I->Op1)
166         .addImm(I->Op2));
167       } break;
168     case AArch64::MOVKWi:
169     case AArch64::MOVKXi: {
170       Register DstReg = MI.getOperand(0).getReg();
171       bool DstIsDead = MI.getOperand(0).isDead();
172       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
173         .addReg(DstReg,
174                 RegState::Define |
175                 getDeadRegState(DstIsDead && LastItem) |
176                 RenamableState)
177         .addReg(DstReg)
178         .addImm(I->Op1)
179         .addImm(I->Op2));
180       } break;
181     }
182   }
183   transferImpOps(MI, MIBS.front(), MIBS.back());
184   MI.eraseFromParent();
185   return true;
186 }
187 
188 bool AArch64ExpandPseudo::expandCMP_SWAP(
189     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
190     unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
191     MachineBasicBlock::iterator &NextMBBI) {
192   MachineInstr &MI = *MBBI;
193   MIMetadata MIMD(MI);
194   const MachineOperand &Dest = MI.getOperand(0);
195   Register StatusReg = MI.getOperand(1).getReg();
196   bool StatusDead = MI.getOperand(1).isDead();
197   // Duplicating undef operands into 2 instructions does not guarantee the same
198   // value on both; However undef should be replaced by xzr anyway.
199   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
200   Register AddrReg = MI.getOperand(2).getReg();
201   Register DesiredReg = MI.getOperand(3).getReg();
202   Register NewReg = MI.getOperand(4).getReg();
203 
204   MachineFunction *MF = MBB.getParent();
205   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
206   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
207   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
208 
209   MF->insert(++MBB.getIterator(), LoadCmpBB);
210   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
211   MF->insert(++StoreBB->getIterator(), DoneBB);
212 
213   // .Lloadcmp:
214   //     mov wStatus, 0
215   //     ldaxr xDest, [xAddr]
216   //     cmp xDest, xDesired
217   //     b.ne .Ldone
218   if (!StatusDead)
219     BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
220       .addImm(0).addImm(0);
221   BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
222       .addReg(AddrReg);
223   BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
224       .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
225       .addReg(DesiredReg)
226       .addImm(ExtendImm);
227   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
228       .addImm(AArch64CC::NE)
229       .addMBB(DoneBB)
230       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
231   LoadCmpBB->addSuccessor(DoneBB);
232   LoadCmpBB->addSuccessor(StoreBB);
233 
234   // .Lstore:
235   //     stlxr wStatus, xNew, [xAddr]
236   //     cbnz wStatus, .Lloadcmp
237   BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
238       .addReg(NewReg)
239       .addReg(AddrReg);
240   BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
241       .addReg(StatusReg, getKillRegState(StatusDead))
242       .addMBB(LoadCmpBB);
243   StoreBB->addSuccessor(LoadCmpBB);
244   StoreBB->addSuccessor(DoneBB);
245 
246   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
247   DoneBB->transferSuccessors(&MBB);
248 
249   MBB.addSuccessor(LoadCmpBB);
250 
251   NextMBBI = MBB.end();
252   MI.eraseFromParent();
253 
254   // Recompute livein lists.
255   LivePhysRegs LiveRegs;
256   computeAndAddLiveIns(LiveRegs, *DoneBB);
257   computeAndAddLiveIns(LiveRegs, *StoreBB);
258   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
259   // Do an extra pass around the loop to get loop carried registers right.
260   StoreBB->clearLiveIns();
261   computeAndAddLiveIns(LiveRegs, *StoreBB);
262   LoadCmpBB->clearLiveIns();
263   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
264 
265   return true;
266 }
267 
268 bool AArch64ExpandPseudo::expandCMP_SWAP_128(
269     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
270     MachineBasicBlock::iterator &NextMBBI) {
271   MachineInstr &MI = *MBBI;
272   MIMetadata MIMD(MI);
273   MachineOperand &DestLo = MI.getOperand(0);
274   MachineOperand &DestHi = MI.getOperand(1);
275   Register StatusReg = MI.getOperand(2).getReg();
276   bool StatusDead = MI.getOperand(2).isDead();
277   // Duplicating undef operands into 2 instructions does not guarantee the same
278   // value on both; However undef should be replaced by xzr anyway.
279   assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
280   Register AddrReg = MI.getOperand(3).getReg();
281   Register DesiredLoReg = MI.getOperand(4).getReg();
282   Register DesiredHiReg = MI.getOperand(5).getReg();
283   Register NewLoReg = MI.getOperand(6).getReg();
284   Register NewHiReg = MI.getOperand(7).getReg();
285 
286   unsigned LdxpOp, StxpOp;
287 
288   switch (MI.getOpcode()) {
289   case AArch64::CMP_SWAP_128_MONOTONIC:
290     LdxpOp = AArch64::LDXPX;
291     StxpOp = AArch64::STXPX;
292     break;
293   case AArch64::CMP_SWAP_128_RELEASE:
294     LdxpOp = AArch64::LDXPX;
295     StxpOp = AArch64::STLXPX;
296     break;
297   case AArch64::CMP_SWAP_128_ACQUIRE:
298     LdxpOp = AArch64::LDAXPX;
299     StxpOp = AArch64::STXPX;
300     break;
301   case AArch64::CMP_SWAP_128:
302     LdxpOp = AArch64::LDAXPX;
303     StxpOp = AArch64::STLXPX;
304     break;
305   default:
306     llvm_unreachable("Unexpected opcode");
307   }
308 
309   MachineFunction *MF = MBB.getParent();
310   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
311   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
312   auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
313   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
314 
315   MF->insert(++MBB.getIterator(), LoadCmpBB);
316   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
317   MF->insert(++StoreBB->getIterator(), FailBB);
318   MF->insert(++FailBB->getIterator(), DoneBB);
319 
320   // .Lloadcmp:
321   //     ldaxp xDestLo, xDestHi, [xAddr]
322   //     cmp xDestLo, xDesiredLo
323   //     sbcs xDestHi, xDesiredHi
324   //     b.ne .Ldone
325   BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
326       .addReg(DestLo.getReg(), RegState::Define)
327       .addReg(DestHi.getReg(), RegState::Define)
328       .addReg(AddrReg);
329   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
330       .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
331       .addReg(DesiredLoReg)
332       .addImm(0);
333   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
334     .addUse(AArch64::WZR)
335     .addUse(AArch64::WZR)
336     .addImm(AArch64CC::EQ);
337   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
338       .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
339       .addReg(DesiredHiReg)
340       .addImm(0);
341   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
342       .addUse(StatusReg, RegState::Kill)
343       .addUse(StatusReg, RegState::Kill)
344       .addImm(AArch64CC::EQ);
345   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
346       .addUse(StatusReg, getKillRegState(StatusDead))
347       .addMBB(FailBB);
348   LoadCmpBB->addSuccessor(FailBB);
349   LoadCmpBB->addSuccessor(StoreBB);
350 
351   // .Lstore:
352   //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
353   //     cbnz wStatus, .Lloadcmp
354   BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
355       .addReg(NewLoReg)
356       .addReg(NewHiReg)
357       .addReg(AddrReg);
358   BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
359       .addReg(StatusReg, getKillRegState(StatusDead))
360       .addMBB(LoadCmpBB);
361   BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
362   StoreBB->addSuccessor(LoadCmpBB);
363   StoreBB->addSuccessor(DoneBB);
364 
365   // .Lfail:
366   //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
367   //     cbnz wStatus, .Lloadcmp
368   BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
369       .addReg(DestLo.getReg())
370       .addReg(DestHi.getReg())
371       .addReg(AddrReg);
372   BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
373       .addReg(StatusReg, getKillRegState(StatusDead))
374       .addMBB(LoadCmpBB);
375   FailBB->addSuccessor(LoadCmpBB);
376   FailBB->addSuccessor(DoneBB);
377 
378   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
379   DoneBB->transferSuccessors(&MBB);
380 
381   MBB.addSuccessor(LoadCmpBB);
382 
383   NextMBBI = MBB.end();
384   MI.eraseFromParent();
385 
386   // Recompute liveness bottom up.
387   LivePhysRegs LiveRegs;
388   computeAndAddLiveIns(LiveRegs, *DoneBB);
389   computeAndAddLiveIns(LiveRegs, *FailBB);
390   computeAndAddLiveIns(LiveRegs, *StoreBB);
391   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
392 
393   // Do an extra pass in the loop to get the loop carried dependencies right.
394   FailBB->clearLiveIns();
395   computeAndAddLiveIns(LiveRegs, *FailBB);
396   StoreBB->clearLiveIns();
397   computeAndAddLiveIns(LiveRegs, *StoreBB);
398   LoadCmpBB->clearLiveIns();
399   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
400 
401   return true;
402 }
403 
404 /// \brief Expand Pseudos to Instructions with destructive operands.
405 ///
406 /// This mechanism uses MOVPRFX instructions for zeroing the false lanes
407 /// or for fixing relaxed register allocation conditions to comply with
408 /// the instructions register constraints. The latter case may be cheaper
409 /// than setting the register constraints in the register allocator,
410 /// since that will insert regular MOV instructions rather than MOVPRFX.
411 ///
412 /// Example (after register allocation):
413 ///
414 ///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
415 ///
416 /// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
417 /// * We cannot map directly to FSUB_ZPmZ_B because the register
418 ///   constraints of the instruction are not met.
419 /// * Also the _ZERO specifies the false lanes need to be zeroed.
420 ///
421 /// We first try to see if the destructive operand == result operand,
422 /// if not, we try to swap the operands, e.g.
423 ///
424 ///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
425 ///
426 /// But because FSUB_ZPmZ is not commutative, this is semantically
427 /// different, so we need a reverse instruction:
428 ///
429 ///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
430 ///
431 /// Then we implement the zeroing of the false lanes of Z0 by adding
432 /// a zeroing MOVPRFX instruction:
433 ///
434 ///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
435 ///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
436 ///
437 /// Note that this can only be done for _ZERO or _UNDEF variants where
438 /// we can guarantee the false lanes to be zeroed (by implementing this)
439 /// or that they are undef (don't care / not used), otherwise the
440 /// swapping of operands is illegal because the operation is not
441 /// (or cannot be emulated to be) fully commutative.
442 bool AArch64ExpandPseudo::expand_DestructiveOp(
443                             MachineInstr &MI,
444                             MachineBasicBlock &MBB,
445                             MachineBasicBlock::iterator MBBI) {
446   unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
447   uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
448   uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
449   bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
450   Register DstReg = MI.getOperand(0).getReg();
451   bool DstIsDead = MI.getOperand(0).isDead();
452   bool UseRev = false;
453   unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
454 
455   switch (DType) {
456   case AArch64::DestructiveBinaryComm:
457   case AArch64::DestructiveBinaryCommWithRev:
458     if (DstReg == MI.getOperand(3).getReg()) {
459       // FSUB Zd, Pg, Zs1, Zd  ==> FSUBR   Zd, Pg/m, Zd, Zs1
460       std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
461       UseRev = true;
462       break;
463     }
464     [[fallthrough]];
465   case AArch64::DestructiveBinary:
466   case AArch64::DestructiveBinaryImm:
467     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
468     break;
469   case AArch64::DestructiveUnaryPassthru:
470     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
471     break;
472   case AArch64::DestructiveTernaryCommWithRev:
473     std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
474     if (DstReg == MI.getOperand(3).getReg()) {
475       // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
476       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
477       UseRev = true;
478     } else if (DstReg == MI.getOperand(4).getReg()) {
479       // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
480       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
481       UseRev = true;
482     }
483     break;
484   default:
485     llvm_unreachable("Unsupported Destructive Operand type");
486   }
487 
488   // MOVPRFX can only be used if the destination operand
489   // is the destructive operand, not as any other operand,
490   // so the Destructive Operand must be unique.
491   bool DOPRegIsUnique = false;
492   switch (DType) {
493   case AArch64::DestructiveBinary:
494     DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
495     break;
496   case AArch64::DestructiveBinaryComm:
497   case AArch64::DestructiveBinaryCommWithRev:
498     DOPRegIsUnique =
499       DstReg != MI.getOperand(DOPIdx).getReg() ||
500       MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
501     break;
502   case AArch64::DestructiveUnaryPassthru:
503   case AArch64::DestructiveBinaryImm:
504     DOPRegIsUnique = true;
505     break;
506   case AArch64::DestructiveTernaryCommWithRev:
507     DOPRegIsUnique =
508         DstReg != MI.getOperand(DOPIdx).getReg() ||
509         (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
510          MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
511     break;
512   }
513 
514   // Resolve the reverse opcode
515   if (UseRev) {
516     int NewOpcode;
517     // e.g. DIV -> DIVR
518     if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
519       Opcode = NewOpcode;
520     // e.g. DIVR -> DIV
521     else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
522       Opcode = NewOpcode;
523   }
524 
525   // Get the right MOVPRFX
526   uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
527   unsigned MovPrfx, LSLZero, MovPrfxZero;
528   switch (ElementSize) {
529   case AArch64::ElementSizeNone:
530   case AArch64::ElementSizeB:
531     MovPrfx = AArch64::MOVPRFX_ZZ;
532     LSLZero = AArch64::LSL_ZPmI_B;
533     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
534     break;
535   case AArch64::ElementSizeH:
536     MovPrfx = AArch64::MOVPRFX_ZZ;
537     LSLZero = AArch64::LSL_ZPmI_H;
538     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
539     break;
540   case AArch64::ElementSizeS:
541     MovPrfx = AArch64::MOVPRFX_ZZ;
542     LSLZero = AArch64::LSL_ZPmI_S;
543     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
544     break;
545   case AArch64::ElementSizeD:
546     MovPrfx = AArch64::MOVPRFX_ZZ;
547     LSLZero = AArch64::LSL_ZPmI_D;
548     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
549     break;
550   default:
551     llvm_unreachable("Unsupported ElementSize");
552   }
553 
554   //
555   // Create the destructive operation (if required)
556   //
557   MachineInstrBuilder PRFX, DOP;
558   if (FalseZero) {
559     // If we cannot prefix the requested instruction we'll instead emit a
560     // prefixed_zeroing_mov for DestructiveBinary.
561     assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
562             DType == AArch64::DestructiveBinaryComm) &&
563            "The destructive operand should be unique");
564     assert(ElementSize != AArch64::ElementSizeNone &&
565            "This instruction is unpredicated");
566 
567     // Merge source operand into destination register
568     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
569                .addReg(DstReg, RegState::Define)
570                .addReg(MI.getOperand(PredIdx).getReg())
571                .addReg(MI.getOperand(DOPIdx).getReg());
572 
573     // After the movprfx, the destructive operand is same as Dst
574     DOPIdx = 0;
575 
576     // Create the additional LSL to zero the lanes when the DstReg is not
577     // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
578     // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
579     if ((DType == AArch64::DestructiveBinary ||
580          DType == AArch64::DestructiveBinaryComm) &&
581         !DOPRegIsUnique) {
582       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
583           .addReg(DstReg, RegState::Define)
584           .add(MI.getOperand(PredIdx))
585           .addReg(DstReg)
586           .addImm(0);
587     }
588   } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
589     assert(DOPRegIsUnique && "The destructive operand should be unique");
590     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
591                .addReg(DstReg, RegState::Define)
592                .addReg(MI.getOperand(DOPIdx).getReg());
593     DOPIdx = 0;
594   }
595 
596   //
597   // Create the destructive operation
598   //
599   DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
600     .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
601 
602   switch (DType) {
603   case AArch64::DestructiveUnaryPassthru:
604     DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
605         .add(MI.getOperand(PredIdx))
606         .add(MI.getOperand(SrcIdx));
607     break;
608   case AArch64::DestructiveBinary:
609   case AArch64::DestructiveBinaryImm:
610   case AArch64::DestructiveBinaryComm:
611   case AArch64::DestructiveBinaryCommWithRev:
612     DOP.add(MI.getOperand(PredIdx))
613        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
614        .add(MI.getOperand(SrcIdx));
615     break;
616   case AArch64::DestructiveTernaryCommWithRev:
617     DOP.add(MI.getOperand(PredIdx))
618         .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
619         .add(MI.getOperand(SrcIdx))
620         .add(MI.getOperand(Src2Idx));
621     break;
622   }
623 
624   if (PRFX) {
625     finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
626     transferImpOps(MI, PRFX, DOP);
627   } else
628     transferImpOps(MI, DOP, DOP);
629 
630   MI.eraseFromParent();
631   return true;
632 }
633 
634 bool AArch64ExpandPseudo::expandSetTagLoop(
635     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
636     MachineBasicBlock::iterator &NextMBBI) {
637   MachineInstr &MI = *MBBI;
638   DebugLoc DL = MI.getDebugLoc();
639   Register SizeReg = MI.getOperand(0).getReg();
640   Register AddressReg = MI.getOperand(1).getReg();
641 
642   MachineFunction *MF = MBB.getParent();
643 
644   bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
645   const unsigned OpCode1 =
646       ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
647   const unsigned OpCode2 =
648       ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
649 
650   unsigned Size = MI.getOperand(2).getImm();
651   assert(Size > 0 && Size % 16 == 0);
652   if (Size % (16 * 2) != 0) {
653     BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
654         .addReg(AddressReg)
655         .addReg(AddressReg)
656         .addImm(1);
657     Size -= 16;
658   }
659   MachineBasicBlock::iterator I =
660       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
661           .addImm(Size);
662   expandMOVImm(MBB, I, 64);
663 
664   auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
665   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
666 
667   MF->insert(++MBB.getIterator(), LoopBB);
668   MF->insert(++LoopBB->getIterator(), DoneBB);
669 
670   BuildMI(LoopBB, DL, TII->get(OpCode2))
671       .addDef(AddressReg)
672       .addReg(AddressReg)
673       .addReg(AddressReg)
674       .addImm(2)
675       .cloneMemRefs(MI)
676       .setMIFlags(MI.getFlags());
677   BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
678       .addDef(SizeReg)
679       .addReg(SizeReg)
680       .addImm(16 * 2)
681       .addImm(0);
682   BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);
683 
684   LoopBB->addSuccessor(LoopBB);
685   LoopBB->addSuccessor(DoneBB);
686 
687   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
688   DoneBB->transferSuccessors(&MBB);
689 
690   MBB.addSuccessor(LoopBB);
691 
692   NextMBBI = MBB.end();
693   MI.eraseFromParent();
694   // Recompute liveness bottom up.
695   LivePhysRegs LiveRegs;
696   computeAndAddLiveIns(LiveRegs, *DoneBB);
697   computeAndAddLiveIns(LiveRegs, *LoopBB);
698   // Do an extra pass in the loop to get the loop carried dependencies right.
699   // FIXME: is this necessary?
700   LoopBB->clearLiveIns();
701   computeAndAddLiveIns(LiveRegs, *LoopBB);
702   DoneBB->clearLiveIns();
703   computeAndAddLiveIns(LiveRegs, *DoneBB);
704 
705   return true;
706 }
707 
708 bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
709                                              MachineBasicBlock::iterator MBBI,
710                                              unsigned Opc, unsigned N) {
711   const TargetRegisterInfo *TRI =
712       MBB.getParent()->getSubtarget().getRegisterInfo();
713   MachineInstr &MI = *MBBI;
714   for (unsigned Offset = 0; Offset < N; ++Offset) {
715     int ImmOffset = MI.getOperand(2).getImm() + Offset;
716     bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
717     assert(ImmOffset >= -256 && ImmOffset < 256 &&
718            "Immediate spill offset out of range");
719     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
720         .addReg(
721             TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
722             Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
723         .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
724         .addImm(ImmOffset);
725   }
726   MI.eraseFromParent();
727   return true;
728 }
729 
730 bool AArch64ExpandPseudo::expandCALL_RVMARKER(
731     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
732   // Expand CALL_RVMARKER pseudo to:
733   // - a branch to the call target, followed by
734   // - the special `mov x29, x29` marker, and
735   // - another branch, to the runtime function
736   // Mark the sequence as bundle, to avoid passes moving other code in between.
737   MachineInstr &MI = *MBBI;
738 
739   MachineInstr *OriginalCall;
740   MachineOperand &RVTarget = MI.getOperand(0);
741   MachineOperand &CallTarget = MI.getOperand(1);
742   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
743          "invalid operand for regular call");
744   assert(RVTarget.isGlobal() && "invalid operand for attached call");
745   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
746   OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
747   OriginalCall->addOperand(CallTarget);
748 
749   unsigned RegMaskStartIdx = 2;
750   // Skip register arguments. Those are added during ISel, but are not
751   // needed for the concrete branch.
752   while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
753     auto MOP = MI.getOperand(RegMaskStartIdx);
754     assert(MOP.isReg() && "can only add register operands");
755     OriginalCall->addOperand(MachineOperand::CreateReg(
756         MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
757     RegMaskStartIdx++;
758   }
759   for (const MachineOperand &MO :
760        llvm::drop_begin(MI.operands(), RegMaskStartIdx))
761     OriginalCall->addOperand(MO);
762 
763   BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
764                      .addReg(AArch64::FP, RegState::Define)
765                      .addReg(AArch64::XZR)
766                      .addReg(AArch64::FP)
767                      .addImm(0);
768 
769   auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
770                      .add(RVTarget)
771                      .getInstr();
772 
773   if (MI.shouldUpdateCallSiteInfo())
774     MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);
775 
776   MI.eraseFromParent();
777   finalizeBundle(MBB, OriginalCall->getIterator(),
778                  std::next(RVCall->getIterator()));
779   return true;
780 }
781 
/// Expands the call-plus-BTI pseudo at MBBI (dispatched for BLR_BTI) into a
/// BL/BLR followed by a HINT #36, wraps the two instructions in a bundle, and
/// erases the pseudo.
///
/// Operand 0 of the pseudo is the call target and must be a global or a
/// register. Always returns true.
782 bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
783                                          MachineBasicBlock::iterator MBBI) {
784   // Expand CALL_BTI pseudo to:
785   // - a branch to the call target
786   // - a BTI instruction
787   // Mark the sequence as a bundle, to avoid passes moving other code in
788   // between.
789 
790   MachineInstr &MI = *MBBI;
791   MachineOperand &CallTarget = MI.getOperand(0);
792   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
793          "invalid operand for regular call");
      // A global target is called with BL, a register target with BLR.
794   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
795   MachineInstr *Call =
796       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
797   Call->addOperand(CallTarget);
      // Carry the pseudo's CFI type over to the real call instruction.
798   Call->setCFIType(*MBB.getParent(), MI.getCFIType());
799 
800   MachineInstr *BTI =
801       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
802           // BTI J so that setjmp can BR to this.
803           .addImm(36)
804           .getInstr();
805 
      // Re-associate any call-site info recorded for the pseudo with the new
      // call before the pseudo is deleted.
806   if (MI.shouldUpdateCallSiteInfo())
807     MBB.getParent()->moveCallSiteInfo(&MI, Call);
808 
809   MI.eraseFromParent();
      // Bundle the half-open range [Call, instruction after BTI).
810   finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
811   return true;
812 }
813 
/// Expands the StoreSwiftAsyncContext pseudo at MBBI and erases it.
///
/// Pseudo operands: 0 = context register, 1 = base register, 2 = immediate
/// offset from the base. When the target triple's arch name is not "arm64e"
/// the context register is stored directly with a single STRXui. Otherwise the
/// value is copied into X17, signed with PACDB using a discriminator built in
/// X16, and the signed copy is stored instead. Every emitted instruction
/// carries the FrameSetup flag. Always returns true.
814 bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
815     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
816   Register CtxReg = MBBI->getOperand(0).getReg();
817   Register BaseReg = MBBI->getOperand(1).getReg();
818   int Offset = MBBI->getOperand(2).getImm();
819   DebugLoc DL(MBBI->getDebugLoc());
820   auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
821 
      // Unsigned path: store the context register as-is.
822   if (STI.getTargetTriple().getArchName() != "arm64e") {
        // NOTE(review): the immediate is Offset / 8, presumably because
        // STRXui encodes its offset in 8-byte units; this assumes Offset is a
        // multiple of 8 - confirm against the instruction definition.
823     BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
824         .addUse(CtxReg)
825         .addUse(BaseReg)
826         .addImm(Offset / 8)
827         .setMIFlag(MachineInstr::FrameSetup);
828     MBBI->eraseFromParent();
829     return true;
830   }
831 
832   // We need to sign the context in an address-discriminated way. 0xc31a is a
833   // fixed random value, chosen as part of the ABI.
834   //     add x16, xBase, #Offset
835   //     movk x16, #0xc31a, lsl #48
836   //     mov x17, x22/xzr
837   //     pacdb x17, x16
838   //     str x17, [xBase, #Offset]
      // A negative Offset is materialised as a SUB of its magnitude.
839   unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
840   BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
841       .addUse(BaseReg)
842       .addImm(abs(Offset))
843       .addImm(0)
844       .setMIFlag(MachineInstr::FrameSetup);
      // Insert the constant 0xc31a at shift 48 of the address held in X16.
845   BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
846       .addUse(AArch64::X16)
847       .addImm(0xc31a)
848       .addImm(48)
849       .setMIFlag(MachineInstr::FrameSetup);
850   // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
851   // move it somewhere before signing.
852   BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
853       .addUse(AArch64::XZR)
854       .addUse(CtxReg)
855       .addImm(0)
856       .setMIFlag(MachineInstr::FrameSetup);
      // Sign X17 in place, using X16 as the modifier.
857   BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
858       .addUse(AArch64::X17)
859       .addUse(AArch64::X16)
860       .setMIFlag(MachineInstr::FrameSetup);
      // Store the signed copy at the same slot the unsigned path would use.
861   BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
862       .addUse(AArch64::X17)
863       .addUse(BaseReg)
864       .addImm(Offset / 8)
865       .setMIFlag(MachineInstr::FrameSetup);
866 
867   MBBI->eraseFromParent();
868   return true;
869 }
870 
/// Expands the RestoreZAPseudo at MBBI into a conditionally executed call.
///
/// A CBZX on operand 0 of the pseudo branches to a new block (SMBB) that holds
/// a BL; when the branch is not taken, an unconditional B goes straight to
/// EndBB, the block holding whatever followed the pseudo. The BL receives
/// operand 1's register as an implicit use plus copies of operands 2 onwards.
///
/// The pseudo must either be followed by another instruction or sit in a
/// block that has at least one successor (asserted below).
///
/// \returns EndBB. MBB is split by this expansion; the caller resets its
/// NextMBBI when the returned block differs from MBB.
871 MachineBasicBlock *
872 AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
873                                      MachineBasicBlock::iterator MBBI) {
874   MachineInstr &MI = *MBBI;
875   assert((std::next(MBBI) != MBB.end() ||
876           MI.getParent()->successors().begin() !=
877               MI.getParent()->successors().end()) &&
878          "Unexpected unreachable in block that restores ZA");
879 
880   // Compare TPIDR2_EL0 value against 0.
881   DebugLoc DL = MI.getDebugLoc();
      // The branch target is not known yet; it is appended once SMBB exists.
882   MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
883                                 .add(MI.getOperand(0));
884 
885   // Split MBB and create two new blocks:
886   //  - MBB now contains all instructions before RestoreZAPseudo.
887   //  - SMBB contains the RestoreZAPseudo instruction only.
888   //  - EndBB contains all instructions after RestoreZAPseudo.
      // PrevMI is the CBZX inserted just above, so the first split happens
      // right after it.
889   MachineInstr &PrevMI = *std::prev(MBBI);
890   MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
      // If the pseudo is the last instruction of SMBB there is nothing to split
      // off; SMBB's first successor is used as EndBB instead.
891   MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
892                                  ? *SMBB->successors().begin()
893                                  : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
894 
895   // Add the SMBB label to the CBZ instruction & create a branch to EndBB.
896   Cbz.addMBB(SMBB);
897   BuildMI(&MBB, DL, TII->get(AArch64::B))
898       .addMBB(EndBB);
899   MBB.addSuccessor(EndBB);
900 
901   // Replace the pseudo with a call (BL).
902   MachineInstrBuilder MIB =
903       BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
      // Operand 1's register becomes an implicit use of the call; operand 0 was
      // consumed by the CBZX and is not copied.
904   MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
905   for (unsigned I = 2; I < MI.getNumOperands(); ++I)
906     MIB.add(MI.getOperand(I));
      // Terminate SMBB with an explicit branch to EndBB.
907   BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
908 
909   MI.eraseFromParent();
910   return EndBB;
911 }
912 
/// Expands the MSRpstatePseudo at MBBI into a conditionally executed
/// MSRpstatesvcrImm1 (smstart/smstop).
///
/// A TBZX/TBNZX on bit 0 of the pstate.sm register (operand 2) branches to a
/// new block (SMBB) holding the unconditional MSRpstatesvcrImm1; when the
/// branch is not taken, an unconditional B goes straight to EndBB, the block
/// holding whatever followed the pseudo.
///
/// \returns &MBB when the pseudo was simply erased because it was the last
/// instruction of a block without successors; otherwise EndBB. The caller
/// resets its NextMBBI when the returned block differs from MBB.
913 MachineBasicBlock *
914 AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
915                                         MachineBasicBlock::iterator MBBI) {
916   MachineInstr &MI = *MBBI;
917   // In the case of a smstart/smstop before a unreachable, just remove the pseudo.
918   // Exception handling code generated by Clang may introduce unreachables and it
919   // seems unnecessary to restore pstate.sm when that happens. Note that it is
920   // not just an optimisation, the code below expects a successor instruction/block
921   // in order to split the block at MBBI.
922   if (std::next(MBBI) == MBB.end() &&
923       MI.getParent()->successors().begin() ==
924           MI.getParent()->successors().end()) {
925     MI.eraseFromParent();
926     return &MBB;
927   }
928 
929   // Expand the pseudo into smstart or smstop instruction. The pseudo has the
930   // following operands:
931   //
932   //   MSRpstatePseudo <za|sm|both>, <0|1>, pstate.sm, expectedval, <regmask>
933   //
934   // The pseudo is expanded into a conditional smstart/smstop, with a
935   // check if pstate.sm (register) equals the expected value, and if not,
936   // invokes the smstart/smstop.
937   //
938   // As an example, the following block contains a normal call from a
939   // streaming-compatible function:
940   //
941   // OrigBB:
942   //   MSRpstatePseudo 3, 0, %0, 0, <regmask>             <- Conditional SMSTOP
943   //   bl @normal_callee
944   //   MSRpstatePseudo 3, 1, %0, 0, <regmask>             <- Conditional SMSTART
945   //
946   // ...which will be transformed into:
947   //
948   // OrigBB:
949   //   TBNZx %0:gpr64, 0, SMBB
950   //   b EndBB
951   //
952   // SMBB:
953   //   MSRpstatesvcrImm1 3, 0, <regmask>                  <- SMSTOP
      //   b EndBB
954   //
955   // EndBB:
956   //   bl @normal_callee
957   //   MSRpstatePseudo 3, 1, %0, 0, <regmask>             <- Conditional SMSTART (not yet expanded)
958   //
959   DebugLoc DL = MI.getDebugLoc();
960 
961   // Create the conditional branch. Operand 3 of the instruction tells us if
962   // we are wrapping a normal or streaming function and selects the branch
963   // opcode; operand 2 is the register holding pstate.sm that gets tested.
964   // We test the live value of pstate.sm and toggle pstate.sm if this is not the
965   // expected value for the callee (0 for a normal callee and 1 for a streaming
966   // callee).
967   auto PStateSM = MI.getOperand(2).getReg();
968   bool IsStreamingCallee = MI.getOperand(3).getImm();
969   unsigned Opc = IsStreamingCallee ? AArch64::TBZX : AArch64::TBNZX;
970   MachineInstrBuilder Tbx =
971       BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(PStateSM).addImm(0);
972 
973   // Split MBB and create two new blocks:
974   //  - MBB now contains all instructions before MSRpstatePseudo.
975   //  - SMBB contains the MSRpstatePseudo instruction only.
976   //  - EndBB contains all instructions after MSRpstatePseudo.
      // PrevMI is the TB[N]Z inserted just above, so the first split happens
      // right after it.
977   MachineInstr &PrevMI = *std::prev(MBBI);
978   MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
      // If the pseudo is the last instruction of SMBB there is nothing to split
      // off; SMBB's first successor is used as EndBB instead.
979   MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
980                                  ? *SMBB->successors().begin()
981                                  : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
982 
983   // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
984   Tbx.addMBB(SMBB);
985   BuildMI(&MBB, DL, TII->get(AArch64::B))
986       .addMBB(EndBB);
987   MBB.addSuccessor(EndBB);
988 
989   // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
990   MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
991                                     TII->get(AArch64::MSRpstatesvcrImm1));
992   // Copy every operand of the pseudo except operands 2 and 3 (the pstate.sm
993   // register and the flag that indicates whether the callee is streaming or
994   // normal), which were consumed by the conditional branch above.
995   MIB.add(MI.getOperand(0));
996   MIB.add(MI.getOperand(1));
997   for (unsigned i = 4; i < MI.getNumOperands(); ++i)
998     MIB.add(MI.getOperand(i));
999 
      // Terminate SMBB with an explicit branch to EndBB.
1000   BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1001 
1002   MI.eraseFromParent();
1003   return EndBB;
1004 }
1005 
1006 /// If MBBI references a pseudo instruction that should be expanded here,
1007 /// do the expansion and return true.  Otherwise return false.
///
/// NextMBBI is the iterator the caller will continue from. It is forwarded to
/// the CMP_SWAP and set-tag-loop expanders, and is overwritten with MBB.end()
/// when expanding RestoreZAPseudo or MSRpstatePseudo returns a block other
/// than MBB.
1008 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
1009                                    MachineBasicBlock::iterator MBBI,
1010                                    MachineBasicBlock::iterator &NextMBBI) {
1011   MachineInstr &MI = *MBBI;
1012   unsigned Opcode = MI.getOpcode();
1013 
1014   // Check if we can expand the destructive op
       // A lookup result of -1 is treated as "no mapping for this opcode".
1015   int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
1016   if (OrigInstr != -1) {
1017     auto &Orig = TII->get(OrigInstr);
1018     if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
1019            != AArch64::NotDestructive) {
1020       return expand_DestructiveOp(MI, MBB, MBBI);
1021     }
1022   }
1023 
1024   switch (Opcode) {
1025   default:
1026     break;
1027 
       // BSP: choose the real instruction according to which source operand
       // (if any) already lives in the destination register:
       //   operand 3 -> BIT, operand 2 -> BIF, operand 1 -> BSL,
       //   none      -> ORR copy of operand 1 into the destination, then BSL.
1028   case AArch64::BSPv8i8:
1029   case AArch64::BSPv16i8: {
1030     Register DstReg = MI.getOperand(0).getReg();
1031     if (DstReg == MI.getOperand(3).getReg()) {
1032       // Expand to BIT
1033       BuildMI(MBB, MBBI, MI.getDebugLoc(),
1034               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
1035                                                   : AArch64::BITv16i8))
1036           .add(MI.getOperand(0))
1037           .add(MI.getOperand(3))
1038           .add(MI.getOperand(2))
1039           .add(MI.getOperand(1));
1040     } else if (DstReg == MI.getOperand(2).getReg()) {
1041       // Expand to BIF
1042       BuildMI(MBB, MBBI, MI.getDebugLoc(),
1043               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
1044                                                   : AArch64::BIFv16i8))
1045           .add(MI.getOperand(0))
1046           .add(MI.getOperand(2))
1047           .add(MI.getOperand(3))
1048           .add(MI.getOperand(1));
1049     } else {
1050       // Expand to BSL, use additional move if required
1051       if (DstReg == MI.getOperand(1).getReg()) {
1052         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1053                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1054                                                     : AArch64::BSLv16i8))
1055             .add(MI.getOperand(0))
1056             .add(MI.getOperand(1))
1057             .add(MI.getOperand(2))
1058             .add(MI.getOperand(3));
1059       } else {
             // ORR of operand 1 with itself acts as the register move into
             // DstReg; the BSL that follows then kills that temporary value.
1060         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1061                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
1062                                                     : AArch64::ORRv16i8))
1063             .addReg(DstReg,
1064                     RegState::Define |
1065                         getRenamableRegState(MI.getOperand(0).isRenamable()))
1066             .add(MI.getOperand(1))
1067             .add(MI.getOperand(1));
1068         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1069                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1070                                                     : AArch64::BSLv16i8))
1071             .add(MI.getOperand(0))
1072             .addReg(DstReg,
1073                     RegState::Kill |
1074                         getRenamableRegState(MI.getOperand(0).isRenamable()))
1075             .add(MI.getOperand(2))
1076             .add(MI.getOperand(3));
1077       }
1078     }
1079     MI.eraseFromParent();
1080     return true;
1081   }
1082 
       // Register-register arithmetic/logical forms: rewrite each to the
       // matching shifted-register form with an LSL #0 shift operand.
1083   case AArch64::ADDWrr:
1084   case AArch64::SUBWrr:
1085   case AArch64::ADDXrr:
1086   case AArch64::SUBXrr:
1087   case AArch64::ADDSWrr:
1088   case AArch64::SUBSWrr:
1089   case AArch64::ADDSXrr:
1090   case AArch64::SUBSXrr:
1091   case AArch64::ANDWrr:
1092   case AArch64::ANDXrr:
1093   case AArch64::BICWrr:
1094   case AArch64::BICXrr:
1095   case AArch64::ANDSWrr:
1096   case AArch64::ANDSXrr:
1097   case AArch64::BICSWrr:
1098   case AArch64::BICSXrr:
1099   case AArch64::EONWrr:
1100   case AArch64::EONXrr:
1101   case AArch64::EORWrr:
1102   case AArch64::EORXrr:
1103   case AArch64::ORNWrr:
1104   case AArch64::ORNXrr:
1105   case AArch64::ORRWrr:
1106   case AArch64::ORRXrr: {
         // Note: this local deliberately shadows the outer Opcode and holds the
         // replacement opcode. Every outer case label has an entry in the
         // inner switch, so its default is not expected to be reached.
1107     unsigned Opcode;
1108     switch (MI.getOpcode()) {
1109     default:
1110       return false;
1111     case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
1112     case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
1113     case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
1114     case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
1115     case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
1116     case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
1117     case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
1118     case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
1119     case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
1120     case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
1121     case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
1122     case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
1123     case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
1124     case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
1125     case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
1126     case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
1127     case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
1128     case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
1129     case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
1130     case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
1131     case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
1132     case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
1133     case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
1134     case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
1135     }
1136     MachineFunction &MF = *MBB.getParent();
1137     // Try to create new inst without implicit operands added.
1138     MachineInstr *NewMI = MF.CreateMachineInstr(
1139         TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
1140     MBB.insert(MBBI, NewMI);
1141     MachineInstrBuilder MIB1(MF, NewMI);
1142     MIB1->setPCSections(MF, MI.getPCSections());
1143     MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
1144         .add(MI.getOperand(1))
1145         .add(MI.getOperand(2))
1146         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1147     transferImpOps(MI, MIB1, MIB1);
1148     MI.eraseFromParent();
1149     return true;
1150   }
1151 
       // LOADgot: a single LDRXl in the tiny code model, otherwise ADRP followed
       // by a load (LDRWui into the 32-bit sub-register on ILP32, LDRXui
       // otherwise). Operand 1 may be a global, an external symbol or a
       // constant-pool index.
1152   case AArch64::LOADgot: {
1153     MachineFunction *MF = MBB.getParent();
1154     Register DstReg = MI.getOperand(0).getReg();
1155     const MachineOperand &MO1 = MI.getOperand(1);
1156     unsigned Flags = MO1.getTargetFlags();
1157 
1158     if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
1159       // Tiny codemodel expand to LDR
1160       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1161                                         TII->get(AArch64::LDRXl), DstReg);
1162 
1163       if (MO1.isGlobal()) {
1164         MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
1165       } else if (MO1.isSymbol()) {
1166         MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
1167       } else {
1168         assert(MO1.isCPI() &&
1169                "Only expect globals, externalsymbols, or constant pools");
1170         MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
1171       }
1172     } else {
1173       // Small codemodel expand into ADRP + LDR.
           // Note: this MF (a reference) shadows the outer MF pointer.
1174       MachineFunction &MF = *MI.getParent()->getParent();
1175       DebugLoc DL = MI.getDebugLoc();
1176       MachineInstrBuilder MIB1 =
1177           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
1178 
1179       MachineInstrBuilder MIB2;
1180       if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
             // Load into the 32-bit sub-register and add the full 64-bit
             // register as an extra implicit operand carrying the flags read
             // from operand 0.
1181         auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1182         unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
1183         unsigned DstFlags = MI.getOperand(0).getTargetFlags();
1184         MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
1185                    .addDef(Reg32)
1186                    .addReg(DstReg, RegState::Kill)
1187                    .addReg(DstReg, DstFlags | RegState::Implicit);
1188       } else {
             // Re-reads the same register as the outer DstReg (shadowing it).
1189         Register DstReg = MI.getOperand(0).getReg();
1190         MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
1191                    .add(MI.getOperand(0))
1192                    .addUse(DstReg, RegState::Kill);
1193       }
1194 
           // ADRP gets the page of the address (MO_PAGE); the load gets the
           // page offset (MO_PAGEOFF | MO_NC).
1195       if (MO1.isGlobal()) {
1196         MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
1197         MIB2.addGlobalAddress(MO1.getGlobal(), 0,
1198                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1199       } else if (MO1.isSymbol()) {
1200         MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
1201         MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
1202                                                         AArch64II::MO_PAGEOFF |
1203                                                         AArch64II::MO_NC);
1204       } else {
1205         assert(MO1.isCPI() &&
1206                "Only expect globals, externalsymbols, or constant pools");
1207         MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1208                                   Flags | AArch64II::MO_PAGE);
1209         MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1210                                   Flags | AArch64II::MO_PAGEOFF |
1211                                       AArch64II::MO_NC);
1212       }
1213 
1214       transferImpOps(MI, MIB1, MIB2);
1215     }
1216     MI.eraseFromParent();
1217     return true;
1218   }
       // MOVaddrBA: on MachO targets the block address is loaded from a
       // constant-pool entry (ADRP + LDRXui); on every other target control
       // falls through to the generic ADRP + ADD expansion below.
1219   case AArch64::MOVaddrBA: {
1220     MachineFunction &MF = *MI.getParent()->getParent();
1221     if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1222       // blockaddress expressions have to come from a constant pool because the
1223       // largest addend (and hence offset within a function) allowed for ADRP is
1224       // only 8MB.
1225       const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
1226       assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1227 
1228       MachineConstantPool *MCP = MF.getConstantPool();
1229       unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));
1230 
1231       Register DstReg = MI.getOperand(0).getReg();
1232       auto MIB1 =
1233           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1234               .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
1235       auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1236                           TII->get(AArch64::LDRXui), DstReg)
1237                       .addUse(DstReg)
1238                       .addConstantPoolIndex(
1239                           CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1240       transferImpOps(MI, MIB1, MIB2);
1241       MI.eraseFromParent();
1242       return true;
1243     }
1244   }
1245     [[fallthrough]];
1246   case AArch64::MOVaddr:
1247   case AArch64::MOVaddrJT:
1248   case AArch64::MOVaddrCP:
1249   case AArch64::MOVaddrTLS:
1250   case AArch64::MOVaddrEXT: {
1251     // Expand into ADRP + ADD.
         // Operand 1 supplies the ADRP operand and operand 2 the ADD operand.
1252     Register DstReg = MI.getOperand(0).getReg();
1253     assert(DstReg != AArch64::XZR);
1254     MachineInstrBuilder MIB1 =
1255         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1256             .add(MI.getOperand(1));
1257 
1258     if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
1259       // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1260       // We do so by creating a MOVK that sets bits 48-63 of the register to
1261       // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1262       // the small code model so we can assume a binary size of <= 4GB, which
1263       // makes the untagged PC relative offset positive. The binary must also be
1264       // loaded into address range [0, 2^48). Both of these properties need to
1265       // be ensured at runtime when using tagged addresses.
           // Tag is a modified copy of operand 1; the pseudo's own operand is
           // left untouched.
1266       auto Tag = MI.getOperand(1);
1267       Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1268       Tag.setOffset(0x100000000);
1269       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
1270           .addReg(DstReg)
1271           .add(Tag)
1272           .addImm(48);
1273     }
1274 
1275     MachineInstrBuilder MIB2 =
1276         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1277             .add(MI.getOperand(0))
1278             .addReg(DstReg)
1279             .add(MI.getOperand(2))
1280             .addImm(0);
1281 
1282     transferImpOps(MI, MIB1, MIB2);
1283     MI.eraseFromParent();
1284     return true;
1285   }
1286   case AArch64::ADDlowTLS:
1287     // Produce a plain ADD
1288     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1289         .add(MI.getOperand(0))
1290         .add(MI.getOperand(1))
1291         .add(MI.getOperand(2))
1292         .addImm(0);
1293     MI.eraseFromParent();
1294     return true;
1295 
       // MOVbaseTLS: read the thread-pointer system register with MRS. The
       // register defaults to TPIDR_EL0; the subtarget queries are checked in
       // the order EL3, EL2, EL1 and the first one that holds wins.
1296   case AArch64::MOVbaseTLS: {
1297     Register DstReg = MI.getOperand(0).getReg();
1298     auto SysReg = AArch64SysReg::TPIDR_EL0;
1299     MachineFunction *MF = MBB.getParent();
1300     if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1301       SysReg = AArch64SysReg::TPIDR_EL3;
1302     else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1303       SysReg = AArch64SysReg::TPIDR_EL2;
1304     else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1305       SysReg = AArch64SysReg::TPIDR_EL1;
1306     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
1307         .addImm(SysReg);
1308     MI.eraseFromParent();
1309     return true;
1310   }
1311 
       // Immediate moves: the last argument is the bit width of the move.
1312   case AArch64::MOVi32imm:
1313     return expandMOVImm(MBB, MBBI, 32);
1314   case AArch64::MOVi64imm:
1315     return expandMOVImm(MBB, MBBI, 64);
1316   case AArch64::RET_ReallyLR: {
1317     // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1318     // function and missing live-ins. We are fine in practice because callee
1319     // saved register handling ensures the register value is restored before
1320     // RET, but we need the undef flag here to appease the MachineVerifier
1321     // liveness checks.
1322     MachineInstrBuilder MIB =
1323         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
1324           .addReg(AArch64::LR, RegState::Undef);
1325     transferImpOps(MI, MIB, MIB);
1326     MI.eraseFromParent();
1327     return true;
1328   }
       // Compare-and-swap pseudos: each width passes its exclusive load/store
       // opcodes, the compare opcode with its extend/shift immediate, and the
       // zero register of the matching width. The 8- and 16-bit forms compare
       // with UXTB/UXTH extension; the 32- and 64-bit forms use LSL #0.
1329   case AArch64::CMP_SWAP_8:
1330     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1331                           AArch64::SUBSWrx,
1332                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
1333                           AArch64::WZR, NextMBBI);
1334   case AArch64::CMP_SWAP_16:
1335     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1336                           AArch64::SUBSWrx,
1337                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
1338                           AArch64::WZR, NextMBBI);
1339   case AArch64::CMP_SWAP_32:
1340     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1341                           AArch64::SUBSWrs,
1342                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1343                           AArch64::WZR, NextMBBI);
1344   case AArch64::CMP_SWAP_64:
1345     return expandCMP_SWAP(MBB, MBBI,
1346                           AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1347                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1348                           AArch64::XZR, NextMBBI);
1349   case AArch64::CMP_SWAP_128:
1350   case AArch64::CMP_SWAP_128_RELEASE:
1351   case AArch64::CMP_SWAP_128_ACQUIRE:
1352   case AArch64::CMP_SWAP_128_MONOTONIC:
1353     return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1354 
       // Tied AES pseudos: re-emit as the untied instruction with the same two
       // leading operands.
1355   case AArch64::AESMCrrTied:
1356   case AArch64::AESIMCrrTied: {
1357     MachineInstrBuilder MIB =
1358     BuildMI(MBB, MBBI, MI.getDebugLoc(),
1359             TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1360                                                       AArch64::AESIMCrr))
1361       .add(MI.getOperand(0))
1362       .add(MI.getOperand(1));
1363     transferImpOps(MI, MIB, MIB);
1364     MI.eraseFromParent();
1365     return true;
1366    }
1367    case AArch64::IRGstack: {
1368      MachineFunction &MF = *MBB.getParent();
1369      const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1370      const AArch64FrameLowering *TFI =
1371          MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1372 
1373      // IRG does not allow immediate offset. getTaggedBasePointerOffset should
1374      // almost always point to SP-after-prologue; if not, emit a longer
1375      // instruction sequence.
1376      int BaseOffset = -AFI->getTaggedBasePointerOffset();
1377      Register FrameReg;
1378      StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1379          MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
1380          /*PreferFP=*/false,
1381          /*ForSimm=*/true);
1382      Register SrcReg = FrameReg;
1383      if (FrameRegOffset) {
1384        // Use output register as temporary.
1385        SrcReg = MI.getOperand(0).getReg();
1386        emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
1387                        FrameRegOffset, TII);
1388      }
          // Only operands 0 and 2 of the pseudo are forwarded; the source
          // register is the one computed above.
1389      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
1390          .add(MI.getOperand(0))
1391          .addUse(SrcReg)
1392          .add(MI.getOperand(2));
1393      MI.eraseFromParent();
1394      return true;
1395    }
        // TAGPstack: ADDG for a non-negative offset, SUBG with the offset's
        // magnitude otherwise. Operand 3 of the pseudo is not forwarded.
1396    case AArch64::TAGPstack: {
1397      int64_t Offset = MI.getOperand(2).getImm();
1398      BuildMI(MBB, MBBI, MI.getDebugLoc(),
1399              TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1400          .add(MI.getOperand(0))
1401          .add(MI.getOperand(1))
1402          .addImm(std::abs(Offset))
1403          .add(MI.getOperand(4));
1404      MI.eraseFromParent();
1405      return true;
1406    }
1407    case AArch64::STGloop_wback:
1408    case AArch64::STZGloop_wback:
1409      return expandSetTagLoop(MBB, MBBI, NextMBBI);
1410    case AArch64::STGloop:
1411    case AArch64::STZGloop:
1412      report_fatal_error(
1413          "Non-writeback variants of STGloop / STZGloop should not "
1414          "survive past PrologEpilogInserter.");
        // Multi-register SVE spill/fill pseudos map onto the single-register
        // STR_ZXI / LDR_ZXI opcode. NOTE(review): the trailing 4/3/2 argument
        // is presumably the number of Z registers in the tuple - confirm
        // against expandSVESpillFill.
1415    case AArch64::STR_ZZZZXI:
1416      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
1417    case AArch64::STR_ZZZXI:
1418      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
1419    case AArch64::STR_ZZXI:
1420      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
1421    case AArch64::LDR_ZZZZXI:
1422      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
1423    case AArch64::LDR_ZZZXI:
1424      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
1425    case AArch64::LDR_ZZXI:
1426      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
1427    case AArch64::BLR_RVMARKER:
1428      return expandCALL_RVMARKER(MBB, MBBI);
1429    case AArch64::BLR_BTI:
1430      return expandCALL_BTI(MBB, MBBI);
1431    case AArch64::StoreSwiftAsyncContext:
1432      return expandStoreSwiftAsyncContext(MBB, MBBI);
        // The next two expansions may split MBB. When the returned block is not
        // MBB, the caller's iterator is reset to MBB.end().
1433    case AArch64::RestoreZAPseudo: {
1434      auto *NewMBB = expandRestoreZA(MBB, MBBI);
1435      if (NewMBB != &MBB)
1436        NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1437      return true;
1438    }
1439    case AArch64::MSRpstatePseudo: {
1440      auto *NewMBB = expandCondSMToggle(MBB, MBBI);
1441      if (NewMBB != &MBB)
1442        NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1443      return true;
1444    }
        // OBSCURE_COPY: emit an ORRXrs (XZR, src, shift 0) only when source and
        // destination differ; otherwise the pseudo is simply removed.
1445    case AArch64::OBSCURE_COPY: {
1446      if (MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) {
1447        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
1448            .add(MI.getOperand(0))
1449            .addReg(AArch64::XZR)
1450            .add(MI.getOperand(1))
1451            .addImm(0);
1452      }
1453      MI.eraseFromParent();
1454      return true;
1455    }
1456   }
1457   return false;
1458 }
1459 
1460 /// Iterate over the instructions in basic block MBB and expand any
1461 /// pseudo instructions.  Return true if anything was modified.
1462 bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
1463   bool Modified = false;
1464 
1465   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1466   while (MBBI != E) {
         // Capture the successor before expanding: expandMI may erase the
         // instruction at MBBI, and may overwrite NMBBI (for example with
         // MBB.end() when the block gets split).
1467     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
1468     Modified |= expandMI(MBB, MBBI, NMBBI);
1469     MBBI = NMBBI;
1470   }
1471 
1472   return Modified;
1473 }
1474 
/// Pass entry point: caches the AArch64 instruction info for MF's subtarget in
/// TII and runs expandMBB over every basic block of MF. Returns true if any
/// block reported a modification.
1475 bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
1476   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
1477 
1478   bool Modified = false;
1479   for (auto &MBB : MF)
1480     Modified |= expandMBB(MBB);
1481   return Modified;
1482 }
1483 
1484 /// Returns an instance of the pseudo instruction expansion pass.
/// The pass object is allocated with new and returned as a raw pointer.
1485 FunctionPass *llvm::createAArch64ExpandPseudoPass() {
1486   return new AArch64ExpandPseudo();
1487 }
1488