//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations.  This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <utility>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
       ++i) {
    const MachineOperand &MO = OldMI.getOperand(i);
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
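///
/// For example, a 64-bit constant with no usable ORR bitmask encoding is
/// typically synthesized as one MOVZ followed by MOVKs for the remaining
/// non-trivial 16-bit chunks (an illustrative sketch; the exact sequence is
/// chosen by AArch64_IMM::expandMOVImm):
///
///   movz x0, #0x5678
///   movk x0, #0x1234, lsl #16
///   movk x0, #0x5678, lsl #32
///   movk x0, #0x1234, lsl #48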
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t RenamableState =
      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode) {
    default: llvm_unreachable("unhandled!"); break;

    case AArch64::ORRWri:
    case AArch64::ORRXri:
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
        .add(MI.getOperand(0))
        .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
        .addImm(I->Op2));
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
        .addReg(DstReg, RegState::Define |
                getDeadRegState(DstIsDead && LastItem) |
                RenamableState)
        .addImm(I->Op1)
        .addImm(I->Op2));
      } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
        .addReg(DstReg,
                RegState::Define |
                getDeadRegState(DstIsDead && LastItem) |
                RenamableState)
        .addReg(DstReg)
        .addImm(I->Op1)
        .addImm(I->Op2));
      } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}

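/// Expand a CMP_SWAP pseudo into an explicit load-exclusive/store-exclusive
/// loop. A sketch of the generated control flow (register widths depend on
/// the LdarOp/StlrOp/CmpOp opcodes passed in):
///
///   .Lloadcmp:
///       mov    wStatus, #0
///       ld*xr  xDest, [xAddr]
///       cmp    xDest, xDesired
///       b.ne   .Ldone
///   .Lstore:
///       st*xr  wStatus, xNew, [xAddr]
///       cbnz   wStatus, .Lloadcmp
///   .Ldone: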
bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
      .addImm(0).addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get the loop-carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

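/// Expand a 128-bit CMP_SWAP pseudo. AArch64 has no 128-bit compare, so the
/// pair loaded by LDAXP is compared half by half (see the inline sketch
/// below): each SUBS result is folded into wStatus with a CSINC, and wStatus
/// then serves both as the comparison outcome and as the STLXP status result.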
bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     csinc wStatus, wzr, wzr, eq
  //     cmp xDestHi, xDesiredHi
  //     csinc wStatus, wStatus, wStatus, eq
  //     cbnz wStatus, .Ldone
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(DoneBB);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass in the loop to get the loop-carried dependencies right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// \brief Expand pseudos into instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than enforcing the register constraints in the register allocator,
/// since that would insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * The _ZERO suffix also specifies that the false lanes need to be zeroed.
///
/// We first check whether the destructive operand == result operand;
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for the _ZERO and _UNDEF variants, where
/// we can guarantee that the false lanes are zeroed (by this expansion) or
/// are undef (don't care / not used); otherwise swapping the operands is
/// illegal because the operation is not (or cannot be emulated to be)
/// fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
                            MachineInstr &MI,
                            MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;

  unsigned DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();

  if (DType == AArch64::DestructiveBinary)
    assert(DstReg != MI.getOperand(3).getReg());

  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd  ==> FSUBR   Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    LLVM_FALLTHROUGH;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

#ifndef NDEBUG
  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
      DstReg != MI.getOperand(DOPIdx).getReg() ||
      MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveBinaryImm:
    DOPRegIsUnique = true;
    break;
  }
#endif

  // Resolve the reverse opcode
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
#ifndef NDEBUG
    assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg());
    // After the movprfx, the destructive operand is the same as Dst
    DOPIdx = 0;
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
#ifndef NDEBUG
    assert(DOPRegIsUnique && "The destructive operand should be unique");
#endif
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
    .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
       .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
       .add(MI.getOperand(SrcIdx));
    break;
  }

  if (PRFX) {
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}

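/// Expand an ST(Z)Gloop_wback pseudo into an MTE tag-setting loop. An odd
/// 16-byte granule is peeled off with a single ST(Z)G first, then the bulk
/// is tagged 32 bytes at a time; an illustrative sketch of the result:
///
///       stg   xAddr, [xAddr], #16       // only if Size % 32 != 0
///       mov   xSize, #Size              // materialized via expandMOVImm
///   .Lloop:
///       st2g  xAddr, [xAddr], #32
///       sub   xSize, xSize, #32
///       cbnz  xSize, .Lloop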
bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop-carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

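/// Expand a multi-vector SVE spill/fill pseudo (e.g. STR_ZZZZXI) into N
/// consecutive STR_ZXI/LDR_ZXI instructions covering adjacent Z-registers at
/// consecutive vector-sized slots; an illustrative sketch:
///
///   STR_ZZZZXI z0_z1_z2_z3, xBase, #Imm
///     ==>
///   str z0, [xBase, #Imm, mul vl]
///   str z1, [xBase, #Imm+1, mul vl]
///   str z2, [xBase, #Imm+2, mul vl]
///   str z3, [xBase, #Imm+3, mul vl]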
bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(
            TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
            Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true.  Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
           != AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
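    // BSP computes Dst = (Op1 & Op2) | (~Op1 & Op3). NEON has three tied
    // variants of this bitwise select; which one avoids an extra copy depends
    // on which source the destination register already aliases: BIT when
    // Dst == Op3, BIF when Dst == Op2, and BSL when Dst == Op1 (otherwise
    // Op1 is first materialized into Dst with an ORR).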
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
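    // These register-register forms are pseudos that map onto the
    // shifted-register encodings with a zero shift amount, e.g.
    // ADDWrr w0, w1, w2 becomes ADDWrs w0, w1, w2, lsl #0.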
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
    }
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
                MI.getOperand(0).getReg())
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
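    // Loads the address of a symbol through the GOT. For the small code
    // model this becomes an ADRP of the GOT page plus a pointer-width load
    // of the GOT slot, e.g. (illustrative ELF syntax):
    //   adrp x0, :got:sym
    //   ldr  x0, [x0, :got_lo12:sym]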
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // Tiny code model: expand to a literal LDR.
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // Small code model: expand into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        unsigned DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
                                                        AArch64II::MO_PAGEOFF |
                                                        AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrBA:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
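    // e.g. (illustrative):
    //   adrp xDst, sym
    //   add  xDst, xDst, :lo12:sym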
    Register DstReg = MI.getOperand(0).getReg();
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
          .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    MachineInstrBuilder MIB =
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
                                                      AArch64::AESIMCrr))
      .add(MI.getOperand(0))
      .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions.  Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}