1 //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands pseudo instructions into target
10 // instructions to allow proper scheduling and other late optimizations.  This
11 // pass should be run after register allocation but before the post-regalloc
12 // scheduling pass.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64ExpandImm.h"
17 #include "AArch64InstrInfo.h"
18 #include "AArch64MachineFunctionInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/Triple.h"
24 #include "llvm/CodeGen/LivePhysRegs.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstr.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/CodeGen/TargetSubtargetInfo.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/MC/MCInstrDesc.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/CodeGen.h"
37 #include "llvm/Support/MathExtras.h"
38 #include "llvm/Target/TargetMachine.h"
39 #include <cassert>
40 #include <cstdint>
41 #include <iterator>
42 #include <limits>
43 #include <utility>
44 
45 using namespace llvm;
46 
47 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
48 
49 namespace {
50 
51 class AArch64ExpandPseudo : public MachineFunctionPass {
52 public:
53   const AArch64InstrInfo *TII;
54 
55   static char ID;
56 
57   AArch64ExpandPseudo() : MachineFunctionPass(ID) {
58     initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
59   }
60 
61   bool runOnMachineFunction(MachineFunction &Fn) override;
62 
63   StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
64 
65 private:
66   bool expandMBB(MachineBasicBlock &MBB);
67   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
68                 MachineBasicBlock::iterator &NextMBBI);
69   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
70                     unsigned BitSize);
71 
72   bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
73                             MachineBasicBlock::iterator MBBI);
74   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
75                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
76                       unsigned ExtendImm, unsigned ZeroReg,
77                       MachineBasicBlock::iterator &NextMBBI);
78   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
79                           MachineBasicBlock::iterator MBBI,
80                           MachineBasicBlock::iterator &NextMBBI);
81   bool expandSetTagLoop(MachineBasicBlock &MBB,
82                         MachineBasicBlock::iterator MBBI,
83                         MachineBasicBlock::iterator &NextMBBI);
84   bool expandSVESpillFill(MachineBasicBlock &MBB,
85                           MachineBasicBlock::iterator MBBI, unsigned Opc,
86                           unsigned N);
87   bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
88                            MachineBasicBlock::iterator MBBI);
89   bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
90                                     MachineBasicBlock::iterator MBBI);
91 };
92 
93 } // end anonymous namespace
94 
95 char AArch64ExpandPseudo::ID = 0;
96 
97 INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
98                 AARCH64_EXPAND_PSEUDO_NAME, false, false)
99 
100 /// Transfer implicit operands on the pseudo instruction to the
101 /// instructions created from the expansion.
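/// For example (illustrative): an implicit use or an implicit def (e.g. of
/// NZCV) attached beyond the pseudo's declared operands is re-attached here,
/// with implicit uses going to UseMI and implicit defs going to DefMI.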
102 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
103                            MachineInstrBuilder &DefMI) {
104   const MCInstrDesc &Desc = OldMI.getDesc();
105   for (const MachineOperand &MO :
106        llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
107     assert(MO.isReg() && MO.getReg());
108     if (MO.isUse())
109       UseMI.add(MO);
110     else
111       DefMI.add(MO);
112   }
113 }
114 
115 /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
116 /// real move-immediate instructions to synthesize the immediate.
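/// For example (illustrative):
///     MOVi64imm x0, #0x12345678  =>  movz x0, #0x5678
///                                    movk x0, #0x1234, lsl #16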
117 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
118                                        MachineBasicBlock::iterator MBBI,
119                                        unsigned BitSize) {
120   MachineInstr &MI = *MBBI;
121   Register DstReg = MI.getOperand(0).getReg();
122   uint64_t RenamableState =
123       MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
124   uint64_t Imm = MI.getOperand(1).getImm();
125 
126   if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
127     // Useless def, and we don't want to risk creating an invalid ORR (which
128     // would really write to sp).
129     MI.eraseFromParent();
130     return true;
131   }
132 
133   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
134   AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
135   assert(Insn.size() != 0);
136 
137   SmallVector<MachineInstrBuilder, 4> MIBS;
138   for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
139     bool LastItem = std::next(I) == E;
140     switch (I->Opcode)
141     {
142     default: llvm_unreachable("unhandled!"); break;
143 
144     case AArch64::ORRWri:
145     case AArch64::ORRXri:
146       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
147         .add(MI.getOperand(0))
148         .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
149         .addImm(I->Op2));
150       break;
151     case AArch64::MOVNWi:
152     case AArch64::MOVNXi:
153     case AArch64::MOVZWi:
154     case AArch64::MOVZXi: {
155       bool DstIsDead = MI.getOperand(0).isDead();
156       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
157         .addReg(DstReg, RegState::Define |
158                 getDeadRegState(DstIsDead && LastItem) |
159                 RenamableState)
160         .addImm(I->Op1)
161         .addImm(I->Op2));
162       } break;
163     case AArch64::MOVKWi:
164     case AArch64::MOVKXi: {
165       Register DstReg = MI.getOperand(0).getReg();
166       bool DstIsDead = MI.getOperand(0).isDead();
167       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
168         .addReg(DstReg,
169                 RegState::Define |
170                 getDeadRegState(DstIsDead && LastItem) |
171                 RenamableState)
172         .addReg(DstReg)
173         .addImm(I->Op1)
174         .addImm(I->Op2));
175       } break;
176     }
177   }
178   transferImpOps(MI, MIBS.front(), MIBS.back());
179   MI.eraseFromParent();
180   return true;
181 }
182 
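/// Expand a CMP_SWAP_{8,16,32,64} pseudo into an explicit load-exclusive /
/// store-exclusive retry loop, e.g. (illustrative, for CMP_SWAP_32):
///     .Lloadcmp:  ldaxr wDest, [xAddr]; cmp wDest, wDesired; b.ne .Ldone
///     .Lstore:    stlxr wStatus, wNew, [xAddr]; cbnz wStatus, .Lloadcmp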
183 bool AArch64ExpandPseudo::expandCMP_SWAP(
184     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
185     unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
186     MachineBasicBlock::iterator &NextMBBI) {
187   MachineInstr &MI = *MBBI;
188   DebugLoc DL = MI.getDebugLoc();
189   const MachineOperand &Dest = MI.getOperand(0);
190   Register StatusReg = MI.getOperand(1).getReg();
191   bool StatusDead = MI.getOperand(1).isDead();
192   // Duplicating undef operands into 2 instructions does not guarantee the same
193   // value on both; however, undef should be replaced by xzr anyway.
194   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
195   Register AddrReg = MI.getOperand(2).getReg();
196   Register DesiredReg = MI.getOperand(3).getReg();
197   Register NewReg = MI.getOperand(4).getReg();
198 
199   MachineFunction *MF = MBB.getParent();
200   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
201   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
202   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
203 
204   MF->insert(++MBB.getIterator(), LoadCmpBB);
205   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
206   MF->insert(++StoreBB->getIterator(), DoneBB);
207 
208   // .Lloadcmp:
209   //     mov wStatus, 0
210   //     ldaxr xDest, [xAddr]
211   //     cmp xDest, xDesired
212   //     b.ne .Ldone
213   if (!StatusDead)
214     BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
215       .addImm(0).addImm(0);
216   BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
217       .addReg(AddrReg);
218   BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
219       .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
220       .addReg(DesiredReg)
221       .addImm(ExtendImm);
222   BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
223       .addImm(AArch64CC::NE)
224       .addMBB(DoneBB)
225       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
226   LoadCmpBB->addSuccessor(DoneBB);
227   LoadCmpBB->addSuccessor(StoreBB);
228 
229   // .Lstore:
230   //     stlxr wStatus, xNew, [xAddr]
231   //     cbnz wStatus, .Lloadcmp
232   BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
233       .addReg(NewReg)
234       .addReg(AddrReg);
235   BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
236       .addReg(StatusReg, getKillRegState(StatusDead))
237       .addMBB(LoadCmpBB);
238   StoreBB->addSuccessor(LoadCmpBB);
239   StoreBB->addSuccessor(DoneBB);
240 
241   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
242   DoneBB->transferSuccessors(&MBB);
243 
244   MBB.addSuccessor(LoadCmpBB);
245 
246   NextMBBI = MBB.end();
247   MI.eraseFromParent();
248 
249   // Recompute livein lists.
250   LivePhysRegs LiveRegs;
251   computeAndAddLiveIns(LiveRegs, *DoneBB);
252   computeAndAddLiveIns(LiveRegs, *StoreBB);
253   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
254   // Do an extra pass around the loop to get the loop-carried registers right.
255   StoreBB->clearLiveIns();
256   computeAndAddLiveIns(LiveRegs, *StoreBB);
257   LoadCmpBB->clearLiveIns();
258   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
259 
260   return true;
261 }
262 
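/// Expand a 128-bit CMP_SWAP pseudo into an LDXP/STXP (or acquire/release
/// variant) retry loop. Sketch of the emitted control flow (illustrative):
///     .Lloadcmp:  load pair, compare both halves, cbnz wStatus -> .Lfail
///     .Lstore:    store-exclusive the new pair, cbnz -> .Lloadcmp, b .Ldone
///     .Lfail:     store-exclusive the original pair, cbnz -> .Lloadcmp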
263 bool AArch64ExpandPseudo::expandCMP_SWAP_128(
264     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
265     MachineBasicBlock::iterator &NextMBBI) {
266   MachineInstr &MI = *MBBI;
267   DebugLoc DL = MI.getDebugLoc();
268   MachineOperand &DestLo = MI.getOperand(0);
269   MachineOperand &DestHi = MI.getOperand(1);
270   Register StatusReg = MI.getOperand(2).getReg();
271   bool StatusDead = MI.getOperand(2).isDead();
272   // Duplicating undef operands into 2 instructions does not guarantee the same
273   // value on both; however, undef should be replaced by xzr anyway.
274   assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
275   Register AddrReg = MI.getOperand(3).getReg();
276   Register DesiredLoReg = MI.getOperand(4).getReg();
277   Register DesiredHiReg = MI.getOperand(5).getReg();
278   Register NewLoReg = MI.getOperand(6).getReg();
279   Register NewHiReg = MI.getOperand(7).getReg();
280 
281   unsigned LdxpOp, StxpOp;
282 
283   switch (MI.getOpcode()) {
284   case AArch64::CMP_SWAP_128_MONOTONIC:
285     LdxpOp = AArch64::LDXPX;
286     StxpOp = AArch64::STXPX;
287     break;
288   case AArch64::CMP_SWAP_128_RELEASE:
289     LdxpOp = AArch64::LDXPX;
290     StxpOp = AArch64::STLXPX;
291     break;
292   case AArch64::CMP_SWAP_128_ACQUIRE:
293     LdxpOp = AArch64::LDAXPX;
294     StxpOp = AArch64::STXPX;
295     break;
296   case AArch64::CMP_SWAP_128:
297     LdxpOp = AArch64::LDAXPX;
298     StxpOp = AArch64::STLXPX;
299     break;
300   default:
301     llvm_unreachable("Unexpected opcode");
302   }
303 
304   MachineFunction *MF = MBB.getParent();
305   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
306   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
307   auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
308   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
309 
310   MF->insert(++MBB.getIterator(), LoadCmpBB);
311   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
312   MF->insert(++StoreBB->getIterator(), FailBB);
313   MF->insert(++FailBB->getIterator(), DoneBB);
314 
315   // .Lloadcmp:
316   //     ldaxp xDestLo, xDestHi, [xAddr]
317   //     cmp xDestLo, xDesiredLo; csinc wStatus, wzr, wzr, eq
318   //     cmp xDestHi, xDesiredHi; csinc wStatus, wStatus, wStatus, eq
319   //     cbnz wStatus, .Lfail
320   BuildMI(LoadCmpBB, DL, TII->get(LdxpOp))
321       .addReg(DestLo.getReg(), RegState::Define)
322       .addReg(DestHi.getReg(), RegState::Define)
323       .addReg(AddrReg);
324   BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
325       .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
326       .addReg(DesiredLoReg)
327       .addImm(0);
328   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
329     .addUse(AArch64::WZR)
330     .addUse(AArch64::WZR)
331     .addImm(AArch64CC::EQ);
332   BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
333       .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
334       .addReg(DesiredHiReg)
335       .addImm(0);
336   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
337       .addUse(StatusReg, RegState::Kill)
338       .addUse(StatusReg, RegState::Kill)
339       .addImm(AArch64CC::EQ);
340   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
341       .addUse(StatusReg, getKillRegState(StatusDead))
342       .addMBB(FailBB);
343   LoadCmpBB->addSuccessor(FailBB);
344   LoadCmpBB->addSuccessor(StoreBB);
345 
346   // .Lstore:
347   //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
348   //     cbnz wStatus, .Lloadcmp; b .Ldone
349   BuildMI(StoreBB, DL, TII->get(StxpOp), StatusReg)
350       .addReg(NewLoReg)
351       .addReg(NewHiReg)
352       .addReg(AddrReg);
353   BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
354       .addReg(StatusReg, getKillRegState(StatusDead))
355       .addMBB(LoadCmpBB);
356   BuildMI(StoreBB, DL, TII->get(AArch64::B)).addMBB(DoneBB);
357   StoreBB->addSuccessor(LoadCmpBB);
358   StoreBB->addSuccessor(DoneBB);
359 
360   // .Lfail:
361   //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
362   //     cbnz wStatus, .Lloadcmp
363   BuildMI(FailBB, DL, TII->get(StxpOp), StatusReg)
364       .addReg(DestLo.getReg())
365       .addReg(DestHi.getReg())
366       .addReg(AddrReg);
367   BuildMI(FailBB, DL, TII->get(AArch64::CBNZW))
368       .addReg(StatusReg, getKillRegState(StatusDead))
369       .addMBB(LoadCmpBB);
370   FailBB->addSuccessor(LoadCmpBB);
371   FailBB->addSuccessor(DoneBB);
372 
373   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
374   DoneBB->transferSuccessors(&MBB);
375 
376   MBB.addSuccessor(LoadCmpBB);
377 
378   NextMBBI = MBB.end();
379   MI.eraseFromParent();
380 
381   // Recompute liveness bottom up.
382   LivePhysRegs LiveRegs;
383   computeAndAddLiveIns(LiveRegs, *DoneBB);
384   computeAndAddLiveIns(LiveRegs, *FailBB);
385   computeAndAddLiveIns(LiveRegs, *StoreBB);
386   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
387 
388   // Do an extra pass in the loop to get the loop-carried dependencies right.
389   FailBB->clearLiveIns();
390   computeAndAddLiveIns(LiveRegs, *FailBB);
391   StoreBB->clearLiveIns();
392   computeAndAddLiveIns(LiveRegs, *StoreBB);
393   LoadCmpBB->clearLiveIns();
394   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
395 
396   return true;
397 }
398 
399 /// \brief Expand Pseudos to Instructions with destructive operands.
400 ///
401 /// This mechanism uses MOVPRFX instructions for zeroing the false lanes
402 /// or for fixing relaxed register allocation conditions to comply with
403 /// the instruction's register constraints. The latter case may be cheaper
404 /// than setting the register constraints in the register allocator,
405 /// since that will insert regular MOV instructions rather than MOVPRFX.
406 ///
407 /// Example (after register allocation):
408 ///
409 ///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
410 ///
411 /// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
412 /// * We cannot map directly to FSUB_ZPmZ_B because the register
413 ///   constraints of the instruction are not met.
414 /// * Also the _ZERO specifies the false lanes need to be zeroed.
415 ///
416 /// We first check whether the destructive operand is the same as the result
417 /// operand; if not, we try to swap the operands, e.g.
418 ///
419 ///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
420 ///
421 /// But because FSUB_ZPmZ is not commutative, this is semantically
422 /// different, so we need a reverse instruction:
423 ///
424 ///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
425 ///
426 /// Then we implement the zeroing of the false lanes of Z0 by adding
427 /// a zeroing MOVPRFX instruction:
428 ///
429 ///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
430 ///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
431 ///
432 /// Note that this can only be done for the _ZERO or _UNDEF variants, where
433 /// we can guarantee that the false lanes are zeroed (by emitting the zeroing
434 /// MOVPRFX) or that they are undef (don't care / not used); otherwise
435 /// swapping the operands is illegal because the operation is not
436 /// (or cannot be emulated to be) fully commutative.
437 bool AArch64ExpandPseudo::expand_DestructiveOp(
438                             MachineInstr &MI,
439                             MachineBasicBlock &MBB,
440                             MachineBasicBlock::iterator MBBI) {
441   unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
442   uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
443   uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
444   bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
445 
446   unsigned DstReg = MI.getOperand(0).getReg();
447   bool DstIsDead = MI.getOperand(0).isDead();
448 
449   if (DType == AArch64::DestructiveBinary)
450     assert(DstReg != MI.getOperand(3).getReg());
451 
452   bool UseRev = false;
453   unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
454   switch (DType) {
455   case AArch64::DestructiveBinaryComm:
456   case AArch64::DestructiveBinaryCommWithRev:
457     if (DstReg == MI.getOperand(3).getReg()) {
458       // FSUB Zd, Pg, Zs1, Zd  ==> FSUBR   Zd, Pg/m, Zd, Zs1
459       std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
460       UseRev = true;
461       break;
462     }
463     LLVM_FALLTHROUGH;
464   case AArch64::DestructiveBinary:
465   case AArch64::DestructiveBinaryImm:
466     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
467     break;
468   case AArch64::DestructiveUnaryPassthru:
469     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
470     break;
471   case AArch64::DestructiveTernaryCommWithRev:
472     std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
473     if (DstReg == MI.getOperand(3).getReg()) {
474       // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
475       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
476       UseRev = true;
477     } else if (DstReg == MI.getOperand(4).getReg()) {
478       // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
479       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
480       UseRev = true;
481     }
482     break;
483   default:
484     llvm_unreachable("Unsupported Destructive Operand type");
485   }
486 
487 #ifndef NDEBUG
488   // MOVPRFX can only be used if the destination operand is the destructive
489   // operand and is not used as any other (source) operand, so the
490   // destructive operand register must be unique.
491   bool DOPRegIsUnique = false;
492   switch (DType) {
493   case AArch64::DestructiveBinaryComm:
494   case AArch64::DestructiveBinaryCommWithRev:
495     DOPRegIsUnique =
496       DstReg != MI.getOperand(DOPIdx).getReg() ||
497       MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
498     break;
499   case AArch64::DestructiveUnaryPassthru:
500   case AArch64::DestructiveBinaryImm:
501     DOPRegIsUnique = true;
502     break;
503   case AArch64::DestructiveTernaryCommWithRev:
504     DOPRegIsUnique =
505         DstReg != MI.getOperand(DOPIdx).getReg() ||
506         (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
507          MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
508     break;
509   }
510 #endif
511 
512   // Resolve the reverse opcode
513   if (UseRev) {
514     int NewOpcode;
515     // e.g. DIV -> DIVR
516     if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
517       Opcode = NewOpcode;
518     // e.g. DIVR -> DIV
519     else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
520       Opcode = NewOpcode;
521   }
522 
523   // Get the right MOVPRFX
524   uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
525   unsigned MovPrfx, MovPrfxZero;
526   switch (ElementSize) {
527   case AArch64::ElementSizeNone:
528   case AArch64::ElementSizeB:
529     MovPrfx = AArch64::MOVPRFX_ZZ;
530     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
531     break;
532   case AArch64::ElementSizeH:
533     MovPrfx = AArch64::MOVPRFX_ZZ;
534     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
535     break;
536   case AArch64::ElementSizeS:
537     MovPrfx = AArch64::MOVPRFX_ZZ;
538     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
539     break;
540   case AArch64::ElementSizeD:
541     MovPrfx = AArch64::MOVPRFX_ZZ;
542     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
543     break;
544   default:
545     llvm_unreachable("Unsupported ElementSize");
546   }
547 
548   //
549   // Prefix the destructive operation with a MOVPRFX (if required)
550   //
551   MachineInstrBuilder PRFX, DOP;
552   if (FalseZero) {
553 #ifndef NDEBUG
554     assert(DOPRegIsUnique && "The destructive operand should be unique");
555 #endif
556     assert(ElementSize != AArch64::ElementSizeNone &&
557            "This instruction is unpredicated");
558 
559     // Merge source operand into destination register
560     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
561                .addReg(DstReg, RegState::Define)
562                .addReg(MI.getOperand(PredIdx).getReg())
563                .addReg(MI.getOperand(DOPIdx).getReg());
564 
565     // After the movprfx, the destructive operand is the same as Dst
566     DOPIdx = 0;
567   } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
568 #ifndef NDEBUG
569     assert(DOPRegIsUnique && "The destructive operand should be unique");
570 #endif
571     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
572                .addReg(DstReg, RegState::Define)
573                .addReg(MI.getOperand(DOPIdx).getReg());
574     DOPIdx = 0;
575   }
576 
577   //
578   // Create the destructive operation
579   //
580   DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
581     .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
582 
583   switch (DType) {
584   case AArch64::DestructiveUnaryPassthru:
585     DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
586         .add(MI.getOperand(PredIdx))
587         .add(MI.getOperand(SrcIdx));
588     break;
589   case AArch64::DestructiveBinaryImm:
590   case AArch64::DestructiveBinaryComm:
591   case AArch64::DestructiveBinaryCommWithRev:
592     DOP.add(MI.getOperand(PredIdx))
593        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
594        .add(MI.getOperand(SrcIdx));
595     break;
596   case AArch64::DestructiveTernaryCommWithRev:
597     DOP.add(MI.getOperand(PredIdx))
598         .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
599         .add(MI.getOperand(SrcIdx))
600         .add(MI.getOperand(Src2Idx));
601     break;
602   }
603 
604   if (PRFX) {
605     finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
606     transferImpOps(MI, PRFX, DOP);
607   } else
608     transferImpOps(MI, DOP, DOP);
609 
610   MI.eraseFromParent();
611   return true;
612 }
613 
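/// Expand STGloop_wback / STZGloop_wback into an explicit tag-store loop,
/// roughly (illustrative; the leading single stg is only emitted when Size is
/// an odd multiple of 16):
///     stg  xAddr, [xAddr], #16
///     mov  xSize, #Size
///   .Lloop:
///     st2g xAddr, [xAddr], #32
///     sub  xSize, xSize, #32
///     cbnz xSize, .Lloop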
614 bool AArch64ExpandPseudo::expandSetTagLoop(
615     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
616     MachineBasicBlock::iterator &NextMBBI) {
617   MachineInstr &MI = *MBBI;
618   DebugLoc DL = MI.getDebugLoc();
619   Register SizeReg = MI.getOperand(0).getReg();
620   Register AddressReg = MI.getOperand(1).getReg();
621 
622   MachineFunction *MF = MBB.getParent();
623 
624   bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
625   const unsigned OpCode1 =
626       ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
627   const unsigned OpCode2 =
628       ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
629 
630   unsigned Size = MI.getOperand(2).getImm();
631   assert(Size > 0 && Size % 16 == 0);
632   if (Size % (16 * 2) != 0) {
633     BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
634         .addReg(AddressReg)
635         .addReg(AddressReg)
636         .addImm(1);
637     Size -= 16;
638   }
639   MachineBasicBlock::iterator I =
640       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
641           .addImm(Size);
642   expandMOVImm(MBB, I, 64);
643 
644   auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
645   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
646 
647   MF->insert(++MBB.getIterator(), LoopBB);
648   MF->insert(++LoopBB->getIterator(), DoneBB);
649 
650   BuildMI(LoopBB, DL, TII->get(OpCode2))
651       .addDef(AddressReg)
652       .addReg(AddressReg)
653       .addReg(AddressReg)
654       .addImm(2)
655       .cloneMemRefs(MI)
656       .setMIFlags(MI.getFlags());
657   BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
658       .addDef(SizeReg)
659       .addReg(SizeReg)
660       .addImm(16 * 2)
661       .addImm(0);
662   BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);
663 
664   LoopBB->addSuccessor(LoopBB);
665   LoopBB->addSuccessor(DoneBB);
666 
667   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
668   DoneBB->transferSuccessors(&MBB);
669 
670   MBB.addSuccessor(LoopBB);
671 
672   NextMBBI = MBB.end();
673   MI.eraseFromParent();
674   // Recompute liveness bottom up.
675   LivePhysRegs LiveRegs;
676   computeAndAddLiveIns(LiveRegs, *DoneBB);
677   computeAndAddLiveIns(LiveRegs, *LoopBB);
678   // Do an extra pass in the loop to get the loop-carried dependencies right.
679   // FIXME: is this necessary?
680   LoopBB->clearLiveIns();
681   computeAndAddLiveIns(LiveRegs, *LoopBB);
682   DoneBB->clearLiveIns();
683   computeAndAddLiveIns(LiveRegs, *DoneBB);
684 
685   return true;
686 }
687 
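/// Expand a multi-register SVE spill/fill pseudo into N consecutive single
/// Z-register stores or loads, e.g. (illustrative):
///     STR_ZZXI z4_z5, [x0, #2]  =>  str z4, [x0, #2, mul vl]
///                                   str z5, [x0, #3, mul vl]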
688 bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
689                                              MachineBasicBlock::iterator MBBI,
690                                              unsigned Opc, unsigned N) {
691   const TargetRegisterInfo *TRI =
692       MBB.getParent()->getSubtarget().getRegisterInfo();
693   MachineInstr &MI = *MBBI;
694   for (unsigned Offset = 0; Offset < N; ++Offset) {
695     int ImmOffset = MI.getOperand(2).getImm() + Offset;
696     bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
697     assert(ImmOffset >= -256 && ImmOffset < 256 &&
698            "Immediate spill offset out of range");
699     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
700         .addReg(
701             TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
702             Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
703         .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
704         .addImm(ImmOffset);
705   }
706   MI.eraseFromParent();
707   return true;
708 }
709 
710 bool AArch64ExpandPseudo::expandCALL_RVMARKER(
711     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
712   // Expand the CALL_RVMARKER pseudo to a call, followed by the special
713   // `mov x29, x29` marker. Mark the sequence as a bundle to prevent passes
714   // from moving other code in between.
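  // For example (illustrative):
  //     BLR_RVMARKER @foo, ...  =>  { bl foo ; mov x29, x29 }  // one bundle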
715   MachineInstr &MI = *MBBI;
716 
717   MachineInstr *OriginalCall;
718   MachineOperand &CallTarget = MI.getOperand(0);
719   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
720          "invalid operand for regular call");
721   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
722   OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
723   OriginalCall->addOperand(CallTarget);
724 
725   unsigned RegMaskStartIdx = 1;
726   // Skip register arguments. Those are added during ISel, but are not
727   // needed for the concrete branch.
728   while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
729     auto MOP = MI.getOperand(RegMaskStartIdx);
730     assert(MOP.isReg() && "can only add register operands");
731     OriginalCall->addOperand(MachineOperand::CreateReg(
732         MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
733     RegMaskStartIdx++;
734   }
735   for (const MachineOperand &MO :
736        llvm::drop_begin(MI.operands(), RegMaskStartIdx))
737     OriginalCall->addOperand(MO);
738 
739   auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
740                      .addReg(AArch64::FP, RegState::Define)
741                      .addReg(AArch64::XZR)
742                      .addReg(AArch64::FP)
743                      .addImm(0)
744                      .getInstr();
745   if (MI.shouldUpdateCallSiteInfo())
746     MBB.getParent()->moveCallSiteInfo(&MI, Marker);
747   MI.eraseFromParent();
748   finalizeBundle(MBB, OriginalCall->getIterator(),
749                  std::next(Marker->getIterator()));
750   return true;
751 }
752 
753 bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
754     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
755   Register CtxReg = MBBI->getOperand(0).getReg();
756   Register BaseReg = MBBI->getOperand(1).getReg();
757   int Offset = MBBI->getOperand(2).getImm();
758   DebugLoc DL(MBBI->getDebugLoc());
759   auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
760 
761   if (STI.getTargetTriple().getArchName() != "arm64e") {
762     BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
763         .addUse(CtxReg)
764         .addUse(BaseReg)
765         .addImm(Offset / 8)
766         .setMIFlag(MachineInstr::FrameSetup);
767     MBBI->eraseFromParent();
768     return true;
769   }
770 
771   // We need to sign the context in an address-discriminated way. 0xc31a is a
772   // fixed random value, chosen as part of the ABI.
773   //     add x16, xBase, #Offset
774   //     movk x16, #0xc31a, lsl #48
775   //     mov x17, x22/xzr
776   //     pacdb x17, x16
777   //     str x17, [xBase, #Offset]
778   unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
779   BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
780       .addUse(BaseReg)
781       .addImm(abs(Offset))
782       .addImm(0)
783       .setMIFlag(MachineInstr::FrameSetup);
784   BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
785       .addUse(AArch64::X16)
786       .addImm(0xc31a)
787       .addImm(48)
788       .setMIFlag(MachineInstr::FrameSetup);
789   // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried),
790   // so move the context value into X17 before signing it.
791   BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
792       .addUse(AArch64::XZR)
793       .addUse(CtxReg)
794       .addImm(0)
795       .setMIFlag(MachineInstr::FrameSetup);
796   BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
797       .addUse(AArch64::X17)
798       .addUse(AArch64::X16)
799       .setMIFlag(MachineInstr::FrameSetup);
800   BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
801       .addUse(AArch64::X17)
802       .addUse(BaseReg)
803       .addImm(Offset / 8)
804       .setMIFlag(MachineInstr::FrameSetup);
805 
806   MBBI->eraseFromParent();
807   return true;
808 }
809 
810 /// If MBBI references a pseudo instruction that should be expanded here,
811 /// do the expansion and return true.  Otherwise return false.
812 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
813                                    MachineBasicBlock::iterator MBBI,
814                                    MachineBasicBlock::iterator &NextMBBI) {
815   MachineInstr &MI = *MBBI;
816   unsigned Opcode = MI.getOpcode();
817 
818   // Check if we can expand the destructive op
819   int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
820   if (OrigInstr != -1) {
821     auto &Orig = TII->get(OrigInstr);
822     if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
823            != AArch64::NotDestructive) {
824       return expand_DestructiveOp(MI, MBB, MBBI);
825     }
826   }
827 
828   switch (Opcode) {
829   default:
830     break;
831 
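  // The BSP (bitwise select) pseudo is lowered to BSL, BIT or BIF depending on
  // which source register the tied destination happens to match; if it matches
  // none of them, an extra ORR copy is emitted first (see the cases below).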
832   case AArch64::BSPv8i8:
833   case AArch64::BSPv16i8: {
834     Register DstReg = MI.getOperand(0).getReg();
835     if (DstReg == MI.getOperand(3).getReg()) {
836       // Expand to BIT
837       BuildMI(MBB, MBBI, MI.getDebugLoc(),
838               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
839                                                   : AArch64::BITv16i8))
840           .add(MI.getOperand(0))
841           .add(MI.getOperand(3))
842           .add(MI.getOperand(2))
843           .add(MI.getOperand(1));
844     } else if (DstReg == MI.getOperand(2).getReg()) {
845       // Expand to BIF
846       BuildMI(MBB, MBBI, MI.getDebugLoc(),
847               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
848                                                   : AArch64::BIFv16i8))
849           .add(MI.getOperand(0))
850           .add(MI.getOperand(2))
851           .add(MI.getOperand(3))
852           .add(MI.getOperand(1));
853     } else {
854       // Expand to BSL, using an additional move (ORR) if required
855       if (DstReg == MI.getOperand(1).getReg()) {
856         BuildMI(MBB, MBBI, MI.getDebugLoc(),
857                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
858                                                     : AArch64::BSLv16i8))
859             .add(MI.getOperand(0))
860             .add(MI.getOperand(1))
861             .add(MI.getOperand(2))
862             .add(MI.getOperand(3));
863       } else {
864         BuildMI(MBB, MBBI, MI.getDebugLoc(),
865                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
866                                                     : AArch64::ORRv16i8))
867             .addReg(DstReg,
868                     RegState::Define |
869                         getRenamableRegState(MI.getOperand(0).isRenamable()))
870             .add(MI.getOperand(1))
871             .add(MI.getOperand(1));
872         BuildMI(MBB, MBBI, MI.getDebugLoc(),
873                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
874                                                     : AArch64::BSLv16i8))
875             .add(MI.getOperand(0))
876             .addReg(DstReg,
877                     RegState::Kill |
878                         getRenamableRegState(MI.getOperand(0).isRenamable()))
879             .add(MI.getOperand(2))
880             .add(MI.getOperand(3));
881       }
882     }
883     MI.eraseFromParent();
884     return true;
885   }
886 
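  // The plain register-register forms below are lowered to their
  // shifted-register counterparts with a zero shift, e.g. (illustrative):
  //     ADDWrr w0, w1, w2  =>  add w0, w1, w2, lsl #0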
887   case AArch64::ADDWrr:
888   case AArch64::SUBWrr:
889   case AArch64::ADDXrr:
890   case AArch64::SUBXrr:
891   case AArch64::ADDSWrr:
892   case AArch64::SUBSWrr:
893   case AArch64::ADDSXrr:
894   case AArch64::SUBSXrr:
895   case AArch64::ANDWrr:
896   case AArch64::ANDXrr:
897   case AArch64::BICWrr:
898   case AArch64::BICXrr:
899   case AArch64::ANDSWrr:
900   case AArch64::ANDSXrr:
901   case AArch64::BICSWrr:
902   case AArch64::BICSXrr:
903   case AArch64::EONWrr:
904   case AArch64::EONXrr:
905   case AArch64::EORWrr:
906   case AArch64::EORXrr:
907   case AArch64::ORNWrr:
908   case AArch64::ORNXrr:
909   case AArch64::ORRWrr:
910   case AArch64::ORRXrr: {
911     unsigned Opcode;
912     switch (MI.getOpcode()) {
913     default:
914       return false;
915     case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
916     case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
917     case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
918     case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
919     case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
920     case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
921     case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
922     case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
923     case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
924     case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
925     case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
926     case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
927     case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
928     case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
929     case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
930     case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
931     case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
932     case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
933     case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
934     case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
935     case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
936     case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
937     case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
938     case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
939     }
940     MachineFunction &MF = *MBB.getParent();
941     // Create the new instruction without its implicit operands;
942     // transferImpOps re-attaches them from the pseudo below.
942     MachineInstr *NewMI = MF.CreateMachineInstr(
943         TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
944     MBB.insert(MBBI, NewMI);
945     MachineInstrBuilder MIB1(MF, NewMI);
946     MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
947         .add(MI.getOperand(1))
948         .add(MI.getOperand(2))
949         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
950     transferImpOps(MI, MIB1, MIB1);
951     MI.eraseFromParent();
952     return true;
953   }
954 
955   case AArch64::LOADgot: {
956     MachineFunction *MF = MBB.getParent();
957     Register DstReg = MI.getOperand(0).getReg();
958     const MachineOperand &MO1 = MI.getOperand(1);
959     unsigned Flags = MO1.getTargetFlags();
960 
961     if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
962       // Tiny code model: expand to a single literal LDR.
963       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
964                                         TII->get(AArch64::LDRXl), DstReg);
965 
966       if (MO1.isGlobal()) {
967         MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
968       } else if (MO1.isSymbol()) {
969         MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
970       } else {
971         assert(MO1.isCPI() &&
972                "Only expect globals, externalsymbols, or constant pools");
973         MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
974       }
975     } else {
976       // Small code model: expand into ADRP + LDR.
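      // e.g. (illustrative):  adrp x0, :got:var ; ldr x0, [x0, :got_lo12:var]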
977       MachineFunction &MF = *MI.getParent()->getParent();
978       DebugLoc DL = MI.getDebugLoc();
979       MachineInstrBuilder MIB1 =
980           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
981 
982       MachineInstrBuilder MIB2;
983       if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
984         auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
985         unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
986         unsigned DstFlags = MI.getOperand(0).getTargetFlags();
987         MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
988                    .addDef(Reg32)
989                    .addReg(DstReg, RegState::Kill)
990                    .addReg(DstReg, DstFlags | RegState::Implicit);
991       } else {
992         unsigned DstReg = MI.getOperand(0).getReg();
993         MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
994                    .add(MI.getOperand(0))
995                    .addUse(DstReg, RegState::Kill);
996       }
997 
998       if (MO1.isGlobal()) {
999         MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
1000         MIB2.addGlobalAddress(MO1.getGlobal(), 0,
1001                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1002       } else if (MO1.isSymbol()) {
1003         MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
1004         MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
1005                                                         AArch64II::MO_PAGEOFF |
1006                                                         AArch64II::MO_NC);
1007       } else {
1008         assert(MO1.isCPI() &&
1009                "Only expect globals, externalsymbols, or constant pools");
1010         MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1011                                   Flags | AArch64II::MO_PAGE);
1012         MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1013                                   Flags | AArch64II::MO_PAGEOFF |
1014                                       AArch64II::MO_NC);
1015       }
1016 
1017       transferImpOps(MI, MIB1, MIB2);
1018     }
1019     MI.eraseFromParent();
1020     return true;
1021   }
1022   case AArch64::MOVaddrBA: {
1023     MachineFunction &MF = *MI.getParent()->getParent();
1024     if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1025       // blockaddress expressions have to come from a constant pool because the
1026       // largest addend (and hence offset within a function) allowed for ADRP is
1027       // only 8MB.
1028       const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
1029       assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1030 
1031       MachineConstantPool *MCP = MF.getConstantPool();
1032       unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));
1033 
1034       Register DstReg = MI.getOperand(0).getReg();
1035       auto MIB1 =
1036           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1037               .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
1038       auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1039                           TII->get(AArch64::LDRXui), DstReg)
1040                       .addUse(DstReg)
1041                       .addConstantPoolIndex(
1042                           CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1043       transferImpOps(MI, MIB1, MIB2);
1044       MI.eraseFromParent();
1045       return true;
1046     }
1047   }
1048     LLVM_FALLTHROUGH;
1049   case AArch64::MOVaddr:
1050   case AArch64::MOVaddrJT:
1051   case AArch64::MOVaddrCP:
1052   case AArch64::MOVaddrTLS:
1053   case AArch64::MOVaddrEXT: {
1054     // Expand into ADRP + ADD.
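    // e.g. (illustrative):  MOVaddr x0, @var  =>  adrp x0, var
    //                                             add  x0, x0, :lo12:var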
1055     Register DstReg = MI.getOperand(0).getReg();
1056     assert(DstReg != AArch64::XZR);
1057     MachineInstrBuilder MIB1 =
1058         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1059             .add(MI.getOperand(1));
1060 
1061     if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
1062       // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1063       // We do so by creating a MOVK that sets bits 48-63 of the register to
1064       // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1065       // the small code model so we can assume a binary size of <= 4GB, which
1066       // makes the untagged PC relative offset positive. The binary must also be
1067       // loaded into address range [0, 2^48). Both of these properties need to
1068       // be ensured at runtime when using tagged addresses.
1069       auto Tag = MI.getOperand(1);
1070       Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1071       Tag.setOffset(0x100000000);
1072       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
1073           .addReg(DstReg)
1074           .add(Tag)
1075           .addImm(48);
1076     }
1077 
1078     MachineInstrBuilder MIB2 =
1079         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1080             .add(MI.getOperand(0))
1081             .addReg(DstReg)
1082             .add(MI.getOperand(2))
1083             .addImm(0);
1084 
1085     transferImpOps(MI, MIB1, MIB2);
1086     MI.eraseFromParent();
1087     return true;
1088   }
1089   case AArch64::ADDlowTLS:
1090     // Produce a plain ADD
1091     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1092         .add(MI.getOperand(0))
1093         .add(MI.getOperand(1))
1094         .add(MI.getOperand(2))
1095         .addImm(0);
1096     MI.eraseFromParent();
1097     return true;
1098 
1099   case AArch64::MOVbaseTLS: {
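    // Read the thread pointer from the appropriate TPIDR_ELx system register,
    // e.g. (illustrative):  MOVbaseTLS x0  =>  mrs x0, TPIDR_EL0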
1100     Register DstReg = MI.getOperand(0).getReg();
1101     auto SysReg = AArch64SysReg::TPIDR_EL0;
1102     MachineFunction *MF = MBB.getParent();
1103     if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1104       SysReg = AArch64SysReg::TPIDR_EL3;
1105     else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1106       SysReg = AArch64SysReg::TPIDR_EL2;
1107     else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1108       SysReg = AArch64SysReg::TPIDR_EL1;
1109     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
1110         .addImm(SysReg);
1111     MI.eraseFromParent();
1112     return true;
1113   }
1114 
1115   case AArch64::MOVi32imm:
1116     return expandMOVImm(MBB, MBBI, 32);
1117   case AArch64::MOVi64imm:
1118     return expandMOVImm(MBB, MBBI, 64);
1119   case AArch64::RET_ReallyLR: {
1120     // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1121     // function and missing live-ins. We are fine in practice because callee
1122     // saved register handling ensures the register value is restored before
1123     // RET, but we need the undef flag here to appease the MachineVerifier
1124     // liveness checks.
1125     MachineInstrBuilder MIB =
1126         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
1127           .addReg(AArch64::LR, RegState::Undef);
1128     transferImpOps(MI, MIB, MIB);
1129     MI.eraseFromParent();
1130     return true;
1131   }
1132   case AArch64::CMP_SWAP_8:
1133     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1134                           AArch64::SUBSWrx,
1135                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
1136                           AArch64::WZR, NextMBBI);
1137   case AArch64::CMP_SWAP_16:
1138     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1139                           AArch64::SUBSWrx,
1140                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
1141                           AArch64::WZR, NextMBBI);
1142   case AArch64::CMP_SWAP_32:
1143     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1144                           AArch64::SUBSWrs,
1145                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1146                           AArch64::WZR, NextMBBI);
1147   case AArch64::CMP_SWAP_64:
1148     return expandCMP_SWAP(MBB, MBBI,
1149                           AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1150                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1151                           AArch64::XZR, NextMBBI);
1152   case AArch64::CMP_SWAP_128:
1153   case AArch64::CMP_SWAP_128_RELEASE:
1154   case AArch64::CMP_SWAP_128_ACQUIRE:
1155   case AArch64::CMP_SWAP_128_MONOTONIC:
1156     return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1157 
1158   case AArch64::AESMCrrTied:
1159   case AArch64::AESIMCrrTied: {
1160     MachineInstrBuilder MIB =
1161     BuildMI(MBB, MBBI, MI.getDebugLoc(),
1162             TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1163                                                       AArch64::AESIMCrr))
1164       .add(MI.getOperand(0))
1165       .add(MI.getOperand(1));
1166     transferImpOps(MI, MIB, MIB);
1167     MI.eraseFromParent();
1168     return true;
1169    }
1170    case AArch64::IRGstack: {
1171      MachineFunction &MF = *MBB.getParent();
1172      const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1173      const AArch64FrameLowering *TFI =
1174          MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1175 
1176      // IRG does not allow an immediate offset. getTaggedBasePointerOffset should
1177      // almost always point to SP-after-prologue; if not, emit a longer
1178      // instruction sequence.
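     // Illustrative expansion (when the resolved frame offset is non-zero):
     //     add xDst, xBase, #off ; irg xDst, xDst, xRm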
1179      int BaseOffset = -AFI->getTaggedBasePointerOffset();
1180      Register FrameReg;
1181      StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1182          MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
1183          /*PreferFP=*/false,
1184          /*ForSimm=*/true);
1185      Register SrcReg = FrameReg;
1186      if (FrameRegOffset) {
1187        // Use output register as temporary.
1188        SrcReg = MI.getOperand(0).getReg();
1189        emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
1190                        FrameRegOffset, TII);
1191      }
1192      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
1193          .add(MI.getOperand(0))
1194          .addUse(SrcReg)
1195          .add(MI.getOperand(2));
1196      MI.eraseFromParent();
1197      return true;
1198    }
1199    case AArch64::TAGPstack: {
1200      int64_t Offset = MI.getOperand(2).getImm();
1201      BuildMI(MBB, MBBI, MI.getDebugLoc(),
1202              TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1203          .add(MI.getOperand(0))
1204          .add(MI.getOperand(1))
1205          .addImm(std::abs(Offset))
1206          .add(MI.getOperand(4));
1207      MI.eraseFromParent();
1208      return true;
1209    }
1210    case AArch64::STGloop_wback:
1211    case AArch64::STZGloop_wback:
1212      return expandSetTagLoop(MBB, MBBI, NextMBBI);
1213    case AArch64::STGloop:
1214    case AArch64::STZGloop:
1215      report_fatal_error(
1216          "Non-writeback variants of STGloop / STZGloop should not "
1217          "survive past PrologEpilogInserter.");
1218    case AArch64::STR_ZZZZXI:
1219      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
1220    case AArch64::STR_ZZZXI:
1221      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
1222    case AArch64::STR_ZZXI:
1223      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
1224    case AArch64::LDR_ZZZZXI:
1225      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
1226    case AArch64::LDR_ZZZXI:
1227      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
1228    case AArch64::LDR_ZZXI:
1229      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
1230    case AArch64::BLR_RVMARKER:
1231      return expandCALL_RVMARKER(MBB, MBBI);
1232    case AArch64::StoreSwiftAsyncContext:
1233      return expandStoreSwiftAsyncContext(MBB, MBBI);
1234   }
1235   return false;
1236 }
1237 
1238 /// Iterate over the instructions in basic block MBB and expand any
1239 /// pseudo instructions.  Return true if anything was modified.
1240 bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
1241   bool Modified = false;
1242 
1243   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1244   while (MBBI != E) {
1245     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
1246     Modified |= expandMI(MBB, MBBI, NMBBI);
1247     MBBI = NMBBI;
1248   }
1249 
1250   return Modified;
1251 }
1252 
1253 bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
1254   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
1255 
1256   bool Modified = false;
1257   for (auto &MBB : MF)
1258     Modified |= expandMBB(MBB);
1259   return Modified;
1260 }
1261 
1262 /// Returns an instance of the pseudo instruction expansion pass.
1263 FunctionPass *llvm::createAArch64ExpandPseudoPass() {
1264   return new AArch64ExpandPseudo();
1265 }
1266