xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp (revision 963f5dc7a30624e95d72fb7f87b8892651164e46)
1 //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands pseudo instructions into target
10 // instructions to allow proper scheduling and other late optimizations.  This
11 // pass should be run after register allocation but before the post-regalloc
12 // scheduling pass.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64ExpandImm.h"
17 #include "AArch64InstrInfo.h"
18 #include "AArch64MachineFunctionInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/Triple.h"
24 #include "llvm/CodeGen/LivePhysRegs.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstr.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/CodeGen/TargetSubtargetInfo.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/MC/MCInstrDesc.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/CodeGen.h"
37 #include "llvm/Support/MathExtras.h"
38 #include "llvm/Target/TargetMachine.h"
39 #include <cassert>
40 #include <cstdint>
41 #include <iterator>
42 #include <limits>
43 #include <utility>
44 
45 using namespace llvm;
46 
47 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
48 
49 namespace {
50 
51 class AArch64ExpandPseudo : public MachineFunctionPass {
52 public:
53   const AArch64InstrInfo *TII;
54 
55   static char ID;
56 
57   AArch64ExpandPseudo() : MachineFunctionPass(ID) {
58     initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
59   }
60 
61   bool runOnMachineFunction(MachineFunction &Fn) override;
62 
63   StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
64 
65 private:
66   bool expandMBB(MachineBasicBlock &MBB);
67   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
68                 MachineBasicBlock::iterator &NextMBBI);
69   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
70                     unsigned BitSize);
71 
72   bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
73                             MachineBasicBlock::iterator MBBI);
74   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
75                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
76                       unsigned ExtendImm, unsigned ZeroReg,
77                       MachineBasicBlock::iterator &NextMBBI);
78   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
79                           MachineBasicBlock::iterator MBBI,
80                           MachineBasicBlock::iterator &NextMBBI);
81   bool expandSetTagLoop(MachineBasicBlock &MBB,
82                         MachineBasicBlock::iterator MBBI,
83                         MachineBasicBlock::iterator &NextMBBI);
84   bool expandSVESpillFill(MachineBasicBlock &MBB,
85                           MachineBasicBlock::iterator MBBI, unsigned Opc,
86                           unsigned N);
87   bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
88                            MachineBasicBlock::iterator MBBI);
89   bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
90                                     MachineBasicBlock::iterator MBBI);
91 };
92 
93 } // end anonymous namespace
94 
// Out-of-class definition of the pass's static ID member; the constructor
// passes it to the MachineFunctionPass base class.
95 char AArch64ExpandPseudo::ID = 0;
96 
// Registers the pass under the name "aarch64-expand-pseudo" with the
// description AARCH64_EXPAND_PSEUDO_NAME.
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS -- confirm against the
// macro definition.
97 INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
98                 AARCH64_EXPAND_PSEUDO_NAME, false, false)
99 
100 /// Transfer implicit operands on the pseudo instruction to the
101 /// instructions created from the expansion.
102 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
103                            MachineInstrBuilder &DefMI) {
104   const MCInstrDesc &Desc = OldMI.getDesc();
105   for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); i != e;
106        ++i) {
107     const MachineOperand &MO = OldMI.getOperand(i);
108     assert(MO.isReg() && MO.getReg());
109     if (MO.isUse())
110       UseMI.add(MO);
111     else
112       DefMI.add(MO);
113   }
114 }
115 
116 /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
117 /// real move-immediate instructions to synthesize the immediate.
118 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
119                                        MachineBasicBlock::iterator MBBI,
120                                        unsigned BitSize) {
121   MachineInstr &MI = *MBBI;
122   Register DstReg = MI.getOperand(0).getReg();
123   uint64_t RenamableState =
124       MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
125   uint64_t Imm = MI.getOperand(1).getImm();
126 
127   if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
128     // Useless def, and we don't want to risk creating an invalid ORR (which
129     // would really write to sp).
130     MI.eraseFromParent();
131     return true;
132   }
133 
134   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
135   AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
136   assert(Insn.size() != 0);
137 
138   SmallVector<MachineInstrBuilder, 4> MIBS;
139   for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
140     bool LastItem = std::next(I) == E;
141     switch (I->Opcode)
142     {
143     default: llvm_unreachable("unhandled!"); break;
144 
145     case AArch64::ORRWri:
146     case AArch64::ORRXri:
147       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
148         .add(MI.getOperand(0))
149         .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
150         .addImm(I->Op2));
151       break;
152     case AArch64::MOVNWi:
153     case AArch64::MOVNXi:
154     case AArch64::MOVZWi:
155     case AArch64::MOVZXi: {
156       bool DstIsDead = MI.getOperand(0).isDead();
157       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
158         .addReg(DstReg, RegState::Define |
159                 getDeadRegState(DstIsDead && LastItem) |
160                 RenamableState)
161         .addImm(I->Op1)
162         .addImm(I->Op2));
163       } break;
164     case AArch64::MOVKWi:
165     case AArch64::MOVKXi: {
166       Register DstReg = MI.getOperand(0).getReg();
167       bool DstIsDead = MI.getOperand(0).isDead();
168       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
169         .addReg(DstReg,
170                 RegState::Define |
171                 getDeadRegState(DstIsDead && LastItem) |
172                 RenamableState)
173         .addReg(DstReg)
174         .addImm(I->Op1)
175         .addImm(I->Op2));
176       } break;
177     }
178   }
179   transferImpOps(MI, MIBS.front(), MIBS.back());
180   MI.eraseFromParent();
181   return true;
182 }
183 
184 bool AArch64ExpandPseudo::expandCMP_SWAP(
185     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
186     unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
187     MachineBasicBlock::iterator &NextMBBI) {
188   MachineInstr &MI = *MBBI;
189   DebugLoc DL = MI.getDebugLoc();
190   const MachineOperand &Dest = MI.getOperand(0);
191   Register StatusReg = MI.getOperand(1).getReg();
192   bool StatusDead = MI.getOperand(1).isDead();
193   // Duplicating undef operands into 2 instructions does not guarantee the same
194   // value on both; However undef should be replaced by xzr anyway.
195   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
196   Register AddrReg = MI.getOperand(2).getReg();
197   Register DesiredReg = MI.getOperand(3).getReg();
198   Register NewReg = MI.getOperand(4).getReg();
199 
200   MachineFunction *MF = MBB.getParent();
201   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
202   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
203   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
204 
205   MF->insert(++MBB.getIterator(), LoadCmpBB);
206   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
207   MF->insert(++StoreBB->getIterator(), DoneBB);
208 
209   // .Lloadcmp:
210   //     mov wStatus, 0
211   //     ldaxr xDest, [xAddr]
212   //     cmp xDest, xDesired
213   //     b.ne .Ldone
214   if (!StatusDead)
215     BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
216       .addImm(0).addImm(0);
217   BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
218       .addReg(AddrReg);
219   BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
220       .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
221       .addReg(DesiredReg)
222       .addImm(ExtendImm);
223   BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
224       .addImm(AArch64CC::NE)
225       .addMBB(DoneBB)
226       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
227   LoadCmpBB->addSuccessor(DoneBB);
228   LoadCmpBB->addSuccessor(StoreBB);
229 
230   // .Lstore:
231   //     stlxr wStatus, xNew, [xAddr]
232   //     cbnz wStatus, .Lloadcmp
233   BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
234       .addReg(NewReg)
235       .addReg(AddrReg);
236   BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
237       .addReg(StatusReg, getKillRegState(StatusDead))
238       .addMBB(LoadCmpBB);
239   StoreBB->addSuccessor(LoadCmpBB);
240   StoreBB->addSuccessor(DoneBB);
241 
242   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
243   DoneBB->transferSuccessors(&MBB);
244 
245   MBB.addSuccessor(LoadCmpBB);
246 
247   NextMBBI = MBB.end();
248   MI.eraseFromParent();
249 
250   // Recompute livein lists.
251   LivePhysRegs LiveRegs;
252   computeAndAddLiveIns(LiveRegs, *DoneBB);
253   computeAndAddLiveIns(LiveRegs, *StoreBB);
254   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
255   // Do an extra pass around the loop to get loop carried registers right.
256   StoreBB->clearLiveIns();
257   computeAndAddLiveIns(LiveRegs, *StoreBB);
258   LoadCmpBB->clearLiveIns();
259   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
260 
261   return true;
262 }
263 
264 bool AArch64ExpandPseudo::expandCMP_SWAP_128(
265     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
266     MachineBasicBlock::iterator &NextMBBI) {
267   MachineInstr &MI = *MBBI;
268   DebugLoc DL = MI.getDebugLoc();
269   MachineOperand &DestLo = MI.getOperand(0);
270   MachineOperand &DestHi = MI.getOperand(1);
271   Register StatusReg = MI.getOperand(2).getReg();
272   bool StatusDead = MI.getOperand(2).isDead();
273   // Duplicating undef operands into 2 instructions does not guarantee the same
274   // value on both; However undef should be replaced by xzr anyway.
275   assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
276   Register AddrReg = MI.getOperand(3).getReg();
277   Register DesiredLoReg = MI.getOperand(4).getReg();
278   Register DesiredHiReg = MI.getOperand(5).getReg();
279   Register NewLoReg = MI.getOperand(6).getReg();
280   Register NewHiReg = MI.getOperand(7).getReg();
281 
282   unsigned LdxpOp, StxpOp;
283 
284   switch (MI.getOpcode()) {
285   case AArch64::CMP_SWAP_128_MONOTONIC:
286     LdxpOp = AArch64::LDXPX;
287     StxpOp = AArch64::STXPX;
288     break;
289   case AArch64::CMP_SWAP_128_RELEASE:
290     LdxpOp = AArch64::LDXPX;
291     StxpOp = AArch64::STLXPX;
292     break;
293   case AArch64::CMP_SWAP_128_ACQUIRE:
294     LdxpOp = AArch64::LDAXPX;
295     StxpOp = AArch64::STXPX;
296     break;
297   case AArch64::CMP_SWAP_128:
298     LdxpOp = AArch64::LDAXPX;
299     StxpOp = AArch64::STLXPX;
300     break;
301   default:
302     llvm_unreachable("Unexpected opcode");
303   }
304 
305   MachineFunction *MF = MBB.getParent();
306   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
307   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
308   auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
309   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
310 
311   MF->insert(++MBB.getIterator(), LoadCmpBB);
312   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
313   MF->insert(++StoreBB->getIterator(), FailBB);
314   MF->insert(++FailBB->getIterator(), DoneBB);
315 
316   // .Lloadcmp:
317   //     ldaxp xDestLo, xDestHi, [xAddr]
318   //     cmp xDestLo, xDesiredLo
319   //     sbcs xDestHi, xDesiredHi
320   //     b.ne .Ldone
321   BuildMI(LoadCmpBB, DL, TII->get(LdxpOp))
322       .addReg(DestLo.getReg(), RegState::Define)
323       .addReg(DestHi.getReg(), RegState::Define)
324       .addReg(AddrReg);
325   BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
326       .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
327       .addReg(DesiredLoReg)
328       .addImm(0);
329   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
330     .addUse(AArch64::WZR)
331     .addUse(AArch64::WZR)
332     .addImm(AArch64CC::EQ);
333   BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
334       .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
335       .addReg(DesiredHiReg)
336       .addImm(0);
337   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
338       .addUse(StatusReg, RegState::Kill)
339       .addUse(StatusReg, RegState::Kill)
340       .addImm(AArch64CC::EQ);
341   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
342       .addUse(StatusReg, getKillRegState(StatusDead))
343       .addMBB(FailBB);
344   LoadCmpBB->addSuccessor(FailBB);
345   LoadCmpBB->addSuccessor(StoreBB);
346 
347   // .Lstore:
348   //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
349   //     cbnz wStatus, .Lloadcmp
350   BuildMI(StoreBB, DL, TII->get(StxpOp), StatusReg)
351       .addReg(NewLoReg)
352       .addReg(NewHiReg)
353       .addReg(AddrReg);
354   BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
355       .addReg(StatusReg, getKillRegState(StatusDead))
356       .addMBB(LoadCmpBB);
357   BuildMI(StoreBB, DL, TII->get(AArch64::B)).addMBB(DoneBB);
358   StoreBB->addSuccessor(LoadCmpBB);
359   StoreBB->addSuccessor(DoneBB);
360 
361   // .Lfail:
362   //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
363   //     cbnz wStatus, .Lloadcmp
364   BuildMI(FailBB, DL, TII->get(StxpOp), StatusReg)
365       .addReg(DestLo.getReg())
366       .addReg(DestHi.getReg())
367       .addReg(AddrReg);
368   BuildMI(FailBB, DL, TII->get(AArch64::CBNZW))
369       .addReg(StatusReg, getKillRegState(StatusDead))
370       .addMBB(LoadCmpBB);
371   FailBB->addSuccessor(LoadCmpBB);
372   FailBB->addSuccessor(DoneBB);
373 
374   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
375   DoneBB->transferSuccessors(&MBB);
376 
377   MBB.addSuccessor(LoadCmpBB);
378 
379   NextMBBI = MBB.end();
380   MI.eraseFromParent();
381 
382   // Recompute liveness bottom up.
383   LivePhysRegs LiveRegs;
384   computeAndAddLiveIns(LiveRegs, *DoneBB);
385   computeAndAddLiveIns(LiveRegs, *FailBB);
386   computeAndAddLiveIns(LiveRegs, *StoreBB);
387   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
388 
389   // Do an extra pass in the loop to get the loop carried dependencies right.
390   FailBB->clearLiveIns();
391   computeAndAddLiveIns(LiveRegs, *FailBB);
392   StoreBB->clearLiveIns();
393   computeAndAddLiveIns(LiveRegs, *StoreBB);
394   LoadCmpBB->clearLiveIns();
395   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
396 
397   return true;
398 }
399 
400 /// \brief Expand Pseudos to Instructions with destructive operands.
401 ///
402 /// This mechanism uses MOVPRFX instructions for zeroing the false lanes
403 /// or for fixing relaxed register allocation conditions to comply with
404 /// the instructions register constraints. The latter case may be cheaper
405 /// than setting the register constraints in the register allocator,
406 /// since that will insert regular MOV instructions rather than MOVPRFX.
407 ///
408 /// Example (after register allocation):
409 ///
410 ///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
411 ///
412 /// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
413 /// * We cannot map directly to FSUB_ZPmZ_B because the register
414 ///   constraints of the instruction are not met.
415 /// * Also the _ZERO specifies the false lanes need to be zeroed.
416 ///
417 /// We first try to see if the destructive operand == result operand,
418 /// if not, we try to swap the operands, e.g.
419 ///
420 ///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
421 ///
422 /// But because FSUB_ZPmZ is not commutative, this is semantically
423 /// different, so we need a reverse instruction:
424 ///
425 ///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
426 ///
427 /// Then we implement the zeroing of the false lanes of Z0 by adding
428 /// a zeroing MOVPRFX instruction:
429 ///
430 ///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
431 ///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
432 ///
433 /// Note that this can only be done for _ZERO or _UNDEF variants where
434 /// we can guarantee the false lanes to be zeroed (by implementing this)
435 /// or that they are undef (don't care / not used), otherwise the
436 /// swapping of operands is illegal because the operation is not
437 /// (or cannot be emulated to be) fully commutative.
438 bool AArch64ExpandPseudo::expand_DestructiveOp(
439                             MachineInstr &MI,
440                             MachineBasicBlock &MBB,
441                             MachineBasicBlock::iterator MBBI) {
442   unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
443   uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
444   uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
445   bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
446 
447   unsigned DstReg = MI.getOperand(0).getReg();
448   bool DstIsDead = MI.getOperand(0).isDead();
449 
450   if (DType == AArch64::DestructiveBinary)
451     assert(DstReg != MI.getOperand(3).getReg());
452 
453   bool UseRev = false;
454   unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
455   switch (DType) {
456   case AArch64::DestructiveBinaryComm:
457   case AArch64::DestructiveBinaryCommWithRev:
458     if (DstReg == MI.getOperand(3).getReg()) {
459       // FSUB Zd, Pg, Zs1, Zd  ==> FSUBR   Zd, Pg/m, Zd, Zs1
460       std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
461       UseRev = true;
462       break;
463     }
464     LLVM_FALLTHROUGH;
465   case AArch64::DestructiveBinary:
466   case AArch64::DestructiveBinaryImm:
467     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
468     break;
469   case AArch64::DestructiveUnaryPassthru:
470     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
471     break;
472   case AArch64::DestructiveTernaryCommWithRev:
473     std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
474     if (DstReg == MI.getOperand(3).getReg()) {
475       // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
476       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
477       UseRev = true;
478     } else if (DstReg == MI.getOperand(4).getReg()) {
479       // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
480       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
481       UseRev = true;
482     }
483     break;
484   default:
485     llvm_unreachable("Unsupported Destructive Operand type");
486   }
487 
488 #ifndef NDEBUG
489   // MOVPRFX can only be used if the destination operand
490   // is the destructive operand, not as any other operand,
491   // so the Destructive Operand must be unique.
492   bool DOPRegIsUnique = false;
493   switch (DType) {
494   case AArch64::DestructiveBinaryComm:
495   case AArch64::DestructiveBinaryCommWithRev:
496     DOPRegIsUnique =
497       DstReg != MI.getOperand(DOPIdx).getReg() ||
498       MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
499     break;
500   case AArch64::DestructiveUnaryPassthru:
501   case AArch64::DestructiveBinaryImm:
502     DOPRegIsUnique = true;
503     break;
504   case AArch64::DestructiveTernaryCommWithRev:
505     DOPRegIsUnique =
506         DstReg != MI.getOperand(DOPIdx).getReg() ||
507         (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
508          MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
509     break;
510   }
511 #endif
512 
513   // Resolve the reverse opcode
514   if (UseRev) {
515     int NewOpcode;
516     // e.g. DIV -> DIVR
517     if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
518       Opcode = NewOpcode;
519     // e.g. DIVR -> DIV
520     else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
521       Opcode = NewOpcode;
522   }
523 
524   // Get the right MOVPRFX
525   uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
526   unsigned MovPrfx, MovPrfxZero;
527   switch (ElementSize) {
528   case AArch64::ElementSizeNone:
529   case AArch64::ElementSizeB:
530     MovPrfx = AArch64::MOVPRFX_ZZ;
531     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
532     break;
533   case AArch64::ElementSizeH:
534     MovPrfx = AArch64::MOVPRFX_ZZ;
535     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
536     break;
537   case AArch64::ElementSizeS:
538     MovPrfx = AArch64::MOVPRFX_ZZ;
539     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
540     break;
541   case AArch64::ElementSizeD:
542     MovPrfx = AArch64::MOVPRFX_ZZ;
543     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
544     break;
545   default:
546     llvm_unreachable("Unsupported ElementSize");
547   }
548 
549   //
550   // Create the destructive operation (if required)
551   //
552   MachineInstrBuilder PRFX, DOP;
553   if (FalseZero) {
554 #ifndef NDEBUG
555     assert(DOPRegIsUnique && "The destructive operand should be unique");
556 #endif
557     assert(ElementSize != AArch64::ElementSizeNone &&
558            "This instruction is unpredicated");
559 
560     // Merge source operand into destination register
561     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
562                .addReg(DstReg, RegState::Define)
563                .addReg(MI.getOperand(PredIdx).getReg())
564                .addReg(MI.getOperand(DOPIdx).getReg());
565 
566     // After the movprfx, the destructive operand is same as Dst
567     DOPIdx = 0;
568   } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
569 #ifndef NDEBUG
570     assert(DOPRegIsUnique && "The destructive operand should be unique");
571 #endif
572     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
573                .addReg(DstReg, RegState::Define)
574                .addReg(MI.getOperand(DOPIdx).getReg());
575     DOPIdx = 0;
576   }
577 
578   //
579   // Create the destructive operation
580   //
581   DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
582     .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
583 
584   switch (DType) {
585   case AArch64::DestructiveUnaryPassthru:
586     DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
587         .add(MI.getOperand(PredIdx))
588         .add(MI.getOperand(SrcIdx));
589     break;
590   case AArch64::DestructiveBinaryImm:
591   case AArch64::DestructiveBinaryComm:
592   case AArch64::DestructiveBinaryCommWithRev:
593     DOP.add(MI.getOperand(PredIdx))
594        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
595        .add(MI.getOperand(SrcIdx));
596     break;
597   case AArch64::DestructiveTernaryCommWithRev:
598     DOP.add(MI.getOperand(PredIdx))
599         .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
600         .add(MI.getOperand(SrcIdx))
601         .add(MI.getOperand(Src2Idx));
602     break;
603   }
604 
605   if (PRFX) {
606     finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
607     transferImpOps(MI, PRFX, DOP);
608   } else
609     transferImpOps(MI, DOP, DOP);
610 
611   MI.eraseFromParent();
612   return true;
613 }
614 
615 bool AArch64ExpandPseudo::expandSetTagLoop(
616     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
617     MachineBasicBlock::iterator &NextMBBI) {
618   MachineInstr &MI = *MBBI;
619   DebugLoc DL = MI.getDebugLoc();
620   Register SizeReg = MI.getOperand(0).getReg();
621   Register AddressReg = MI.getOperand(1).getReg();
622 
623   MachineFunction *MF = MBB.getParent();
624 
625   bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
626   const unsigned OpCode1 =
627       ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
628   const unsigned OpCode2 =
629       ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
630 
631   unsigned Size = MI.getOperand(2).getImm();
632   assert(Size > 0 && Size % 16 == 0);
633   if (Size % (16 * 2) != 0) {
634     BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
635         .addReg(AddressReg)
636         .addReg(AddressReg)
637         .addImm(1);
638     Size -= 16;
639   }
640   MachineBasicBlock::iterator I =
641       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
642           .addImm(Size);
643   expandMOVImm(MBB, I, 64);
644 
645   auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
646   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
647 
648   MF->insert(++MBB.getIterator(), LoopBB);
649   MF->insert(++LoopBB->getIterator(), DoneBB);
650 
651   BuildMI(LoopBB, DL, TII->get(OpCode2))
652       .addDef(AddressReg)
653       .addReg(AddressReg)
654       .addReg(AddressReg)
655       .addImm(2)
656       .cloneMemRefs(MI)
657       .setMIFlags(MI.getFlags());
658   BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
659       .addDef(SizeReg)
660       .addReg(SizeReg)
661       .addImm(16 * 2)
662       .addImm(0);
663   BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);
664 
665   LoopBB->addSuccessor(LoopBB);
666   LoopBB->addSuccessor(DoneBB);
667 
668   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
669   DoneBB->transferSuccessors(&MBB);
670 
671   MBB.addSuccessor(LoopBB);
672 
673   NextMBBI = MBB.end();
674   MI.eraseFromParent();
675   // Recompute liveness bottom up.
676   LivePhysRegs LiveRegs;
677   computeAndAddLiveIns(LiveRegs, *DoneBB);
678   computeAndAddLiveIns(LiveRegs, *LoopBB);
679   // Do an extra pass in the loop to get the loop carried dependencies right.
680   // FIXME: is this necessary?
681   LoopBB->clearLiveIns();
682   computeAndAddLiveIns(LiveRegs, *LoopBB);
683   DoneBB->clearLiveIns();
684   computeAndAddLiveIns(LiveRegs, *DoneBB);
685 
686   return true;
687 }
688 
689 bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
690                                              MachineBasicBlock::iterator MBBI,
691                                              unsigned Opc, unsigned N) {
692   const TargetRegisterInfo *TRI =
693       MBB.getParent()->getSubtarget().getRegisterInfo();
694   MachineInstr &MI = *MBBI;
695   for (unsigned Offset = 0; Offset < N; ++Offset) {
696     int ImmOffset = MI.getOperand(2).getImm() + Offset;
697     bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
698     assert(ImmOffset >= -256 && ImmOffset < 256 &&
699            "Immediate spill offset out of range");
700     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
701         .addReg(
702             TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
703             Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
704         .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
705         .addImm(ImmOffset);
706   }
707   MI.eraseFromParent();
708   return true;
709 }
710 
711 bool AArch64ExpandPseudo::expandCALL_RVMARKER(
712     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
713   // Expand CALL_RVMARKER pseudo to a branch, followed by the special `mov x29,
714   // x29` marker. Mark the sequence as bundle, to avoid passes moving other code
715   // in between.
716   MachineInstr &MI = *MBBI;
717 
718   MachineInstr *OriginalCall;
719   MachineOperand &CallTarget = MI.getOperand(0);
720   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
721          "invalid operand for regular call");
722   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
723   OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
724   OriginalCall->addOperand(CallTarget);
725 
726   unsigned RegMaskStartIdx = 1;
727   // Skip register arguments. Those are added during ISel, but are not
728   // needed for the concrete branch.
729   while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
730     auto MOP = MI.getOperand(RegMaskStartIdx);
731     assert(MOP.isReg() && "can only add register operands");
732     OriginalCall->addOperand(MachineOperand::CreateReg(
733         MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
734     RegMaskStartIdx++;
735   }
736   for (; RegMaskStartIdx < MI.getNumOperands(); ++RegMaskStartIdx)
737     OriginalCall->addOperand(MI.getOperand(RegMaskStartIdx));
738 
739   auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
740                      .addReg(AArch64::FP, RegState::Define)
741                      .addReg(AArch64::XZR)
742                      .addReg(AArch64::FP)
743                      .addImm(0)
744                      .getInstr();
745   if (MI.shouldUpdateCallSiteInfo())
746     MBB.getParent()->moveCallSiteInfo(&MI, Marker);
747   MI.eraseFromParent();
748   finalizeBundle(MBB, OriginalCall->getIterator(),
749                  std::next(Marker->getIterator()));
750   return true;
751 }
752 
753 bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
754     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
755   Register CtxReg = MBBI->getOperand(0).getReg();
756   Register BaseReg = MBBI->getOperand(1).getReg();
757   int Offset = MBBI->getOperand(2).getImm();
758   DebugLoc DL(MBBI->getDebugLoc());
759   auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
760 
761   if (STI.getTargetTriple().getArchName() != "arm64e") {
762     BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
763         .addUse(CtxReg)
764         .addUse(BaseReg)
765         .addImm(Offset / 8)
766         .setMIFlag(MachineInstr::FrameSetup);
767     MBBI->eraseFromParent();
768     return true;
769   }
770 
771   // We need to sign the context in an address-discriminated way. 0xc31a is a
772   // fixed random value, chosen as part of the ABI.
773   //     add x16, xBase, #Offset
774   //     movk x16, #0xc31a, lsl #48
775   //     mov x17, x22/xzr
776   //     pacdb x17, x16
777   //     str x17, [xBase, #Offset]
778   unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
779   BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
780       .addUse(BaseReg)
781       .addImm(abs(Offset))
782       .addImm(0)
783       .setMIFlag(MachineInstr::FrameSetup);
784   BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
785       .addUse(AArch64::X16)
786       .addImm(0xc31a)
787       .addImm(48)
788       .setMIFlag(MachineInstr::FrameSetup);
789   // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
790   // move it somewhere before signing.
791   BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
792       .addUse(AArch64::XZR)
793       .addUse(CtxReg)
794       .addImm(0)
795       .setMIFlag(MachineInstr::FrameSetup);
796   BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
797       .addUse(AArch64::X17)
798       .addUse(AArch64::X16)
799       .setMIFlag(MachineInstr::FrameSetup);
800   BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
801       .addUse(AArch64::X17)
802       .addUse(BaseReg)
803       .addImm(Offset / 8)
804       .setMIFlag(MachineInstr::FrameSetup);
805 
806   MBBI->eraseFromParent();
807   return true;
808 }
809 
810 /// If MBBI references a pseudo instruction that should be expanded here,
811 /// do the expansion and return true.  Otherwise return false.
812 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
813                                    MachineBasicBlock::iterator MBBI,
814                                    MachineBasicBlock::iterator &NextMBBI) {
815   MachineInstr &MI = *MBBI;
816   unsigned Opcode = MI.getOpcode();
817 
818   // Check if we can expand the destructive op
819   int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
820   if (OrigInstr != -1) {
821     auto &Orig = TII->get(OrigInstr);
822     if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
823            != AArch64::NotDestructive) {
824       return expand_DestructiveOp(MI, MBB, MBBI);
825     }
826   }
827 
828   switch (Opcode) {
829   default:
830     break;
831 
832   case AArch64::BSPv8i8:
833   case AArch64::BSPv16i8: {
834     Register DstReg = MI.getOperand(0).getReg();
835     if (DstReg == MI.getOperand(3).getReg()) {
836       // Expand to BIT
837       BuildMI(MBB, MBBI, MI.getDebugLoc(),
838               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
839                                                   : AArch64::BITv16i8))
840           .add(MI.getOperand(0))
841           .add(MI.getOperand(3))
842           .add(MI.getOperand(2))
843           .add(MI.getOperand(1));
844     } else if (DstReg == MI.getOperand(2).getReg()) {
845       // Expand to BIF
846       BuildMI(MBB, MBBI, MI.getDebugLoc(),
847               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
848                                                   : AArch64::BIFv16i8))
849           .add(MI.getOperand(0))
850           .add(MI.getOperand(2))
851           .add(MI.getOperand(3))
852           .add(MI.getOperand(1));
853     } else {
854       // Expand to BSL, use additional move if required
855       if (DstReg == MI.getOperand(1).getReg()) {
856         BuildMI(MBB, MBBI, MI.getDebugLoc(),
857                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
858                                                     : AArch64::BSLv16i8))
859             .add(MI.getOperand(0))
860             .add(MI.getOperand(1))
861             .add(MI.getOperand(2))
862             .add(MI.getOperand(3));
863       } else {
864         BuildMI(MBB, MBBI, MI.getDebugLoc(),
865                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
866                                                     : AArch64::ORRv16i8))
867             .addReg(DstReg,
868                     RegState::Define |
869                         getRenamableRegState(MI.getOperand(0).isRenamable()))
870             .add(MI.getOperand(1))
871             .add(MI.getOperand(1));
872         BuildMI(MBB, MBBI, MI.getDebugLoc(),
873                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
874                                                     : AArch64::BSLv16i8))
875             .add(MI.getOperand(0))
876             .addReg(DstReg,
877                     RegState::Kill |
878                         getRenamableRegState(MI.getOperand(0).isRenamable()))
879             .add(MI.getOperand(2))
880             .add(MI.getOperand(3));
881       }
882     }
883     MI.eraseFromParent();
884     return true;
885   }
886 
887   case AArch64::ADDWrr:
888   case AArch64::SUBWrr:
889   case AArch64::ADDXrr:
890   case AArch64::SUBXrr:
891   case AArch64::ADDSWrr:
892   case AArch64::SUBSWrr:
893   case AArch64::ADDSXrr:
894   case AArch64::SUBSXrr:
895   case AArch64::ANDWrr:
896   case AArch64::ANDXrr:
897   case AArch64::BICWrr:
898   case AArch64::BICXrr:
899   case AArch64::ANDSWrr:
900   case AArch64::ANDSXrr:
901   case AArch64::BICSWrr:
902   case AArch64::BICSXrr:
903   case AArch64::EONWrr:
904   case AArch64::EONXrr:
905   case AArch64::EORWrr:
906   case AArch64::EORXrr:
907   case AArch64::ORNWrr:
908   case AArch64::ORNXrr:
909   case AArch64::ORRWrr:
910   case AArch64::ORRXrr: {
911     unsigned Opcode;
912     switch (MI.getOpcode()) {
913     default:
914       return false;
915     case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
916     case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
917     case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
918     case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
919     case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
920     case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
921     case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
922     case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
923     case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
924     case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
925     case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
926     case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
927     case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
928     case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
929     case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
930     case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
931     case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
932     case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
933     case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
934     case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
935     case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
936     case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
937     case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
938     case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
939     }
940     MachineInstrBuilder MIB1 =
941         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode),
942                 MI.getOperand(0).getReg())
943             .add(MI.getOperand(1))
944             .add(MI.getOperand(2))
945             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
946     transferImpOps(MI, MIB1, MIB1);
947     MI.eraseFromParent();
948     return true;
949   }
950 
951   case AArch64::LOADgot: {
952     MachineFunction *MF = MBB.getParent();
953     Register DstReg = MI.getOperand(0).getReg();
954     const MachineOperand &MO1 = MI.getOperand(1);
955     unsigned Flags = MO1.getTargetFlags();
956 
957     if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
958       // Tiny codemodel expand to LDR
959       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
960                                         TII->get(AArch64::LDRXl), DstReg);
961 
962       if (MO1.isGlobal()) {
963         MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
964       } else if (MO1.isSymbol()) {
965         MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
966       } else {
967         assert(MO1.isCPI() &&
968                "Only expect globals, externalsymbols, or constant pools");
969         MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
970       }
971     } else {
972       // Small codemodel expand into ADRP + LDR.
973       MachineFunction &MF = *MI.getParent()->getParent();
974       DebugLoc DL = MI.getDebugLoc();
975       MachineInstrBuilder MIB1 =
976           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
977 
978       MachineInstrBuilder MIB2;
979       if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
980         auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
981         unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
982         unsigned DstFlags = MI.getOperand(0).getTargetFlags();
983         MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
984                    .addDef(Reg32)
985                    .addReg(DstReg, RegState::Kill)
986                    .addReg(DstReg, DstFlags | RegState::Implicit);
987       } else {
988         unsigned DstReg = MI.getOperand(0).getReg();
989         MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
990                    .add(MI.getOperand(0))
991                    .addUse(DstReg, RegState::Kill);
992       }
993 
994       if (MO1.isGlobal()) {
995         MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
996         MIB2.addGlobalAddress(MO1.getGlobal(), 0,
997                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
998       } else if (MO1.isSymbol()) {
999         MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
1000         MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
1001                                                         AArch64II::MO_PAGEOFF |
1002                                                         AArch64II::MO_NC);
1003       } else {
1004         assert(MO1.isCPI() &&
1005                "Only expect globals, externalsymbols, or constant pools");
1006         MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1007                                   Flags | AArch64II::MO_PAGE);
1008         MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1009                                   Flags | AArch64II::MO_PAGEOFF |
1010                                       AArch64II::MO_NC);
1011       }
1012 
1013       transferImpOps(MI, MIB1, MIB2);
1014     }
1015     MI.eraseFromParent();
1016     return true;
1017   }
1018   case AArch64::MOVaddrBA: {
1019     MachineFunction &MF = *MI.getParent()->getParent();
1020     if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1021       // blockaddress expressions have to come from a constant pool because the
1022       // largest addend (and hence offset within a function) allowed for ADRP is
1023       // only 8MB.
1024       const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
1025       assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1026 
1027       MachineConstantPool *MCP = MF.getConstantPool();
1028       unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));
1029 
1030       Register DstReg = MI.getOperand(0).getReg();
1031       auto MIB1 =
1032           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1033               .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
1034       auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1035                           TII->get(AArch64::LDRXui), DstReg)
1036                       .addUse(DstReg)
1037                       .addConstantPoolIndex(
1038                           CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1039       transferImpOps(MI, MIB1, MIB2);
1040       MI.eraseFromParent();
1041       return true;
1042     }
1043   }
1044     LLVM_FALLTHROUGH;
1045   case AArch64::MOVaddr:
1046   case AArch64::MOVaddrJT:
1047   case AArch64::MOVaddrCP:
1048   case AArch64::MOVaddrTLS:
1049   case AArch64::MOVaddrEXT: {
1050     // Expand into ADRP + ADD.
1051     Register DstReg = MI.getOperand(0).getReg();
1052     MachineInstrBuilder MIB1 =
1053         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1054             .add(MI.getOperand(1));
1055 
1056     if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
1057       // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1058       // We do so by creating a MOVK that sets bits 48-63 of the register to
1059       // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1060       // the small code model so we can assume a binary size of <= 4GB, which
1061       // makes the untagged PC relative offset positive. The binary must also be
1062       // loaded into address range [0, 2^48). Both of these properties need to
1063       // be ensured at runtime when using tagged addresses.
1064       auto Tag = MI.getOperand(1);
1065       Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1066       Tag.setOffset(0x100000000);
1067       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
1068           .addReg(DstReg)
1069           .add(Tag)
1070           .addImm(48);
1071     }
1072 
1073     MachineInstrBuilder MIB2 =
1074         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1075             .add(MI.getOperand(0))
1076             .addReg(DstReg)
1077             .add(MI.getOperand(2))
1078             .addImm(0);
1079 
1080     transferImpOps(MI, MIB1, MIB2);
1081     MI.eraseFromParent();
1082     return true;
1083   }
1084   case AArch64::ADDlowTLS:
1085     // Produce a plain ADD
1086     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1087         .add(MI.getOperand(0))
1088         .add(MI.getOperand(1))
1089         .add(MI.getOperand(2))
1090         .addImm(0);
1091     MI.eraseFromParent();
1092     return true;
1093 
1094   case AArch64::MOVbaseTLS: {
1095     Register DstReg = MI.getOperand(0).getReg();
1096     auto SysReg = AArch64SysReg::TPIDR_EL0;
1097     MachineFunction *MF = MBB.getParent();
1098     if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1099       SysReg = AArch64SysReg::TPIDR_EL3;
1100     else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1101       SysReg = AArch64SysReg::TPIDR_EL2;
1102     else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1103       SysReg = AArch64SysReg::TPIDR_EL1;
1104     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
1105         .addImm(SysReg);
1106     MI.eraseFromParent();
1107     return true;
1108   }
1109 
1110   case AArch64::MOVi32imm:
1111     return expandMOVImm(MBB, MBBI, 32);
1112   case AArch64::MOVi64imm:
1113     return expandMOVImm(MBB, MBBI, 64);
1114   case AArch64::RET_ReallyLR: {
1115     // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1116     // function and missing live-ins. We are fine in practice because callee
1117     // saved register handling ensures the register value is restored before
1118     // RET, but we need the undef flag here to appease the MachineVerifier
1119     // liveness checks.
1120     MachineInstrBuilder MIB =
1121         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
1122           .addReg(AArch64::LR, RegState::Undef);
1123     transferImpOps(MI, MIB, MIB);
1124     MI.eraseFromParent();
1125     return true;
1126   }
1127   case AArch64::CMP_SWAP_8:
1128     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1129                           AArch64::SUBSWrx,
1130                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
1131                           AArch64::WZR, NextMBBI);
1132   case AArch64::CMP_SWAP_16:
1133     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1134                           AArch64::SUBSWrx,
1135                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
1136                           AArch64::WZR, NextMBBI);
1137   case AArch64::CMP_SWAP_32:
1138     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1139                           AArch64::SUBSWrs,
1140                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1141                           AArch64::WZR, NextMBBI);
1142   case AArch64::CMP_SWAP_64:
1143     return expandCMP_SWAP(MBB, MBBI,
1144                           AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1145                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1146                           AArch64::XZR, NextMBBI);
1147   case AArch64::CMP_SWAP_128:
1148   case AArch64::CMP_SWAP_128_RELEASE:
1149   case AArch64::CMP_SWAP_128_ACQUIRE:
1150   case AArch64::CMP_SWAP_128_MONOTONIC:
1151     return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1152 
1153   case AArch64::AESMCrrTied:
1154   case AArch64::AESIMCrrTied: {
1155     MachineInstrBuilder MIB =
1156     BuildMI(MBB, MBBI, MI.getDebugLoc(),
1157             TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1158                                                       AArch64::AESIMCrr))
1159       .add(MI.getOperand(0))
1160       .add(MI.getOperand(1));
1161     transferImpOps(MI, MIB, MIB);
1162     MI.eraseFromParent();
1163     return true;
1164    }
1165    case AArch64::IRGstack: {
1166      MachineFunction &MF = *MBB.getParent();
1167      const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1168      const AArch64FrameLowering *TFI =
1169          MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1170 
1171      // IRG does not allow immediate offset. getTaggedBasePointerOffset should
1172      // almost always point to SP-after-prologue; if not, emit a longer
1173      // instruction sequence.
1174      int BaseOffset = -AFI->getTaggedBasePointerOffset();
1175      Register FrameReg;
1176      StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1177          MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
1178          /*PreferFP=*/false,
1179          /*ForSimm=*/true);
1180      Register SrcReg = FrameReg;
1181      if (FrameRegOffset) {
1182        // Use output register as temporary.
1183        SrcReg = MI.getOperand(0).getReg();
1184        emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
1185                        FrameRegOffset, TII);
1186      }
1187      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
1188          .add(MI.getOperand(0))
1189          .addUse(SrcReg)
1190          .add(MI.getOperand(2));
1191      MI.eraseFromParent();
1192      return true;
1193    }
1194    case AArch64::TAGPstack: {
1195      int64_t Offset = MI.getOperand(2).getImm();
1196      BuildMI(MBB, MBBI, MI.getDebugLoc(),
1197              TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1198          .add(MI.getOperand(0))
1199          .add(MI.getOperand(1))
1200          .addImm(std::abs(Offset))
1201          .add(MI.getOperand(4));
1202      MI.eraseFromParent();
1203      return true;
1204    }
1205    case AArch64::STGloop_wback:
1206    case AArch64::STZGloop_wback:
1207      return expandSetTagLoop(MBB, MBBI, NextMBBI);
1208    case AArch64::STGloop:
1209    case AArch64::STZGloop:
1210      report_fatal_error(
1211          "Non-writeback variants of STGloop / STZGloop should not "
1212          "survive past PrologEpilogInserter.");
1213    case AArch64::STR_ZZZZXI:
1214      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
1215    case AArch64::STR_ZZZXI:
1216      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
1217    case AArch64::STR_ZZXI:
1218      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
1219    case AArch64::LDR_ZZZZXI:
1220      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
1221    case AArch64::LDR_ZZZXI:
1222      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
1223    case AArch64::LDR_ZZXI:
1224      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
1225    case AArch64::BLR_RVMARKER:
1226      return expandCALL_RVMARKER(MBB, MBBI);
1227    case AArch64::StoreSwiftAsyncContext:
1228      return expandStoreSwiftAsyncContext(MBB, MBBI);
1229   }
1230   return false;
1231 }
1232 
1233 /// Iterate over the instructions in basic block MBB and expand any
1234 /// pseudo instructions.  Return true if anything was modified.
1235 bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
1236   bool Modified = false;
1237 
1238   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1239   while (MBBI != E) {
1240     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
1241     Modified |= expandMI(MBB, MBBI, NMBBI);
1242     MBBI = NMBBI;
1243   }
1244 
1245   return Modified;
1246 }
1247 
1248 bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
1249   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
1250 
1251   bool Modified = false;
1252   for (auto &MBB : MF)
1253     Modified |= expandMBB(MBB);
1254   return Modified;
1255 }
1256 
1257 /// Returns an instance of the pseudo instruction expansion pass.
1258 FunctionPass *llvm::createAArch64ExpandPseudoPass() {
1259   return new AArch64ExpandPseudo();
1260 }
1261