xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp (revision 271171e0d97b88ba2a7c3bf750c9672b484c1c13)
1 //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands pseudo instructions into target
10 // instructions to allow proper scheduling and other late optimizations.  This
11 // pass should be run after register allocation but before the post-regalloc
12 // scheduling pass.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64ExpandImm.h"
17 #include "AArch64InstrInfo.h"
18 #include "AArch64MachineFunctionInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/Triple.h"
24 #include "llvm/CodeGen/LivePhysRegs.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFunctionPass.h"
29 #include "llvm/CodeGen/MachineInstr.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/CodeGen/TargetSubtargetInfo.h"
33 #include "llvm/IR/DebugLoc.h"
34 #include "llvm/MC/MCInstrDesc.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/CodeGen.h"
37 #include "llvm/Support/MathExtras.h"
38 #include "llvm/Target/TargetMachine.h"
39 #include <cassert>
40 #include <cstdint>
41 #include <iterator>
42 #include <limits>
43 #include <utility>
44 
45 using namespace llvm;
46 
47 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
48 
49 namespace {
50 
51 class AArch64ExpandPseudo : public MachineFunctionPass {
52 public:
53   const AArch64InstrInfo *TII;
54 
55   static char ID;
56 
57   AArch64ExpandPseudo() : MachineFunctionPass(ID) {
58     initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
59   }
60 
61   bool runOnMachineFunction(MachineFunction &Fn) override;
62 
63   StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
64 
65 private:
66   bool expandMBB(MachineBasicBlock &MBB);
67   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
68                 MachineBasicBlock::iterator &NextMBBI);
69   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
70                     unsigned BitSize);
71 
72   bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
73                             MachineBasicBlock::iterator MBBI);
74   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
75                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
76                       unsigned ExtendImm, unsigned ZeroReg,
77                       MachineBasicBlock::iterator &NextMBBI);
78   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
79                           MachineBasicBlock::iterator MBBI,
80                           MachineBasicBlock::iterator &NextMBBI);
81   bool expandSetTagLoop(MachineBasicBlock &MBB,
82                         MachineBasicBlock::iterator MBBI,
83                         MachineBasicBlock::iterator &NextMBBI);
84   bool expandSVESpillFill(MachineBasicBlock &MBB,
85                           MachineBasicBlock::iterator MBBI, unsigned Opc,
86                           unsigned N);
87   bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
88                            MachineBasicBlock::iterator MBBI);
89   bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
90   bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
91                                     MachineBasicBlock::iterator MBBI);
92 };
93 
94 } // end anonymous namespace
95 
96 char AArch64ExpandPseudo::ID = 0;
97 
98 INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
99                 AARCH64_EXPAND_PSEUDO_NAME, false, false)
100 
101 /// Transfer implicit operands on the pseudo instruction to the
102 /// instructions created from the expansion.
103 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
104                            MachineInstrBuilder &DefMI) {
105   const MCInstrDesc &Desc = OldMI.getDesc();
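  // Operands beyond the MCInstrDesc's declared count are the implicit
  // operands; route implicit uses to UseMI and implicit defs to DefMI.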
106   for (const MachineOperand &MO :
107        llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
108     assert(MO.isReg() && MO.getReg());
109     if (MO.isUse())
110       UseMI.add(MO);
111     else
112       DefMI.add(MO);
113   }
114 }
115 
116 /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
117 /// real move-immediate instructions to synthesize the immediate.
118 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
119                                        MachineBasicBlock::iterator MBBI,
120                                        unsigned BitSize) {
121   MachineInstr &MI = *MBBI;
122   Register DstReg = MI.getOperand(0).getReg();
123   uint64_t RenamableState =
124       MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
125   uint64_t Imm = MI.getOperand(1).getImm();
126 
127   if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
128     // Useless def, and we don't want to risk creating an invalid ORR (which
129     // would really write to sp).
130     MI.eraseFromParent();
131     return true;
132   }
133 
134   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
135   AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
136   assert(Insn.size() != 0);
137 
138   SmallVector<MachineInstrBuilder, 4> MIBS;
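  // Emit the move-immediate sequence computed above; only the final
  // instruction may inherit a dead flag for the destination register.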
139   for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
140     bool LastItem = std::next(I) == E;
141     switch (I->Opcode)
142     {
143     default: llvm_unreachable("unhandled!"); break;
144 
145     case AArch64::ORRWri:
146     case AArch64::ORRXri:
147       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
148         .add(MI.getOperand(0))
149         .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
150         .addImm(I->Op2));
151       break;
152     case AArch64::MOVNWi:
153     case AArch64::MOVNXi:
154     case AArch64::MOVZWi:
155     case AArch64::MOVZXi: {
156       bool DstIsDead = MI.getOperand(0).isDead();
157       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
158         .addReg(DstReg, RegState::Define |
159                 getDeadRegState(DstIsDead && LastItem) |
160                 RenamableState)
161         .addImm(I->Op1)
162         .addImm(I->Op2));
163       } break;
164     case AArch64::MOVKWi:
165     case AArch64::MOVKXi: {
166       Register DstReg = MI.getOperand(0).getReg();
167       bool DstIsDead = MI.getOperand(0).isDead();
168       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
169         .addReg(DstReg,
170                 RegState::Define |
171                 getDeadRegState(DstIsDead && LastItem) |
172                 RenamableState)
173         .addReg(DstReg)
174         .addImm(I->Op1)
175         .addImm(I->Op2));
176       } break;
177     }
178   }
179   transferImpOps(MI, MIBS.front(), MIBS.back());
180   MI.eraseFromParent();
181   return true;
182 }
183 
184 bool AArch64ExpandPseudo::expandCMP_SWAP(
185     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
186     unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
187     MachineBasicBlock::iterator &NextMBBI) {
188   MachineInstr &MI = *MBBI;
189   DebugLoc DL = MI.getDebugLoc();
190   const MachineOperand &Dest = MI.getOperand(0);
191   Register StatusReg = MI.getOperand(1).getReg();
192   bool StatusDead = MI.getOperand(1).isDead();
193   // Duplicating undef operands into 2 instructions does not guarantee the same
194   // value on both; however, undef should be replaced by xzr anyway.
195   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
196   Register AddrReg = MI.getOperand(2).getReg();
197   Register DesiredReg = MI.getOperand(3).getReg();
198   Register NewReg = MI.getOperand(4).getReg();
199 
200   MachineFunction *MF = MBB.getParent();
201   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
202   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
203   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
204 
205   MF->insert(++MBB.getIterator(), LoadCmpBB);
206   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
207   MF->insert(++StoreBB->getIterator(), DoneBB);
208 
209   // .Lloadcmp:
210   //     mov wStatus, 0
211   //     ldaxr xDest, [xAddr]
212   //     cmp xDest, xDesired
213   //     b.ne .Ldone
214   if (!StatusDead)
215     BuildMI(LoadCmpBB, DL, TII->get(AArch64::MOVZWi), StatusReg)
216       .addImm(0).addImm(0);
217   BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
218       .addReg(AddrReg);
219   BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
220       .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
221       .addReg(DesiredReg)
222       .addImm(ExtendImm);
223   BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
224       .addImm(AArch64CC::NE)
225       .addMBB(DoneBB)
226       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
227   LoadCmpBB->addSuccessor(DoneBB);
228   LoadCmpBB->addSuccessor(StoreBB);
229 
230   // .Lstore:
231   //     stlxr wStatus, xNew, [xAddr]
232   //     cbnz wStatus, .Lloadcmp
233   BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
234       .addReg(NewReg)
235       .addReg(AddrReg);
236   BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
237       .addReg(StatusReg, getKillRegState(StatusDead))
238       .addMBB(LoadCmpBB);
239   StoreBB->addSuccessor(LoadCmpBB);
240   StoreBB->addSuccessor(DoneBB);
241 
242   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
243   DoneBB->transferSuccessors(&MBB);
244 
245   MBB.addSuccessor(LoadCmpBB);
246 
247   NextMBBI = MBB.end();
248   MI.eraseFromParent();
249 
250   // Recompute livein lists.
251   LivePhysRegs LiveRegs;
252   computeAndAddLiveIns(LiveRegs, *DoneBB);
253   computeAndAddLiveIns(LiveRegs, *StoreBB);
254   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
255   // Do an extra pass around the loop to get loop carried registers right.
256   StoreBB->clearLiveIns();
257   computeAndAddLiveIns(LiveRegs, *StoreBB);
258   LoadCmpBB->clearLiveIns();
259   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
260 
261   return true;
262 }
263 
264 bool AArch64ExpandPseudo::expandCMP_SWAP_128(
265     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
266     MachineBasicBlock::iterator &NextMBBI) {
267   MachineInstr &MI = *MBBI;
268   DebugLoc DL = MI.getDebugLoc();
269   MachineOperand &DestLo = MI.getOperand(0);
270   MachineOperand &DestHi = MI.getOperand(1);
271   Register StatusReg = MI.getOperand(2).getReg();
272   bool StatusDead = MI.getOperand(2).isDead();
273   // Duplicating undef operands into 2 instructions does not guarantee the same
274   // value on both; however, undef should be replaced by xzr anyway.
275   assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
276   Register AddrReg = MI.getOperand(3).getReg();
277   Register DesiredLoReg = MI.getOperand(4).getReg();
278   Register DesiredHiReg = MI.getOperand(5).getReg();
279   Register NewLoReg = MI.getOperand(6).getReg();
280   Register NewHiReg = MI.getOperand(7).getReg();
281 
282   unsigned LdxpOp, StxpOp;
283 
284   switch (MI.getOpcode()) {
285   case AArch64::CMP_SWAP_128_MONOTONIC:
286     LdxpOp = AArch64::LDXPX;
287     StxpOp = AArch64::STXPX;
288     break;
289   case AArch64::CMP_SWAP_128_RELEASE:
290     LdxpOp = AArch64::LDXPX;
291     StxpOp = AArch64::STLXPX;
292     break;
293   case AArch64::CMP_SWAP_128_ACQUIRE:
294     LdxpOp = AArch64::LDAXPX;
295     StxpOp = AArch64::STXPX;
296     break;
297   case AArch64::CMP_SWAP_128:
298     LdxpOp = AArch64::LDAXPX;
299     StxpOp = AArch64::STLXPX;
300     break;
301   default:
302     llvm_unreachable("Unexpected opcode");
303   }
304 
305   MachineFunction *MF = MBB.getParent();
306   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
307   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
308   auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
309   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
310 
311   MF->insert(++MBB.getIterator(), LoadCmpBB);
312   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
313   MF->insert(++StoreBB->getIterator(), FailBB);
314   MF->insert(++FailBB->getIterator(), DoneBB);
315 
316   // .Lloadcmp:
317   //     ldaxp xDestLo, xDestHi, [xAddr]
318   //     cmp xDestLo, xDesiredLo; csinc wStatus, wzr, wzr, eq
319   //     cmp xDestHi, xDesiredHi; csinc wStatus, wStatus, wStatus, eq
320   //     cbnz wStatus, .Lfail
321   BuildMI(LoadCmpBB, DL, TII->get(LdxpOp))
322       .addReg(DestLo.getReg(), RegState::Define)
323       .addReg(DestHi.getReg(), RegState::Define)
324       .addReg(AddrReg);
325   BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
326       .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
327       .addReg(DesiredLoReg)
328       .addImm(0);
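  // wStatus becomes 0 if the low halves compared equal and 1 otherwise; the
  // second CSINC below leaves it non-zero if either half differs.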
329   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
330     .addUse(AArch64::WZR)
331     .addUse(AArch64::WZR)
332     .addImm(AArch64CC::EQ);
333   BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
334       .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
335       .addReg(DesiredHiReg)
336       .addImm(0);
337   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
338       .addUse(StatusReg, RegState::Kill)
339       .addUse(StatusReg, RegState::Kill)
340       .addImm(AArch64CC::EQ);
341   BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
342       .addUse(StatusReg, getKillRegState(StatusDead))
343       .addMBB(FailBB);
344   LoadCmpBB->addSuccessor(FailBB);
345   LoadCmpBB->addSuccessor(StoreBB);
346 
347   // .Lstore:
348   //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
349   //     cbnz wStatus, .Lloadcmp
350   BuildMI(StoreBB, DL, TII->get(StxpOp), StatusReg)
351       .addReg(NewLoReg)
352       .addReg(NewHiReg)
353       .addReg(AddrReg);
354   BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
355       .addReg(StatusReg, getKillRegState(StatusDead))
356       .addMBB(LoadCmpBB);
357   BuildMI(StoreBB, DL, TII->get(AArch64::B)).addMBB(DoneBB);
358   StoreBB->addSuccessor(LoadCmpBB);
359   StoreBB->addSuccessor(DoneBB);
360 
361   // .Lfail:
362   //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
363   //     cbnz wStatus, .Lloadcmp
364   BuildMI(FailBB, DL, TII->get(StxpOp), StatusReg)
365       .addReg(DestLo.getReg())
366       .addReg(DestHi.getReg())
367       .addReg(AddrReg);
368   BuildMI(FailBB, DL, TII->get(AArch64::CBNZW))
369       .addReg(StatusReg, getKillRegState(StatusDead))
370       .addMBB(LoadCmpBB);
371   FailBB->addSuccessor(LoadCmpBB);
372   FailBB->addSuccessor(DoneBB);
373 
374   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
375   DoneBB->transferSuccessors(&MBB);
376 
377   MBB.addSuccessor(LoadCmpBB);
378 
379   NextMBBI = MBB.end();
380   MI.eraseFromParent();
381 
382   // Recompute liveness bottom up.
383   LivePhysRegs LiveRegs;
384   computeAndAddLiveIns(LiveRegs, *DoneBB);
385   computeAndAddLiveIns(LiveRegs, *FailBB);
386   computeAndAddLiveIns(LiveRegs, *StoreBB);
387   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
388 
389   // Do an extra pass in the loop to get the loop carried dependencies right.
390   FailBB->clearLiveIns();
391   computeAndAddLiveIns(LiveRegs, *FailBB);
392   StoreBB->clearLiveIns();
393   computeAndAddLiveIns(LiveRegs, *StoreBB);
394   LoadCmpBB->clearLiveIns();
395   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
396 
397   return true;
398 }
399 
400 /// \brief Expand Pseudos to Instructions with destructive operands.
401 ///
402 /// This mechanism uses MOVPRFX instructions for zeroing the false lanes
403 /// or for fixing relaxed register allocation conditions to comply with
404 /// the instruction's register constraints. The latter case may be cheaper
405 /// than setting the register constraints in the register allocator,
406 /// since that will insert regular MOV instructions rather than MOVPRFX.
407 ///
408 /// Example (after register allocation):
409 ///
410 ///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
411 ///
412 /// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
413 /// * We cannot map directly to FSUB_ZPmZ_B because the register
414 ///   constraints of the instruction are not met.
415 /// * The _ZERO suffix also specifies that the false lanes need to be zeroed.
416 ///
417 /// We first check whether the destructive operand == the result operand;
418 /// if not, we try to swap the operands, e.g.
419 ///
420 ///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
421 ///
422 /// But because FSUB_ZPmZ is not commutative, this is semantically
423 /// different, so we need a reverse instruction:
424 ///
425 ///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
426 ///
427 /// Then we implement the zeroing of the false lanes of Z0 by adding
428 /// a zeroing MOVPRFX instruction:
429 ///
430 ///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
431 ///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
432 ///
433 /// Note that this can only be done for the _ZERO or _UNDEF variants, where
434 /// we can guarantee that the false lanes are zeroed (by this expansion)
435 /// or are undef (don't care / not used); otherwise swapping the
436 /// operands is illegal because the operation is not (and cannot be
437 /// emulated to be) fully commutative.
438 bool AArch64ExpandPseudo::expand_DestructiveOp(
439                             MachineInstr &MI,
440                             MachineBasicBlock &MBB,
441                             MachineBasicBlock::iterator MBBI) {
442   unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
443   uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
444   uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
445   bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
446 
447   Register DstReg = MI.getOperand(0).getReg();
448   bool DstIsDead = MI.getOperand(0).isDead();
449 
450   if (DType == AArch64::DestructiveBinary)
451     assert(DstReg != MI.getOperand(3).getReg());
452 
453   bool UseRev = false;
454   unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
455   switch (DType) {
456   case AArch64::DestructiveBinaryComm:
457   case AArch64::DestructiveBinaryCommWithRev:
458     if (DstReg == MI.getOperand(3).getReg()) {
459       // FSUB Zd, Pg, Zs1, Zd  ==> FSUBR   Zd, Pg/m, Zd, Zs1
460       std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
461       UseRev = true;
462       break;
463     }
464     LLVM_FALLTHROUGH;
465   case AArch64::DestructiveBinary:
466   case AArch64::DestructiveBinaryImm:
467     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
468     break;
469   case AArch64::DestructiveUnaryPassthru:
470     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
471     break;
472   case AArch64::DestructiveTernaryCommWithRev:
473     std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
474     if (DstReg == MI.getOperand(3).getReg()) {
475       // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
476       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
477       UseRev = true;
478     } else if (DstReg == MI.getOperand(4).getReg()) {
479       // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
480       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
481       UseRev = true;
482     }
483     break;
484   default:
485     llvm_unreachable("Unsupported Destructive Operand type");
486   }
487 
488 #ifndef NDEBUG
489   // MOVPRFX can only be used if the destination operand is the destructive
490   // operand and is not used as any other operand, so the destructive
491   // operand must be unique.
492   bool DOPRegIsUnique = false;
493   switch (DType) {
494   case AArch64::DestructiveBinaryComm:
495   case AArch64::DestructiveBinaryCommWithRev:
496     DOPRegIsUnique =
497       DstReg != MI.getOperand(DOPIdx).getReg() ||
498       MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
499     break;
500   case AArch64::DestructiveUnaryPassthru:
501   case AArch64::DestructiveBinaryImm:
502     DOPRegIsUnique = true;
503     break;
504   case AArch64::DestructiveTernaryCommWithRev:
505     DOPRegIsUnique =
506         DstReg != MI.getOperand(DOPIdx).getReg() ||
507         (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
508          MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
509     break;
510   }
511 #endif
512 
513   // Resolve the reverse opcode
514   if (UseRev) {
515     int NewOpcode;
516     // e.g. DIV -> DIVR
517     if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
518       Opcode = NewOpcode;
519     // e.g. DIVR -> DIV
520     else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
521       Opcode = NewOpcode;
522   }
523 
524   // Get the right MOVPRFX
525   uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
526   unsigned MovPrfx, MovPrfxZero;
527   switch (ElementSize) {
528   case AArch64::ElementSizeNone:
529   case AArch64::ElementSizeB:
530     MovPrfx = AArch64::MOVPRFX_ZZ;
531     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
532     break;
533   case AArch64::ElementSizeH:
534     MovPrfx = AArch64::MOVPRFX_ZZ;
535     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
536     break;
537   case AArch64::ElementSizeS:
538     MovPrfx = AArch64::MOVPRFX_ZZ;
539     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
540     break;
541   case AArch64::ElementSizeD:
542     MovPrfx = AArch64::MOVPRFX_ZZ;
543     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
544     break;
545   default:
546     llvm_unreachable("Unsupported ElementSize");
547   }
548 
549   //
550   // Create the MOVPRFX that sets up the destructive operand (if required)
551   //
552   MachineInstrBuilder PRFX, DOP;
553   if (FalseZero) {
554 #ifndef NDEBUG
555     assert(DOPRegIsUnique && "The destructive operand should be unique");
556 #endif
557     assert(ElementSize != AArch64::ElementSizeNone &&
558            "This instruction is unpredicated");
559 
560     // Merge source operand into destination register
561     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
562                .addReg(DstReg, RegState::Define)
563                .addReg(MI.getOperand(PredIdx).getReg())
564                .addReg(MI.getOperand(DOPIdx).getReg());
565 
566     // After the movprfx, the destructive operand is the same as Dst
567     DOPIdx = 0;
568   } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
569 #ifndef NDEBUG
570     assert(DOPRegIsUnique && "The destructive operand should be unique");
571 #endif
572     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
573                .addReg(DstReg, RegState::Define)
574                .addReg(MI.getOperand(DOPIdx).getReg());
575     DOPIdx = 0;
576   }
577 
578   //
579   // Create the destructive operation
580   //
581   DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
582     .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
583 
584   switch (DType) {
585   case AArch64::DestructiveUnaryPassthru:
586     DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
587         .add(MI.getOperand(PredIdx))
588         .add(MI.getOperand(SrcIdx));
589     break;
590   case AArch64::DestructiveBinaryImm:
591   case AArch64::DestructiveBinaryComm:
592   case AArch64::DestructiveBinaryCommWithRev:
593     DOP.add(MI.getOperand(PredIdx))
594        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
595        .add(MI.getOperand(SrcIdx));
596     break;
597   case AArch64::DestructiveTernaryCommWithRev:
598     DOP.add(MI.getOperand(PredIdx))
599         .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
600         .add(MI.getOperand(SrcIdx))
601         .add(MI.getOperand(Src2Idx));
602     break;
603   }
604 
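  // If a MOVPRFX was emitted, bundle it with the destructive operation so
  // later passes cannot separate the prefix from the instruction it modifies.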
605   if (PRFX) {
606     finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
607     transferImpOps(MI, PRFX, DOP);
608   } else
609     transferImpOps(MI, DOP, DOP);
610 
611   MI.eraseFromParent();
612   return true;
613 }
614 
615 bool AArch64ExpandPseudo::expandSetTagLoop(
616     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
617     MachineBasicBlock::iterator &NextMBBI) {
618   MachineInstr &MI = *MBBI;
619   DebugLoc DL = MI.getDebugLoc();
620   Register SizeReg = MI.getOperand(0).getReg();
621   Register AddressReg = MI.getOperand(1).getReg();
622 
623   MachineFunction *MF = MBB.getParent();
624 
625   bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
626   const unsigned OpCode1 =
627       ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
628   const unsigned OpCode2 =
629       ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
630 
631   unsigned Size = MI.getOperand(2).getImm();
632   assert(Size > 0 && Size % 16 == 0);
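  // If the size is not a multiple of 32 bytes, emit one 16-byte tag store up
  // front so the loop below can use the two-granule (32-byte) instructions.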
633   if (Size % (16 * 2) != 0) {
634     BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
635         .addReg(AddressReg)
636         .addReg(AddressReg)
637         .addImm(1);
638     Size -= 16;
639   }
640   MachineBasicBlock::iterator I =
641       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
642           .addImm(Size);
643   expandMOVImm(MBB, I, 64);
644 
645   auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
646   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
647 
648   MF->insert(++MBB.getIterator(), LoopBB);
649   MF->insert(++LoopBB->getIterator(), DoneBB);
650 
651   BuildMI(LoopBB, DL, TII->get(OpCode2))
652       .addDef(AddressReg)
653       .addReg(AddressReg)
654       .addReg(AddressReg)
655       .addImm(2)
656       .cloneMemRefs(MI)
657       .setMIFlags(MI.getFlags());
658   BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
659       .addDef(SizeReg)
660       .addReg(SizeReg)
661       .addImm(16 * 2)
662       .addImm(0);
663   BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);
664 
665   LoopBB->addSuccessor(LoopBB);
666   LoopBB->addSuccessor(DoneBB);
667 
668   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
669   DoneBB->transferSuccessors(&MBB);
670 
671   MBB.addSuccessor(LoopBB);
672 
673   NextMBBI = MBB.end();
674   MI.eraseFromParent();
675   // Recompute liveness bottom up.
676   LivePhysRegs LiveRegs;
677   computeAndAddLiveIns(LiveRegs, *DoneBB);
678   computeAndAddLiveIns(LiveRegs, *LoopBB);
679   // Do an extra pass in the loop to get the loop carried dependencies right.
680   // FIXME: is this necessary?
681   LoopBB->clearLiveIns();
682   computeAndAddLiveIns(LiveRegs, *LoopBB);
683   DoneBB->clearLiveIns();
684   computeAndAddLiveIns(LiveRegs, *DoneBB);
685 
686   return true;
687 }
688 
689 bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
690                                              MachineBasicBlock::iterator MBBI,
691                                              unsigned Opc, unsigned N) {
692   const TargetRegisterInfo *TRI =
693       MBB.getParent()->getSubtarget().getRegisterInfo();
694   MachineInstr &MI = *MBBI;
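  // Expand the multi-register spill/fill pseudo into one STR_ZXI/LDR_ZXI per
  // Z sub-register, at consecutive vector-length-scaled offsets.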
695   for (unsigned Offset = 0; Offset < N; ++Offset) {
696     int ImmOffset = MI.getOperand(2).getImm() + Offset;
697     bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
698     assert(ImmOffset >= -256 && ImmOffset < 256 &&
699            "Immediate spill offset out of range");
700     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
701         .addReg(
702             TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
703             Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
704         .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
705         .addImm(ImmOffset);
706   }
707   MI.eraseFromParent();
708   return true;
709 }
710 
711 bool AArch64ExpandPseudo::expandCALL_RVMARKER(
712     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
713   // Expand CALL_RVMARKER pseudo to:
714   // - a branch to the call target, followed by
715   // - the special `mov x29, x29` marker, and
716   // - another branch, to the runtime function
717   // Mark the sequence as a bundle, to avoid passes moving other code in between.
718   MachineInstr &MI = *MBBI;
719 
720   MachineInstr *OriginalCall;
721   MachineOperand &RVTarget = MI.getOperand(0);
722   MachineOperand &CallTarget = MI.getOperand(1);
723   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
724          "invalid operand for regular call");
725   assert(RVTarget.isGlobal() && "invalid operand for attached call");
726   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
727   OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
728   OriginalCall->addOperand(CallTarget);
729 
730   unsigned RegMaskStartIdx = 2;
731   // Skip register arguments. Those are added during ISel, but are not
732   // needed for the concrete branch.
733   while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
734     auto MOP = MI.getOperand(RegMaskStartIdx);
735     assert(MOP.isReg() && "can only add register operands");
736     OriginalCall->addOperand(MachineOperand::CreateReg(
737         MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
738     RegMaskStartIdx++;
739   }
740   for (const MachineOperand &MO :
741        llvm::drop_begin(MI.operands(), RegMaskStartIdx))
742     OriginalCall->addOperand(MO);
743 
744   BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
745                      .addReg(AArch64::FP, RegState::Define)
746                      .addReg(AArch64::XZR)
747                      .addReg(AArch64::FP)
748                      .addImm(0);
749 
750   auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
751                      .add(RVTarget)
752                      .getInstr();
753 
754   if (MI.shouldUpdateCallSiteInfo())
755     MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);
756 
757   MI.eraseFromParent();
758   finalizeBundle(MBB, OriginalCall->getIterator(),
759                  std::next(RVCall->getIterator()));
760   return true;
761 }
762 
763 bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
764                                          MachineBasicBlock::iterator MBBI) {
765   // Expand CALL_BTI pseudo to:
766   // - a branch to the call target
767   // - a BTI instruction
768   // Mark the sequence as a bundle, to avoid passes moving other code in
769   // between.
770 
771   MachineInstr &MI = *MBBI;
772   MachineOperand &CallTarget = MI.getOperand(0);
773   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
774          "invalid operand for regular call");
775   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
776   MachineInstr *Call =
777       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
778   Call->addOperand(CallTarget);
779 
780   MachineInstr *BTI =
781       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
782           // BTI J so that setjmp can BR to this.
783           .addImm(36)
784           .getInstr();
785 
786   if (MI.shouldUpdateCallSiteInfo())
787     MBB.getParent()->moveCallSiteInfo(&MI, Call);
788 
789   MI.eraseFromParent();
790   finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
791   return true;
792 }
793 
794 bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
795     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
796   Register CtxReg = MBBI->getOperand(0).getReg();
797   Register BaseReg = MBBI->getOperand(1).getReg();
798   int Offset = MBBI->getOperand(2).getImm();
799   DebugLoc DL(MBBI->getDebugLoc());
800   auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
801 
802   if (STI.getTargetTriple().getArchName() != "arm64e") {
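  // On non-arm64e targets, store the context without signing it.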
803     BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
804         .addUse(CtxReg)
805         .addUse(BaseReg)
806         .addImm(Offset / 8)
807         .setMIFlag(MachineInstr::FrameSetup);
808     MBBI->eraseFromParent();
809     return true;
810   }
811 
812   // We need to sign the context in an address-discriminated way. 0xc31a is a
813   // fixed random value, chosen as part of the ABI.
814   //     add x16, xBase, #Offset
815   //     movk x16, #0xc31a, lsl #48
816   //     mov x17, x22/xzr
817   //     pacdb x17, x16
818   //     str x17, [xBase, #Offset]
819   unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
820   BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
821       .addUse(BaseReg)
822       .addImm(abs(Offset))
823       .addImm(0)
824       .setMIFlag(MachineInstr::FrameSetup);
825   BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
826       .addUse(AArch64::X16)
827       .addImm(0xc31a)
828       .addImm(48)
829       .setMIFlag(MachineInstr::FrameSetup);
830   // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
831   // move it somewhere before signing.
832   BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
833       .addUse(AArch64::XZR)
834       .addUse(CtxReg)
835       .addImm(0)
836       .setMIFlag(MachineInstr::FrameSetup);
837   BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
838       .addUse(AArch64::X17)
839       .addUse(AArch64::X16)
840       .setMIFlag(MachineInstr::FrameSetup);
841   BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
842       .addUse(AArch64::X17)
843       .addUse(BaseReg)
844       .addImm(Offset / 8)
845       .setMIFlag(MachineInstr::FrameSetup);
846 
847   MBBI->eraseFromParent();
848   return true;
849 }
850 
851 /// If MBBI references a pseudo instruction that should be expanded here,
852 /// do the expansion and return true.  Otherwise return false.
853 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
854                                    MachineBasicBlock::iterator MBBI,
855                                    MachineBasicBlock::iterator &NextMBBI) {
856   MachineInstr &MI = *MBBI;
857   unsigned Opcode = MI.getOpcode();
858 
859   // Check if we can expand the destructive op
860   int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
861   if (OrigInstr != -1) {
862     auto &Orig = TII->get(OrigInstr);
863     if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask)
864            != AArch64::NotDestructive) {
865       return expand_DestructiveOp(MI, MBB, MBBI);
866     }
867   }
868 
869   switch (Opcode) {
870   default:
871     break;
872 
873   case AArch64::BSPv8i8:
874   case AArch64::BSPv16i8: {
875     Register DstReg = MI.getOperand(0).getReg();
876     if (DstReg == MI.getOperand(3).getReg()) {
877       // Expand to BIT
878       BuildMI(MBB, MBBI, MI.getDebugLoc(),
879               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
880                                                   : AArch64::BITv16i8))
881           .add(MI.getOperand(0))
882           .add(MI.getOperand(3))
883           .add(MI.getOperand(2))
884           .add(MI.getOperand(1));
885     } else if (DstReg == MI.getOperand(2).getReg()) {
886       // Expand to BIF
887       BuildMI(MBB, MBBI, MI.getDebugLoc(),
888               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
889                                                   : AArch64::BIFv16i8))
890           .add(MI.getOperand(0))
891           .add(MI.getOperand(2))
892           .add(MI.getOperand(3))
893           .add(MI.getOperand(1));
894     } else {
895       // Expand to BSL, use additional move if required
896       if (DstReg == MI.getOperand(1).getReg()) {
897         BuildMI(MBB, MBBI, MI.getDebugLoc(),
898                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
899                                                     : AArch64::BSLv16i8))
900             .add(MI.getOperand(0))
901             .add(MI.getOperand(1))
902             .add(MI.getOperand(2))
903             .add(MI.getOperand(3));
904       } else {
905         BuildMI(MBB, MBBI, MI.getDebugLoc(),
906                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
907                                                     : AArch64::ORRv16i8))
908             .addReg(DstReg,
909                     RegState::Define |
910                         getRenamableRegState(MI.getOperand(0).isRenamable()))
911             .add(MI.getOperand(1))
912             .add(MI.getOperand(1));
913         BuildMI(MBB, MBBI, MI.getDebugLoc(),
914                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
915                                                     : AArch64::BSLv16i8))
916             .add(MI.getOperand(0))
917             .addReg(DstReg,
918                     RegState::Kill |
919                         getRenamableRegState(MI.getOperand(0).isRenamable()))
920             .add(MI.getOperand(2))
921             .add(MI.getOperand(3));
922       }
923     }
924     MI.eraseFromParent();
925     return true;
926   }
927 
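  // Expand the register-register ALU pseudos to the equivalent
  // shifted-register instructions with an LSL #0 shift amount.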
928   case AArch64::ADDWrr:
929   case AArch64::SUBWrr:
930   case AArch64::ADDXrr:
931   case AArch64::SUBXrr:
932   case AArch64::ADDSWrr:
933   case AArch64::SUBSWrr:
934   case AArch64::ADDSXrr:
935   case AArch64::SUBSXrr:
936   case AArch64::ANDWrr:
937   case AArch64::ANDXrr:
938   case AArch64::BICWrr:
939   case AArch64::BICXrr:
940   case AArch64::ANDSWrr:
941   case AArch64::ANDSXrr:
942   case AArch64::BICSWrr:
943   case AArch64::BICSXrr:
944   case AArch64::EONWrr:
945   case AArch64::EONXrr:
946   case AArch64::EORWrr:
947   case AArch64::EORXrr:
948   case AArch64::ORNWrr:
949   case AArch64::ORNXrr:
950   case AArch64::ORRWrr:
951   case AArch64::ORRXrr: {
952     unsigned Opcode;
953     switch (MI.getOpcode()) {
954     default:
955       return false;
956     case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
957     case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
958     case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
959     case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
960     case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
961     case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
962     case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
963     case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
964     case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
965     case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
966     case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
967     case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
968     case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
969     case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
970     case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
971     case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
972     case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
973     case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
974     case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
975     case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
976     case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
977     case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
978     case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
979     case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
980     }
981     MachineFunction &MF = *MBB.getParent();
982     // Try to create new inst without implicit operands added.
983     MachineInstr *NewMI = MF.CreateMachineInstr(
984         TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
985     MBB.insert(MBBI, NewMI);
986     MachineInstrBuilder MIB1(MF, NewMI);
987     MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
988         .add(MI.getOperand(1))
989         .add(MI.getOperand(2))
990         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
991     transferImpOps(MI, MIB1, MIB1);
992     MI.eraseFromParent();
993     return true;
994   }
995 
996   case AArch64::LOADgot: {
997     MachineFunction *MF = MBB.getParent();
998     Register DstReg = MI.getOperand(0).getReg();
999     const MachineOperand &MO1 = MI.getOperand(1);
1000     unsigned Flags = MO1.getTargetFlags();
1001 
1002     if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
1003       // Tiny code model: expand to a single LDR.
1004       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1005                                         TII->get(AArch64::LDRXl), DstReg);
1006 
1007       if (MO1.isGlobal()) {
1008         MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
1009       } else if (MO1.isSymbol()) {
1010         MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
1011       } else {
1012         assert(MO1.isCPI() &&
1013                "Only expect globals, externalsymbols, or constant pools");
1014         MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
1015       }
1016     } else {
1017       // Small code model: expand into ADRP + LDR.
1018       MachineFunction &MF = *MI.getParent()->getParent();
1019       DebugLoc DL = MI.getDebugLoc();
1020       MachineInstrBuilder MIB1 =
1021           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
1022 
1023       MachineInstrBuilder MIB2;
1024       if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
1025         auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1026         unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
1027         unsigned DstFlags = MI.getOperand(0).getTargetFlags();
1028         MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
1029                    .addDef(Reg32)
1030                    .addReg(DstReg, RegState::Kill)
1031                    .addReg(DstReg, DstFlags | RegState::Implicit);
1032       } else {
1033         Register DstReg = MI.getOperand(0).getReg();
1034         MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
1035                    .add(MI.getOperand(0))
1036                    .addUse(DstReg, RegState::Kill);
1037       }
1038 
1039       if (MO1.isGlobal()) {
1040         MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
1041         MIB2.addGlobalAddress(MO1.getGlobal(), 0,
1042                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1043       } else if (MO1.isSymbol()) {
1044         MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
1045         MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
1046                                                         AArch64II::MO_PAGEOFF |
1047                                                         AArch64II::MO_NC);
1048       } else {
1049         assert(MO1.isCPI() &&
1050                "Only expect globals, externalsymbols, or constant pools");
1051         MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1052                                   Flags | AArch64II::MO_PAGE);
1053         MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1054                                   Flags | AArch64II::MO_PAGEOFF |
1055                                       AArch64II::MO_NC);
1056       }
1057 
1058       transferImpOps(MI, MIB1, MIB2);
1059     }
1060     MI.eraseFromParent();
1061     return true;
1062   }
1063   case AArch64::MOVaddrBA: {
1064     MachineFunction &MF = *MI.getParent()->getParent();
1065     if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1066       // blockaddress expressions have to come from a constant pool because the
1067       // largest addend (and hence offset within a function) allowed for ADRP is
1068       // only 8MB.
1069       const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
1070       assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1071 
1072       MachineConstantPool *MCP = MF.getConstantPool();
1073       unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));
1074 
1075       Register DstReg = MI.getOperand(0).getReg();
1076       auto MIB1 =
1077           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1078               .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
1079       auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1080                           TII->get(AArch64::LDRXui), DstReg)
1081                       .addUse(DstReg)
1082                       .addConstantPoolIndex(
1083                           CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1084       transferImpOps(MI, MIB1, MIB2);
1085       MI.eraseFromParent();
1086       return true;
1087     }
1088   }
1089     LLVM_FALLTHROUGH;
1090   case AArch64::MOVaddr:
1091   case AArch64::MOVaddrJT:
1092   case AArch64::MOVaddrCP:
1093   case AArch64::MOVaddrTLS:
1094   case AArch64::MOVaddrEXT: {
1095     // Expand into ADRP + ADD.
1096     Register DstReg = MI.getOperand(0).getReg();
1097     assert(DstReg != AArch64::XZR);
1098     MachineInstrBuilder MIB1 =
1099         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1100             .add(MI.getOperand(1));
1101 
1102     if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
1103       // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1104       // We do so by creating a MOVK that sets bits 48-63 of the register to
1105       // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1106       // the small code model so we can assume a binary size of <= 4GB, which
1107       // makes the untagged PC relative offset positive. The binary must also be
1108       // loaded into address range [0, 2^48). Both of these properties need to
1109       // be ensured at runtime when using tagged addresses.
1110       auto Tag = MI.getOperand(1);
1111       Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1112       Tag.setOffset(0x100000000);
1113       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
1114           .addReg(DstReg)
1115           .add(Tag)
1116           .addImm(48);
1117     }
1118 
1119     MachineInstrBuilder MIB2 =
1120         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1121             .add(MI.getOperand(0))
1122             .addReg(DstReg)
1123             .add(MI.getOperand(2))
1124             .addImm(0);
1125 
1126     transferImpOps(MI, MIB1, MIB2);
1127     MI.eraseFromParent();
1128     return true;
1129   }
1130   case AArch64::ADDlowTLS:
1131     // Produce a plain ADD
1132     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1133         .add(MI.getOperand(0))
1134         .add(MI.getOperand(1))
1135         .add(MI.getOperand(2))
1136         .addImm(0);
1137     MI.eraseFromParent();
1138     return true;
1139 
1140   case AArch64::MOVbaseTLS: {
1141     Register DstReg = MI.getOperand(0).getReg();
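    // Pick the thread-pointer system register for the exception level this
    // subtarget is configured to use.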
1142     auto SysReg = AArch64SysReg::TPIDR_EL0;
1143     MachineFunction *MF = MBB.getParent();
1144     if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1145       SysReg = AArch64SysReg::TPIDR_EL3;
1146     else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1147       SysReg = AArch64SysReg::TPIDR_EL2;
1148     else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1149       SysReg = AArch64SysReg::TPIDR_EL1;
1150     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
1151         .addImm(SysReg);
1152     MI.eraseFromParent();
1153     return true;
1154   }
1155 
1156   case AArch64::MOVi32imm:
1157     return expandMOVImm(MBB, MBBI, 32);
1158   case AArch64::MOVi64imm:
1159     return expandMOVImm(MBB, MBBI, 64);
1160   case AArch64::RET_ReallyLR: {
1161     // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1162     // function and missing live-ins. We are fine in practice because callee
1163     // saved register handling ensures the register value is restored before
1164     // RET, but we need the undef flag here to appease the MachineVerifier
1165     // liveness checks.
1166     MachineInstrBuilder MIB =
1167         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
1168           .addReg(AArch64::LR, RegState::Undef);
1169     transferImpOps(MI, MIB, MIB);
1170     MI.eraseFromParent();
1171     return true;
1172   }
1173   case AArch64::CMP_SWAP_8:
1174     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1175                           AArch64::SUBSWrx,
1176                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
1177                           AArch64::WZR, NextMBBI);
1178   case AArch64::CMP_SWAP_16:
1179     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1180                           AArch64::SUBSWrx,
1181                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
1182                           AArch64::WZR, NextMBBI);
1183   case AArch64::CMP_SWAP_32:
1184     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1185                           AArch64::SUBSWrs,
1186                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1187                           AArch64::WZR, NextMBBI);
1188   case AArch64::CMP_SWAP_64:
1189     return expandCMP_SWAP(MBB, MBBI,
1190                           AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1191                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1192                           AArch64::XZR, NextMBBI);
1193   case AArch64::CMP_SWAP_128:
1194   case AArch64::CMP_SWAP_128_RELEASE:
1195   case AArch64::CMP_SWAP_128_ACQUIRE:
1196   case AArch64::CMP_SWAP_128_MONOTONIC:
1197     return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1198 
1199   case AArch64::AESMCrrTied:
1200   case AArch64::AESIMCrrTied: {
1201     MachineInstrBuilder MIB =
1202     BuildMI(MBB, MBBI, MI.getDebugLoc(),
1203             TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1204                                                       AArch64::AESIMCrr))
1205       .add(MI.getOperand(0))
1206       .add(MI.getOperand(1));
1207     transferImpOps(MI, MIB, MIB);
1208     MI.eraseFromParent();
1209     return true;
1210    }
1211    case AArch64::IRGstack: {
1212      MachineFunction &MF = *MBB.getParent();
1213      const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1214      const AArch64FrameLowering *TFI =
1215          MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1216 
1217      // IRG does not allow immediate offset. getTaggedBasePointerOffset should
1218      // almost always point to SP-after-prologue; if not, emit a longer
1219      // instruction sequence.
1220      int BaseOffset = -AFI->getTaggedBasePointerOffset();
1221      Register FrameReg;
1222      StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1223          MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
1224          /*PreferFP=*/false,
1225          /*ForSimm=*/true);
1226      Register SrcReg = FrameReg;
1227      if (FrameRegOffset) {
1228        // Use output register as temporary.
1229        SrcReg = MI.getOperand(0).getReg();
1230        emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
1231                        FrameRegOffset, TII);
1232      }
1233      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
1234          .add(MI.getOperand(0))
1235          .addUse(SrcReg)
1236          .add(MI.getOperand(2));
1237      MI.eraseFromParent();
1238      return true;
1239    }
1240    case AArch64::TAGPstack: {
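     // ADDG/SUBG take an unsigned offset, so choose the opcode from the sign
     // of the offset and pass its magnitude.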
1241      int64_t Offset = MI.getOperand(2).getImm();
1242      BuildMI(MBB, MBBI, MI.getDebugLoc(),
1243              TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1244          .add(MI.getOperand(0))
1245          .add(MI.getOperand(1))
1246          .addImm(std::abs(Offset))
1247          .add(MI.getOperand(4));
1248      MI.eraseFromParent();
1249      return true;
1250    }
1251    case AArch64::STGloop_wback:
1252    case AArch64::STZGloop_wback:
1253      return expandSetTagLoop(MBB, MBBI, NextMBBI);
1254    case AArch64::STGloop:
1255    case AArch64::STZGloop:
1256      report_fatal_error(
1257          "Non-writeback variants of STGloop / STZGloop should not "
1258          "survive past PrologEpilogInserter.");
1259    case AArch64::STR_ZZZZXI:
1260      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
1261    case AArch64::STR_ZZZXI:
1262      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
1263    case AArch64::STR_ZZXI:
1264      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
1265    case AArch64::LDR_ZZZZXI:
1266      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
1267    case AArch64::LDR_ZZZXI:
1268      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
1269    case AArch64::LDR_ZZXI:
1270      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
1271    case AArch64::BLR_RVMARKER:
1272      return expandCALL_RVMARKER(MBB, MBBI);
1273    case AArch64::BLR_BTI:
1274      return expandCALL_BTI(MBB, MBBI);
1275    case AArch64::StoreSwiftAsyncContext:
1276      return expandStoreSwiftAsyncContext(MBB, MBBI);
1277   }
1278   return false;
1279 }
1280 
1281 /// Iterate over the instructions in basic block MBB and expand any
1282 /// pseudo instructions.  Return true if anything was modified.
1283 bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
1284   bool Modified = false;
1285 
1286   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1287   while (MBBI != E) {
1288     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
1289     Modified |= expandMI(MBB, MBBI, NMBBI);
1290     MBBI = NMBBI;
1291   }
1292 
1293   return Modified;
1294 }
1295 
1296 bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
1297   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
1298 
1299   bool Modified = false;
1300   for (auto &MBB : MF)
1301     Modified |= expandMBB(MBB);
1302   return Modified;
1303 }
1304 
1305 /// Returns an instance of the pseudo instruction expansion pass.
1306 FunctionPass *llvm::createAArch64ExpandPseudoPass() {
1307   return new AArch64ExpandPseudo();
1308 }
1309