1 //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands pseudo instructions into target
10 // instructions to allow proper scheduling and other late optimizations.  This
11 // pass should be run after register allocation but before the post-regalloc
12 // scheduling pass.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64ExpandImm.h"
17 #include "AArch64InstrInfo.h"
18 #include "AArch64MachineFunctionInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/CodeGen/LivePhysRegs.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineConstantPool.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstr.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/CodeGen/TargetSubtargetInfo.h"
31 #include "llvm/IR/DebugLoc.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/Pass.h"
34 #include "llvm/Support/CodeGen.h"
35 #include "llvm/Support/MathExtras.h"
36 #include "llvm/Target/TargetMachine.h"
37 #include "llvm/TargetParser/Triple.h"
38 #include <cassert>
39 #include <cstdint>
40 #include <iterator>
41 #include <utility>
42 
43 using namespace llvm;
44 
45 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
46 
47 namespace {
48 
49 class AArch64ExpandPseudo : public MachineFunctionPass {
50 public:
51   const AArch64InstrInfo *TII;
52 
53   static char ID;
54 
55   AArch64ExpandPseudo() : MachineFunctionPass(ID) {
56     initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
57   }
58 
59   bool runOnMachineFunction(MachineFunction &Fn) override;
60 
61   StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
62 
63 private:
64   bool expandMBB(MachineBasicBlock &MBB);
65   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
66                 MachineBasicBlock::iterator &NextMBBI);
67   bool expandMultiVecPseudo(MachineBasicBlock &MBB,
68                             MachineBasicBlock::iterator MBBI,
69                             TargetRegisterClass ContiguousClass,
70                             TargetRegisterClass StridedClass,
71                             unsigned ContiguousOpc, unsigned StridedOpc);
72   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
73                     unsigned BitSize);
74 
75   bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
76                             MachineBasicBlock::iterator MBBI);
77   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
78                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
79                       unsigned ExtendImm, unsigned ZeroReg,
80                       MachineBasicBlock::iterator &NextMBBI);
81   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
82                           MachineBasicBlock::iterator MBBI,
83                           MachineBasicBlock::iterator &NextMBBI);
84   bool expandSetTagLoop(MachineBasicBlock &MBB,
85                         MachineBasicBlock::iterator MBBI,
86                         MachineBasicBlock::iterator &NextMBBI);
87   bool expandSVESpillFill(MachineBasicBlock &MBB,
88                           MachineBasicBlock::iterator MBBI, unsigned Opc,
89                           unsigned N);
90   bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
91                            MachineBasicBlock::iterator MBBI);
92   bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
93   bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
94                                     MachineBasicBlock::iterator MBBI);
95   MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
96                                      MachineBasicBlock::iterator MBBI);
97   MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
98                                         MachineBasicBlock::iterator MBBI);
99 };
100 
101 } // end anonymous namespace
102 
103 char AArch64ExpandPseudo::ID = 0;
104 
105 INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
106                 AARCH64_EXPAND_PSEUDO_NAME, false, false)
107 
108 /// Transfer implicit operands on the pseudo instruction to the
109 /// instructions created from the expansion.
110 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
111                            MachineInstrBuilder &DefMI) {
112   const MCInstrDesc &Desc = OldMI.getDesc();
113   for (const MachineOperand &MO :
114        llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
115     assert(MO.isReg() && MO.getReg());
116     if (MO.isUse())
117       UseMI.add(MO);
118     else
119       DefMI.add(MO);
120   }
121 }
122 
123 /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
124 /// real move-immediate instructions to synthesize the immediate.
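/// For example (a sketch, not taken from a particular test), a MOVi64imm of
/// 0x0000123400005678 may expand to:
///   movz xD, #0x5678
///   movk xD, #0x1234, lsl #32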
125 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
126                                        MachineBasicBlock::iterator MBBI,
127                                        unsigned BitSize) {
128   MachineInstr &MI = *MBBI;
129   Register DstReg = MI.getOperand(0).getReg();
130   uint64_t RenamableState =
131       MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
132   uint64_t Imm = MI.getOperand(1).getImm();
133 
134   if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
135     // Useless def, and we don't want to risk creating an invalid ORR (which
136     // would really write to sp).
137     MI.eraseFromParent();
138     return true;
139   }
140 
141   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
142   AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
143   assert(Insn.size() != 0);
144 
145   SmallVector<MachineInstrBuilder, 4> MIBS;
146   for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
147     bool LastItem = std::next(I) == E;
148     switch (I->Opcode)
149     {
150     default: llvm_unreachable("unhandled!"); break;
151 
152     case AArch64::ORRWri:
153     case AArch64::ORRXri:
154       if (I->Op1 == 0) {
155         MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
156                            .add(MI.getOperand(0))
157                            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
158                            .addImm(I->Op2));
159       } else {
160         Register DstReg = MI.getOperand(0).getReg();
161         bool DstIsDead = MI.getOperand(0).isDead();
162         MIBS.push_back(
163             BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
164                 .addReg(DstReg, RegState::Define |
165                                     getDeadRegState(DstIsDead && LastItem) |
166                                     RenamableState)
167                 .addReg(DstReg)
168                 .addImm(I->Op2));
169       }
170       break;
171     case AArch64::ORRWrs:
172     case AArch64::ORRXrs: {
173       Register DstReg = MI.getOperand(0).getReg();
174       bool DstIsDead = MI.getOperand(0).isDead();
175       MIBS.push_back(
176           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
177               .addReg(DstReg, RegState::Define |
178                                   getDeadRegState(DstIsDead && LastItem) |
179                                   RenamableState)
180               .addReg(DstReg)
181               .addReg(DstReg)
182               .addImm(I->Op2));
183     } break;
184     case AArch64::ANDXri:
185     case AArch64::EORXri:
186       if (I->Op1 == 0) {
187         MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
188                            .add(MI.getOperand(0))
189                            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
190                            .addImm(I->Op2));
191       } else {
192         Register DstReg = MI.getOperand(0).getReg();
193         bool DstIsDead = MI.getOperand(0).isDead();
194         MIBS.push_back(
195             BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
196                 .addReg(DstReg, RegState::Define |
197                                     getDeadRegState(DstIsDead && LastItem) |
198                                     RenamableState)
199                 .addReg(DstReg)
200                 .addImm(I->Op2));
201       }
202       break;
203     case AArch64::MOVNWi:
204     case AArch64::MOVNXi:
205     case AArch64::MOVZWi:
206     case AArch64::MOVZXi: {
207       bool DstIsDead = MI.getOperand(0).isDead();
208       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
209         .addReg(DstReg, RegState::Define |
210                 getDeadRegState(DstIsDead && LastItem) |
211                 RenamableState)
212         .addImm(I->Op1)
213         .addImm(I->Op2));
214       } break;
215     case AArch64::MOVKWi:
216     case AArch64::MOVKXi: {
217       Register DstReg = MI.getOperand(0).getReg();
218       bool DstIsDead = MI.getOperand(0).isDead();
219       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
220         .addReg(DstReg,
221                 RegState::Define |
222                 getDeadRegState(DstIsDead && LastItem) |
223                 RenamableState)
224         .addReg(DstReg)
225         .addImm(I->Op1)
226         .addImm(I->Op2));
227       } break;
228     }
229   }
230   transferImpOps(MI, MIBS.front(), MIBS.back());
231   MI.eraseFromParent();
232   return true;
233 }
234 
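/// Expand a CMP_SWAP pseudo into a load-exclusive / compare / store-exclusive
/// retry loop split across new basic blocks (see the .Lloadcmp / .Lstore
/// sketches below). The load, store and compare opcodes are supplied by the
/// caller, so one expansion serves the 8/16/32/64-bit variants.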
235 bool AArch64ExpandPseudo::expandCMP_SWAP(
236     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
237     unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
238     MachineBasicBlock::iterator &NextMBBI) {
239   MachineInstr &MI = *MBBI;
240   MIMetadata MIMD(MI);
241   const MachineOperand &Dest = MI.getOperand(0);
242   Register StatusReg = MI.getOperand(1).getReg();
243   bool StatusDead = MI.getOperand(1).isDead();
244   // Duplicating undef operands into 2 instructions does not guarantee the same
245   // value on both; however, undef should be replaced by xzr anyway.
246   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
247   Register AddrReg = MI.getOperand(2).getReg();
248   Register DesiredReg = MI.getOperand(3).getReg();
249   Register NewReg = MI.getOperand(4).getReg();
250 
251   MachineFunction *MF = MBB.getParent();
252   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
253   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
254   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
255 
256   MF->insert(++MBB.getIterator(), LoadCmpBB);
257   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
258   MF->insert(++StoreBB->getIterator(), DoneBB);
259 
260   // .Lloadcmp:
261   //     mov wStatus, 0
262   //     ldaxr xDest, [xAddr]
263   //     cmp xDest, xDesired
264   //     b.ne .Ldone
265   if (!StatusDead)
266     BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
267       .addImm(0).addImm(0);
268   BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
269       .addReg(AddrReg);
270   BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
271       .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
272       .addReg(DesiredReg)
273       .addImm(ExtendImm);
274   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
275       .addImm(AArch64CC::NE)
276       .addMBB(DoneBB)
277       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
278   LoadCmpBB->addSuccessor(DoneBB);
279   LoadCmpBB->addSuccessor(StoreBB);
280 
281   // .Lstore:
282   //     stlxr wStatus, xNew, [xAddr]
283   //     cbnz wStatus, .Lloadcmp
284   BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
285       .addReg(NewReg)
286       .addReg(AddrReg);
287   BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
288       .addReg(StatusReg, getKillRegState(StatusDead))
289       .addMBB(LoadCmpBB);
290   StoreBB->addSuccessor(LoadCmpBB);
291   StoreBB->addSuccessor(DoneBB);
292 
293   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
294   DoneBB->transferSuccessors(&MBB);
295 
296   MBB.addSuccessor(LoadCmpBB);
297 
298   NextMBBI = MBB.end();
299   MI.eraseFromParent();
300 
301   // Recompute livein lists.
302   LivePhysRegs LiveRegs;
303   computeAndAddLiveIns(LiveRegs, *DoneBB);
304   computeAndAddLiveIns(LiveRegs, *StoreBB);
305   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
306   // Do an extra pass around the loop to get loop carried registers right.
307   StoreBB->clearLiveIns();
308   computeAndAddLiveIns(LiveRegs, *StoreBB);
309   LoadCmpBB->clearLiveIns();
310   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
311 
312   return true;
313 }
314 
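/// Expand a CMP_SWAP_128* pseudo into an LDXP/LDAXP + STXP/STLXP retry loop.
/// On a failed compare the loaded value is written back with a store-exclusive
/// (the .Lfail block below), presumably so the 128-bit load is still paired
/// with a successful store-exclusive. The acquire/release flavour of the loads
/// and stores is chosen from the pseudo opcode.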
315 bool AArch64ExpandPseudo::expandCMP_SWAP_128(
316     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
317     MachineBasicBlock::iterator &NextMBBI) {
318   MachineInstr &MI = *MBBI;
319   MIMetadata MIMD(MI);
320   MachineOperand &DestLo = MI.getOperand(0);
321   MachineOperand &DestHi = MI.getOperand(1);
322   Register StatusReg = MI.getOperand(2).getReg();
323   bool StatusDead = MI.getOperand(2).isDead();
324   // Duplicating undef operands into 2 instructions does not guarantee the same
325   // value on both; however, undef should be replaced by xzr anyway.
326   assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
327   Register AddrReg = MI.getOperand(3).getReg();
328   Register DesiredLoReg = MI.getOperand(4).getReg();
329   Register DesiredHiReg = MI.getOperand(5).getReg();
330   Register NewLoReg = MI.getOperand(6).getReg();
331   Register NewHiReg = MI.getOperand(7).getReg();
332 
333   unsigned LdxpOp, StxpOp;
334 
335   switch (MI.getOpcode()) {
336   case AArch64::CMP_SWAP_128_MONOTONIC:
337     LdxpOp = AArch64::LDXPX;
338     StxpOp = AArch64::STXPX;
339     break;
340   case AArch64::CMP_SWAP_128_RELEASE:
341     LdxpOp = AArch64::LDXPX;
342     StxpOp = AArch64::STLXPX;
343     break;
344   case AArch64::CMP_SWAP_128_ACQUIRE:
345     LdxpOp = AArch64::LDAXPX;
346     StxpOp = AArch64::STXPX;
347     break;
348   case AArch64::CMP_SWAP_128:
349     LdxpOp = AArch64::LDAXPX;
350     StxpOp = AArch64::STLXPX;
351     break;
352   default:
353     llvm_unreachable("Unexpected opcode");
354   }
355 
356   MachineFunction *MF = MBB.getParent();
357   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
358   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
359   auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
360   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
361 
362   MF->insert(++MBB.getIterator(), LoadCmpBB);
363   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
364   MF->insert(++StoreBB->getIterator(), FailBB);
365   MF->insert(++FailBB->getIterator(), DoneBB);
366 
367   // .Lloadcmp:
368   //     ldaxp xDestLo, xDestHi, [xAddr]
369   //     cmp xDestLo, xDesiredLo
370   //     cmp xDestHi, xDesiredHi   (mismatches are accumulated in wStatus)
371   //     cbnz wStatus, .Lfail
372   BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
373       .addReg(DestLo.getReg(), RegState::Define)
374       .addReg(DestHi.getReg(), RegState::Define)
375       .addReg(AddrReg);
376   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
377       .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
378       .addReg(DesiredLoReg)
379       .addImm(0);
380   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
381     .addUse(AArch64::WZR)
382     .addUse(AArch64::WZR)
383     .addImm(AArch64CC::EQ);
384   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
385       .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
386       .addReg(DesiredHiReg)
387       .addImm(0);
388   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
389       .addUse(StatusReg, RegState::Kill)
390       .addUse(StatusReg, RegState::Kill)
391       .addImm(AArch64CC::EQ);
392   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
393       .addUse(StatusReg, getKillRegState(StatusDead))
394       .addMBB(FailBB);
395   LoadCmpBB->addSuccessor(FailBB);
396   LoadCmpBB->addSuccessor(StoreBB);
397 
398   // .Lstore:
399   //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
400   //     cbnz wStatus, .Lloadcmp
401   BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
402       .addReg(NewLoReg)
403       .addReg(NewHiReg)
404       .addReg(AddrReg);
405   BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
406       .addReg(StatusReg, getKillRegState(StatusDead))
407       .addMBB(LoadCmpBB);
408   BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
409   StoreBB->addSuccessor(LoadCmpBB);
410   StoreBB->addSuccessor(DoneBB);
411 
412   // .Lfail:
413   //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
414   //     cbnz wStatus, .Lloadcmp
415   BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
416       .addReg(DestLo.getReg())
417       .addReg(DestHi.getReg())
418       .addReg(AddrReg);
419   BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
420       .addReg(StatusReg, getKillRegState(StatusDead))
421       .addMBB(LoadCmpBB);
422   FailBB->addSuccessor(LoadCmpBB);
423   FailBB->addSuccessor(DoneBB);
424 
425   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
426   DoneBB->transferSuccessors(&MBB);
427 
428   MBB.addSuccessor(LoadCmpBB);
429 
430   NextMBBI = MBB.end();
431   MI.eraseFromParent();
432 
433   // Recompute liveness bottom up.
434   LivePhysRegs LiveRegs;
435   computeAndAddLiveIns(LiveRegs, *DoneBB);
436   computeAndAddLiveIns(LiveRegs, *FailBB);
437   computeAndAddLiveIns(LiveRegs, *StoreBB);
438   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
439 
440   // Do an extra pass in the loop to get the loop carried dependencies right.
441   FailBB->clearLiveIns();
442   computeAndAddLiveIns(LiveRegs, *FailBB);
443   StoreBB->clearLiveIns();
444   computeAndAddLiveIns(LiveRegs, *StoreBB);
445   LoadCmpBB->clearLiveIns();
446   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
447 
448   return true;
449 }
450 
451 /// \brief Expand Pseudos to Instructions with destructive operands.
452 ///
453 /// This mechanism uses MOVPRFX instructions for zeroing the false lanes
454 /// or for fixing relaxed register allocation conditions to comply with
455 /// the instruction's register constraints. The latter case may be cheaper
456 /// than setting the register constraints in the register allocator,
457 /// since that will insert regular MOV instructions rather than MOVPRFX.
458 ///
459 /// Example (after register allocation):
460 ///
461 ///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
462 ///
463 /// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
464 /// * We cannot map directly to FSUB_ZPmZ_B because the register
465 ///   constraints of the instruction are not met.
466 /// * Also the _ZERO specifies the false lanes need to be zeroed.
467 ///
468 /// We first check whether the destructive operand == the result operand;
469 /// if not, we try to swap the operands, e.g.
470 ///
471 ///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
472 ///
473 /// But because FSUB_ZPmZ is not commutative, this is semantically
474 /// different, so we need a reverse instruction:
475 ///
476 ///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
477 ///
478 /// Then we implement the zeroing of the false lanes of Z0 by adding
479 /// a zeroing MOVPRFX instruction:
480 ///
481 ///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
482 ///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
483 ///
484 /// Note that this can only be done for _ZERO or _UNDEF variants where
485 /// we can guarantee the false lanes to be zeroed (by implementing this)
486 /// or that they are undef (don't care / not used), otherwise the
487 /// swapping of operands is illegal because the operation is not
488 /// (or cannot be emulated to be) fully commutative.
489 bool AArch64ExpandPseudo::expand_DestructiveOp(
490                             MachineInstr &MI,
491                             MachineBasicBlock &MBB,
492                             MachineBasicBlock::iterator MBBI) {
493   unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
494   uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
495   uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
496   bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
497   Register DstReg = MI.getOperand(0).getReg();
498   bool DstIsDead = MI.getOperand(0).isDead();
499   bool UseRev = false;
500   unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
501 
502   switch (DType) {
503   case AArch64::DestructiveBinaryComm:
504   case AArch64::DestructiveBinaryCommWithRev:
505     if (DstReg == MI.getOperand(3).getReg()) {
506       // FSUB Zd, Pg, Zs1, Zd  ==> FSUBR   Zd, Pg/m, Zd, Zs1
507       std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
508       UseRev = true;
509       break;
510     }
511     [[fallthrough]];
512   case AArch64::DestructiveBinary:
513   case AArch64::DestructiveBinaryImm:
514     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
515     break;
516   case AArch64::DestructiveUnaryPassthru:
517     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
518     break;
519   case AArch64::DestructiveTernaryCommWithRev:
520     std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
521     if (DstReg == MI.getOperand(3).getReg()) {
522       // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
523       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
524       UseRev = true;
525     } else if (DstReg == MI.getOperand(4).getReg()) {
526       // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
527       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
528       UseRev = true;
529     }
530     break;
531   default:
532     llvm_unreachable("Unsupported Destructive Operand type");
533   }
534 
535   // MOVPRFX can only be used if the destination operand is the destructive
536   // operand and is not used as any other source operand, so the destructive
537   // operand must be unique.
538   bool DOPRegIsUnique = false;
539   switch (DType) {
540   case AArch64::DestructiveBinary:
541     DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
542     break;
543   case AArch64::DestructiveBinaryComm:
544   case AArch64::DestructiveBinaryCommWithRev:
545     DOPRegIsUnique =
546       DstReg != MI.getOperand(DOPIdx).getReg() ||
547       MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
548     break;
549   case AArch64::DestructiveUnaryPassthru:
550   case AArch64::DestructiveBinaryImm:
551     DOPRegIsUnique = true;
552     break;
553   case AArch64::DestructiveTernaryCommWithRev:
554     DOPRegIsUnique =
555         DstReg != MI.getOperand(DOPIdx).getReg() ||
556         (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
557          MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
558     break;
559   }
560 
561   // Resolve the reverse opcode
562   if (UseRev) {
563     int NewOpcode;
564     // e.g. DIV -> DIVR
565     if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
566       Opcode = NewOpcode;
567     // e.g. DIVR -> DIV
568     else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
569       Opcode = NewOpcode;
570   }
571 
572   // Get the right MOVPRFX
573   uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
574   unsigned MovPrfx, LSLZero, MovPrfxZero;
575   switch (ElementSize) {
576   case AArch64::ElementSizeNone:
577   case AArch64::ElementSizeB:
578     MovPrfx = AArch64::MOVPRFX_ZZ;
579     LSLZero = AArch64::LSL_ZPmI_B;
580     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
581     break;
582   case AArch64::ElementSizeH:
583     MovPrfx = AArch64::MOVPRFX_ZZ;
584     LSLZero = AArch64::LSL_ZPmI_H;
585     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
586     break;
587   case AArch64::ElementSizeS:
588     MovPrfx = AArch64::MOVPRFX_ZZ;
589     LSLZero = AArch64::LSL_ZPmI_S;
590     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
591     break;
592   case AArch64::ElementSizeD:
593     MovPrfx = AArch64::MOVPRFX_ZZ;
594     LSLZero = AArch64::LSL_ZPmI_D;
595     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
596     break;
597   default:
598     llvm_unreachable("Unsupported ElementSize");
599   }
600 
601   //
602   // Create the destructive operation (if required)
603   //
604   MachineInstrBuilder PRFX, DOP;
605   if (FalseZero) {
606     // If we cannot prefix the requested instruction we'll instead emit a
607     // prefixed_zeroing_mov for DestructiveBinary.
608     assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
609             DType == AArch64::DestructiveBinaryComm ||
610             DType == AArch64::DestructiveBinaryCommWithRev) &&
611            "The destructive operand should be unique");
612     assert(ElementSize != AArch64::ElementSizeNone &&
613            "This instruction is unpredicated");
614 
615     // Merge source operand into destination register
616     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
617                .addReg(DstReg, RegState::Define)
618                .addReg(MI.getOperand(PredIdx).getReg())
619                .addReg(MI.getOperand(DOPIdx).getReg());
620 
621     // After the movprfx, the destructive operand is the same as Dst.
622     DOPIdx = 0;
623 
624     // Create the additional LSL to zero the lanes when the DstReg is not
625     // unique. Zeros the lanes in z0 that aren't active in p0 with the sequence
626     // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
627     if ((DType == AArch64::DestructiveBinary ||
628          DType == AArch64::DestructiveBinaryComm ||
629          DType == AArch64::DestructiveBinaryCommWithRev) &&
630         !DOPRegIsUnique) {
631       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
632           .addReg(DstReg, RegState::Define)
633           .add(MI.getOperand(PredIdx))
634           .addReg(DstReg)
635           .addImm(0);
636     }
637   } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
638     assert(DOPRegIsUnique && "The destructive operand should be unique");
639     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
640                .addReg(DstReg, RegState::Define)
641                .addReg(MI.getOperand(DOPIdx).getReg());
642     DOPIdx = 0;
643   }
644 
645   //
646   // Create the destructive operation
647   //
648   DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
649     .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
650 
651   switch (DType) {
652   case AArch64::DestructiveUnaryPassthru:
653     DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
654         .add(MI.getOperand(PredIdx))
655         .add(MI.getOperand(SrcIdx));
656     break;
657   case AArch64::DestructiveBinary:
658   case AArch64::DestructiveBinaryImm:
659   case AArch64::DestructiveBinaryComm:
660   case AArch64::DestructiveBinaryCommWithRev:
661     DOP.add(MI.getOperand(PredIdx))
662        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
663        .add(MI.getOperand(SrcIdx));
664     break;
665   case AArch64::DestructiveTernaryCommWithRev:
666     DOP.add(MI.getOperand(PredIdx))
667         .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
668         .add(MI.getOperand(SrcIdx))
669         .add(MI.getOperand(Src2Idx));
670     break;
671   }
672 
673   if (PRFX) {
674     finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
675     transferImpOps(MI, PRFX, DOP);
676   } else
677     transferImpOps(MI, DOP, DOP);
678 
679   MI.eraseFromParent();
680   return true;
681 }
682 
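/// Expand an STGloop_wback / STZGloop_wback pseudo into a loop of ST2G/STZ2G
/// post-indexed stores that tag (and optionally zero) Size bytes starting at
/// Address, 32 bytes per iteration, preceded by a single STG/STZG when Size is
/// not a multiple of 32. A sketch of the emitted code:
///
///       stg   xAddr, [xAddr], #16     // only if Size % 32 != 0
///       mov   xSize, #Size
///     .Lloop:
///       st2g  xAddr, [xAddr], #32
///       subs  xSize, xSize, #32
///       b.ne  .Lloop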
683 bool AArch64ExpandPseudo::expandSetTagLoop(
684     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
685     MachineBasicBlock::iterator &NextMBBI) {
686   MachineInstr &MI = *MBBI;
687   DebugLoc DL = MI.getDebugLoc();
688   Register SizeReg = MI.getOperand(0).getReg();
689   Register AddressReg = MI.getOperand(1).getReg();
690 
691   MachineFunction *MF = MBB.getParent();
692 
693   bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
694   const unsigned OpCode1 =
695       ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
696   const unsigned OpCode2 =
697       ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
698 
699   unsigned Size = MI.getOperand(2).getImm();
700   assert(Size > 0 && Size % 16 == 0);
701   if (Size % (16 * 2) != 0) {
702     BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
703         .addReg(AddressReg)
704         .addReg(AddressReg)
705         .addImm(1);
706     Size -= 16;
707   }
708   MachineBasicBlock::iterator I =
709       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
710           .addImm(Size);
711   expandMOVImm(MBB, I, 64);
712 
713   auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
714   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
715 
716   MF->insert(++MBB.getIterator(), LoopBB);
717   MF->insert(++LoopBB->getIterator(), DoneBB);
718 
719   BuildMI(LoopBB, DL, TII->get(OpCode2))
720       .addDef(AddressReg)
721       .addReg(AddressReg)
722       .addReg(AddressReg)
723       .addImm(2)
724       .cloneMemRefs(MI)
725       .setMIFlags(MI.getFlags());
726   BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
727       .addDef(SizeReg)
728       .addReg(SizeReg)
729       .addImm(16 * 2)
730       .addImm(0);
731   BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
732       .addImm(AArch64CC::NE)
733       .addMBB(LoopBB)
734       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
735 
736   LoopBB->addSuccessor(LoopBB);
737   LoopBB->addSuccessor(DoneBB);
738 
739   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
740   DoneBB->transferSuccessors(&MBB);
741 
742   MBB.addSuccessor(LoopBB);
743 
744   NextMBBI = MBB.end();
745   MI.eraseFromParent();
746   // Recompute liveness bottom up.
747   LivePhysRegs LiveRegs;
748   computeAndAddLiveIns(LiveRegs, *DoneBB);
749   computeAndAddLiveIns(LiveRegs, *LoopBB);
750   // Do an extra pass in the loop to get the loop carried dependencies right.
751   // FIXME: is this necessary?
752   LoopBB->clearLiveIns();
753   computeAndAddLiveIns(LiveRegs, *LoopBB);
754   DoneBB->clearLiveIns();
755   computeAndAddLiveIns(LiveRegs, *DoneBB);
756 
757   return true;
758 }
759 
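/// Expand a spill/fill pseudo for a tuple of N SVE Z or P registers into N
/// consecutive LDR_ZXI/STR_ZXI (or LDR_PXI/STR_PXI) instructions, stepping the
/// immediate offset by one register slot each time. For example (sketch):
///
///     STR_ZZXI $z0_z1, $sp, 0   ==>   STR_ZXI $z0, $sp, 0
///                                     STR_ZXI $z1, $sp, 1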
760 bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
761                                              MachineBasicBlock::iterator MBBI,
762                                              unsigned Opc, unsigned N) {
763   assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
764           Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
765          "Unexpected opcode");
766   unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)
767                         ? RegState::Define
768                         : 0;
769   unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
770                       ? AArch64::zsub0
771                       : AArch64::psub0;
772   const TargetRegisterInfo *TRI =
773       MBB.getParent()->getSubtarget().getRegisterInfo();
774   MachineInstr &MI = *MBBI;
775   for (unsigned Offset = 0; Offset < N; ++Offset) {
776     int ImmOffset = MI.getOperand(2).getImm() + Offset;
777     bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
778     assert(ImmOffset >= -256 && ImmOffset < 256 &&
779            "Immediate spill offset out of range");
780     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
781         .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
782                 RState)
783         .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
784         .addImm(ImmOffset);
785   }
786   MI.eraseFromParent();
787   return true;
788 }
789 
790 // Create a call with the passed opcode and explicit operands, copying over all
791 // the implicit operands from *MBBI, starting at the regmask.
792 static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,
793                                        MachineBasicBlock::iterator MBBI,
794                                        const AArch64InstrInfo *TII,
795                                        unsigned Opcode,
796                                        ArrayRef<MachineOperand> ExplicitOps,
797                                        unsigned RegMaskStartIdx) {
798   // Build the MI, with explicit operands first (including the call target).
799   MachineInstr *Call = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode))
800                            .add(ExplicitOps)
801                            .getInstr();
802 
803   // Register arguments are added during ISel, but cannot be added as explicit
804   // operands of the branch, as it expects to be "B <target>", which takes only
805   // one operand. Instead they are added as implicit operands of the branch.
806   while (!MBBI->getOperand(RegMaskStartIdx).isRegMask()) {
807     const MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);
808     assert(MOP.isReg() && "can only add register operands");
809     Call->addOperand(MachineOperand::CreateReg(
810         MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
811         /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
812     RegMaskStartIdx++;
813   }
814   for (const MachineOperand &MO :
815        llvm::drop_begin(MBBI->operands(), RegMaskStartIdx))
816     Call->addOperand(MO);
817 
818   return Call;
819 }
820 
821 // Create a call to CallTarget, copying over all the operands from *MBBI,
822 // starting at the regmask.
823 static MachineInstr *createCall(MachineBasicBlock &MBB,
824                                 MachineBasicBlock::iterator MBBI,
825                                 const AArch64InstrInfo *TII,
826                                 MachineOperand &CallTarget,
827                                 unsigned RegMaskStartIdx) {
828   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
829 
830   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
831          "invalid operand for regular call");
832   return createCallWithOps(MBB, MBBI, TII, Opc, CallTarget, RegMaskStartIdx);
833 }
834 
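// For example (sketch), `BLR_RVMARKER @rvFn, $x8, ...` becomes a single bundle
// along the lines of:
//     blr  x8
//     mov  x29, x29       // marker recognised by the runtime
//     bl   @rvFn
// where @rvFn stands for the attached runtime function (operand 0) and $x8 for
// the call target (operand 1).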
835 bool AArch64ExpandPseudo::expandCALL_RVMARKER(
836     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
837   // Expand CALL_RVMARKER pseudo to:
838   // - a branch to the call target, followed by
839   // - the special `mov x29, x29` marker, and
840   // - another branch, to the runtime function
841   // Mark the sequence as a bundle, to avoid passes moving other code in between.
842   MachineInstr &MI = *MBBI;
843   MachineOperand &RVTarget = MI.getOperand(0);
844   assert(RVTarget.isGlobal() && "invalid operand for attached call");
845 
846   MachineInstr *OriginalCall = nullptr;
847 
848   if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
849     // ptrauth call.
850     const MachineOperand &CallTarget = MI.getOperand(1);
851     const MachineOperand &Key = MI.getOperand(2);
852     const MachineOperand &IntDisc = MI.getOperand(3);
853     const MachineOperand &AddrDisc = MI.getOperand(4);
854 
855     assert((Key.getImm() == AArch64PACKey::IA ||
856             Key.getImm() == AArch64PACKey::IB) &&
857            "Invalid auth call key");
858 
859     MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};
860 
861     OriginalCall = createCallWithOps(MBB, MBBI, TII, AArch64::BLRA, Ops,
862                                      /*RegMaskStartIdx=*/5);
863   } else {
864     assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
865     OriginalCall = createCall(MBB, MBBI, TII, MI.getOperand(1),
866                               // Regmask starts after the RV and call targets.
867                               /*RegMaskStartIdx=*/2);
868   }
869 
870   BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
871                      .addReg(AArch64::FP, RegState::Define)
872                      .addReg(AArch64::XZR)
873                      .addReg(AArch64::FP)
874                      .addImm(0);
875 
876   auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
877                      .add(RVTarget)
878                      .getInstr();
879 
880   if (MI.shouldUpdateCallSiteInfo())
881     MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);
882 
883   MI.eraseFromParent();
884   finalizeBundle(MBB, OriginalCall->getIterator(),
885                  std::next(RVCall->getIterator()));
886   return true;
887 }
888 
889 bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
890                                          MachineBasicBlock::iterator MBBI) {
891   // Expand CALL_BTI pseudo to:
892   // - a branch to the call target
893   // - a BTI instruction
894   // Mark the sequence as a bundle, to avoid passes moving other code in
895   // between.
896   MachineInstr &MI = *MBBI;
897   MachineInstr *Call = createCall(MBB, MBBI, TII, MI.getOperand(0),
898                                   // Regmask starts after the call target.
899                                   /*RegMaskStartIdx=*/1);
900 
901   Call->setCFIType(*MBB.getParent(), MI.getCFIType());
902 
903   MachineInstr *BTI =
904       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
905           // BTI J so that setjmp can BR to this.
906           .addImm(36)
907           .getInstr();
908 
909   if (MI.shouldUpdateCallSiteInfo())
910     MBB.getParent()->moveCallSiteInfo(&MI, Call);
911 
912   MI.eraseFromParent();
913   finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
914   return true;
915 }
916 
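/// Expand StoreSwiftAsyncContext: store the async context register to
/// [BaseReg, #Offset]. On arm64e the pointer is first signed with PACDB using
/// an address-discriminated discriminator (see the sequence below); elsewhere
/// this is a plain STRXui.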
917 bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
918     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
919   Register CtxReg = MBBI->getOperand(0).getReg();
920   Register BaseReg = MBBI->getOperand(1).getReg();
921   int Offset = MBBI->getOperand(2).getImm();
922   DebugLoc DL(MBBI->getDebugLoc());
923   auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
924 
925   if (STI.getTargetTriple().getArchName() != "arm64e") {
926     BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
927         .addUse(CtxReg)
928         .addUse(BaseReg)
929         .addImm(Offset / 8)
930         .setMIFlag(MachineInstr::FrameSetup);
931     MBBI->eraseFromParent();
932     return true;
933   }
934 
935   // We need to sign the context in an address-discriminated way. 0xc31a is a
936   // fixed random value, chosen as part of the ABI.
937   //     add x16, xBase, #Offset
938   //     movk x16, #0xc31a, lsl #48
939   //     mov x17, x22/xzr
940   //     pacdb x17, x16
941   //     str x17, [xBase, #Offset]
942   unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
943   BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
944       .addUse(BaseReg)
945       .addImm(abs(Offset))
946       .addImm(0)
947       .setMIFlag(MachineInstr::FrameSetup);
948   BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
949       .addUse(AArch64::X16)
950       .addImm(0xc31a)
951       .addImm(48)
952       .setMIFlag(MachineInstr::FrameSetup);
953   // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
954   // move it somewhere before signing.
955   BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
956       .addUse(AArch64::XZR)
957       .addUse(CtxReg)
958       .addImm(0)
959       .setMIFlag(MachineInstr::FrameSetup);
960   BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
961       .addUse(AArch64::X17)
962       .addUse(AArch64::X16)
963       .setMIFlag(MachineInstr::FrameSetup);
964   BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
965       .addUse(AArch64::X17)
966       .addUse(BaseReg)
967       .addImm(Offset / 8)
968       .setMIFlag(MachineInstr::FrameSetup);
969 
970   MBBI->eraseFromParent();
971   return true;
972 }
973 
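/// Expand RestoreZAPseudo. Operand 0 holds the value read back from
/// TPIDR2_EL0; if it is zero, a callee has committed the lazy ZA save, so the
/// block is split and a call to the restore routine (the remaining operands)
/// is emitted in its own block. Otherwise the call is skipped. This mirrors
/// the SME lazy-save protocol.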
974 MachineBasicBlock *
975 AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
976                                      MachineBasicBlock::iterator MBBI) {
977   MachineInstr &MI = *MBBI;
978   assert((std::next(MBBI) != MBB.end() ||
979           MI.getParent()->successors().begin() !=
980               MI.getParent()->successors().end()) &&
981          "Unexpected unreachable in block that restores ZA");
982 
983   // Compare TPIDR2_EL0 value against 0.
984   DebugLoc DL = MI.getDebugLoc();
985   MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
986                                 .add(MI.getOperand(0));
987 
988   // Split MBB and create two new blocks:
989   //  - MBB now contains all instructions before RestoreZAPseudo.
990   //  - SMBB contains the RestoreZAPseudo instruction only.
991   //  - EndBB contains all instructions after RestoreZAPseudo.
992   MachineInstr &PrevMI = *std::prev(MBBI);
993   MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
994   MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
995                                  ? *SMBB->successors().begin()
996                                  : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
997 
998   // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
999   Cbz.addMBB(SMBB);
1000   BuildMI(&MBB, DL, TII->get(AArch64::B))
1001       .addMBB(EndBB);
1002   MBB.addSuccessor(EndBB);
1003 
1004   // Replace the pseudo with a call (BL).
1005   MachineInstrBuilder MIB =
1006       BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
1007   MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
1008   for (unsigned I = 2; I < MI.getNumOperands(); ++I)
1009     MIB.add(MI.getOperand(I));
1010   BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1011 
1012   MI.eraseFromParent();
1013   return EndBB;
1014 }
1015 
1016 MachineBasicBlock *
1017 AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
1018                                         MachineBasicBlock::iterator MBBI) {
1019   MachineInstr &MI = *MBBI;
1020   // In the case of a smstart/smstop before an unreachable, just remove the
1021   // pseudo. Exception handling code generated by Clang may introduce
1022   // unreachables, and it seems unnecessary to restore pstate.sm when that
1023   // happens. Note that this is not just an optimisation: the code below expects
1024   // a successor instruction/block in order to split the block at MBBI.
1025   if (std::next(MBBI) == MBB.end() &&
1026       MI.getParent()->successors().begin() ==
1027           MI.getParent()->successors().end()) {
1028     MI.eraseFromParent();
1029     return &MBB;
1030   }
1031 
1032   // Expand the pseudo into smstart or smstop instruction. The pseudo has the
1033   // following operands:
1034   //
1035   //   MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
1036   //
1037   // The pseudo is expanded into a conditional smstart/smstop, with a
1038   // check if pstate.sm (register) equals the expected value, and if not,
1039   // invokes the smstart/smstop.
1040   //
1041   // As an example, the following block contains a normal call from a
1042   // streaming-compatible function:
1043   //
1044   // OrigBB:
1045   //   MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTOP
1046   //   bl @normal_callee
1047   //   MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTART
1048   //
1049   // ...which will be transformed into:
1050   //
1051   // OrigBB:
1052   //   TBNZx %0:gpr64, 0, SMBB
1053   //   b EndBB
1054   //
1055   // SMBB:
1056   //   MSRpstatesvcrImm1 3, 0, <regmask>                  <- SMSTOP
1057   //
1058   // EndBB:
1059   //   bl @normal_callee
1060   //   MSRcond_pstatesvcrImm1 3, 1, <regmask>             <- SMSTART
1061   //
1062   DebugLoc DL = MI.getDebugLoc();
1063 
1064   // Create the conditional branch based on the third operand of the
1065   // instruction, which tells us if we are wrapping a normal or streaming
1066   // function.
1067   // We test the live value of pstate.sm and toggle pstate.sm if this is not the
1068   // expected value for the callee (0 for a normal callee and 1 for a streaming
1069   // callee).
1070   unsigned Opc;
1071   switch (MI.getOperand(2).getImm()) {
1072   case AArch64SME::Always:
1073     llvm_unreachable("Should have matched to instruction directly");
1074   case AArch64SME::IfCallerIsStreaming:
1075     Opc = AArch64::TBNZW;
1076     break;
1077   case AArch64SME::IfCallerIsNonStreaming:
1078     Opc = AArch64::TBZW;
1079     break;
1080   }
1081   auto PStateSM = MI.getOperand(3).getReg();
1082   auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1083   unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
1084   MachineInstrBuilder Tbx =
1085       BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);
1086 
1087   // Split MBB and create two new blocks:
1088   //  - MBB now contains all instructions before MSRpstatePseudo.
1089   //  - SMBB contains the MSRpstatePseudo instruction only.
1090   //  - EndBB contains all instructions after MSRpstatePseudo.
1091   MachineInstr &PrevMI = *std::prev(MBBI);
1092   MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
1093   MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
1094                                  ? *SMBB->successors().begin()
1095                                  : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
1096 
1097   // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
1098   Tbx.addMBB(SMBB);
1099   BuildMI(&MBB, DL, TII->get(AArch64::B))
1100       .addMBB(EndBB);
1101   MBB.addSuccessor(EndBB);
1102 
1103   // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
1104   MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
1105                                     TII->get(AArch64::MSRpstatesvcrImm1));
1106   // Copy all operands of MSRpstatePseudo except the condition (operand 2) and
1107   // the pstate.sm CopyFromReg (operand 3); these were only needed to decide
1108   // whether the smstart/smstop is emitted at all.
1109   MIB.add(MI.getOperand(0));
1110   MIB.add(MI.getOperand(1));
1111   for (unsigned i = 4; i < MI.getNumOperands(); ++i)
1112     MIB.add(MI.getOperand(i));
1113 
1114   BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1115 
1116   MI.eraseFromParent();
1117   return EndBB;
1118 }
1119 
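/// Expand a multi-vector load/store pseudo into either its contiguous or its
/// strided form, depending on which of the two register classes the allocated
/// tuple register ended up in.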
1120 bool AArch64ExpandPseudo::expandMultiVecPseudo(
1121     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1122     TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
1123     unsigned ContiguousOp, unsigned StridedOpc) {
1124   MachineInstr &MI = *MBBI;
1125   Register Tuple = MI.getOperand(0).getReg();
1126 
1127   auto ContiguousRange = ContiguousClass.getRegisters();
1128   auto StridedRange = StridedClass.getRegisters();
1129   unsigned Opc;
1130   if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
1131     Opc = ContiguousOp;
1132   } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
1133     Opc = StridedOpc;
1134   } else
1135     llvm_unreachable("Cannot expand Multi-Vector pseudo");
1136 
1137   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
1138                                 .add(MI.getOperand(0))
1139                                 .add(MI.getOperand(1))
1140                                 .add(MI.getOperand(2))
1141                                 .add(MI.getOperand(3));
1142   transferImpOps(MI, MIB, MIB);
1143   MI.eraseFromParent();
1144   return true;
1145 }
1146 
1147 /// If MBBI references a pseudo instruction that should be expanded here,
1148 /// do the expansion and return true.  Otherwise return false.
1149 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
1150                                    MachineBasicBlock::iterator MBBI,
1151                                    MachineBasicBlock::iterator &NextMBBI) {
1152   MachineInstr &MI = *MBBI;
1153   unsigned Opcode = MI.getOpcode();
1154 
1155   // Check if we can expand the destructive op
1156   int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
1157   if (OrigInstr != -1) {
1158     auto &Orig = TII->get(OrigInstr);
1159     if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
1160         AArch64::NotDestructive) {
1161       return expand_DestructiveOp(MI, MBB, MBBI);
1162     }
1163   }
1164 
1165   switch (Opcode) {
1166   default:
1167     break;
1168 
1169   case AArch64::BSPv8i8:
1170   case AArch64::BSPv16i8: {
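    // BSP is a bitwise-select pseudo with no tied operands. Depending on which
    // source operand the destination register was allocated to, it is lowered
    // to BIT, BIF or BSL; the BSL form may need an extra ORR to move the mask
    // operand into the destination first.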
1171     Register DstReg = MI.getOperand(0).getReg();
1172     if (DstReg == MI.getOperand(3).getReg()) {
1173       // Expand to BIT
1174       BuildMI(MBB, MBBI, MI.getDebugLoc(),
1175               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
1176                                                   : AArch64::BITv16i8))
1177           .add(MI.getOperand(0))
1178           .add(MI.getOperand(3))
1179           .add(MI.getOperand(2))
1180           .add(MI.getOperand(1));
1181     } else if (DstReg == MI.getOperand(2).getReg()) {
1182       // Expand to BIF
1183       BuildMI(MBB, MBBI, MI.getDebugLoc(),
1184               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
1185                                                   : AArch64::BIFv16i8))
1186           .add(MI.getOperand(0))
1187           .add(MI.getOperand(2))
1188           .add(MI.getOperand(3))
1189           .add(MI.getOperand(1));
1190     } else {
1191       // Expand to BSL, use additional move if required
1192       if (DstReg == MI.getOperand(1).getReg()) {
1193         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1194                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1195                                                     : AArch64::BSLv16i8))
1196             .add(MI.getOperand(0))
1197             .add(MI.getOperand(1))
1198             .add(MI.getOperand(2))
1199             .add(MI.getOperand(3));
1200       } else {
1201         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1202                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
1203                                                     : AArch64::ORRv16i8))
1204             .addReg(DstReg,
1205                     RegState::Define |
1206                         getRenamableRegState(MI.getOperand(0).isRenamable()))
1207             .add(MI.getOperand(1))
1208             .add(MI.getOperand(1));
1209         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1210                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1211                                                     : AArch64::BSLv16i8))
1212             .add(MI.getOperand(0))
1213             .addReg(DstReg,
1214                     RegState::Kill |
1215                         getRenamableRegState(MI.getOperand(0).isRenamable()))
1216             .add(MI.getOperand(2))
1217             .add(MI.getOperand(3));
1218       }
1219     }
1220     MI.eraseFromParent();
1221     return true;
1222   }
1223 
1224   case AArch64::ADDWrr:
1225   case AArch64::SUBWrr:
1226   case AArch64::ADDXrr:
1227   case AArch64::SUBXrr:
1228   case AArch64::ADDSWrr:
1229   case AArch64::SUBSWrr:
1230   case AArch64::ADDSXrr:
1231   case AArch64::SUBSXrr:
1232   case AArch64::ANDWrr:
1233   case AArch64::ANDXrr:
1234   case AArch64::BICWrr:
1235   case AArch64::BICXrr:
1236   case AArch64::ANDSWrr:
1237   case AArch64::ANDSXrr:
1238   case AArch64::BICSWrr:
1239   case AArch64::BICSXrr:
1240   case AArch64::EONWrr:
1241   case AArch64::EONXrr:
1242   case AArch64::EORWrr:
1243   case AArch64::EORXrr:
1244   case AArch64::ORNWrr:
1245   case AArch64::ORNXrr:
1246   case AArch64::ORRWrr:
1247   case AArch64::ORRXrr: {
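    // These plain register-register opcodes are expanded to the canonical
    // shifted-register form with an explicit LSL #0 shift operand.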
1248     unsigned Opcode;
1249     switch (MI.getOpcode()) {
1250     default:
1251       return false;
1252     case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
1253     case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
1254     case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
1255     case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
1256     case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
1257     case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
1258     case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
1259     case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
1260     case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
1261     case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
1262     case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
1263     case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
1264     case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
1265     case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
1266     case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
1267     case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
1268     case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
1269     case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
1270     case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
1271     case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
1272     case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
1273     case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
1274     case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
1275     case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
1276     }
1277     MachineFunction &MF = *MBB.getParent();
1278     // Try to create new inst without implicit operands added.
1279     MachineInstr *NewMI = MF.CreateMachineInstr(
1280         TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
1281     MBB.insert(MBBI, NewMI);
1282     MachineInstrBuilder MIB1(MF, NewMI);
1283     MIB1->setPCSections(MF, MI.getPCSections());
1284     MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
1285         .add(MI.getOperand(1))
1286         .add(MI.getOperand(2))
1287         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1288     transferImpOps(MI, MIB1, MIB1);
1289     if (auto DebugNumber = MI.peekDebugInstrNum())
1290       NewMI->setDebugInstrNum(DebugNumber);
1291     MI.eraseFromParent();
1292     return true;
1293   }
1294 
1295   case AArch64::LOADgot: {
1296     MachineFunction *MF = MBB.getParent();
1297     Register DstReg = MI.getOperand(0).getReg();
1298     const MachineOperand &MO1 = MI.getOperand(1);
1299     unsigned Flags = MO1.getTargetFlags();
1300 
1301     if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
1302       // Tiny code model: expand to a single literal LDR.
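      // e.g. (roughly, for a GOT-resident global G):  ldr xD, :got:G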
1303       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1304                                         TII->get(AArch64::LDRXl), DstReg);
1305 
1306       if (MO1.isGlobal()) {
1307         MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
1308       } else if (MO1.isSymbol()) {
1309         MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
1310       } else {
1311         assert(MO1.isCPI() &&
1312                "Only expect globals, externalsymbols, or constant pools");
1313         MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
1314       }
1315     } else {
1316       // Small code model: expand into ADRP + LDR.
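      // e.g. (roughly, for a GOT-resident global G on ELF):
      //   adrp xD, :got:G
      //   ldr  xD, [xD, :got_lo12:G]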
1317       MachineFunction &MF = *MI.getParent()->getParent();
1318       DebugLoc DL = MI.getDebugLoc();
1319       MachineInstrBuilder MIB1 =
1320           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
1321 
1322       MachineInstrBuilder MIB2;
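      // On ILP32 targets pointers are 32 bits wide, so the GOT entry is
      // loaded with LDRWui into the W sub-register of the destination.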
1323       if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
1324         auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1325         unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
1326         unsigned DstFlags = MI.getOperand(0).getTargetFlags();
1327         MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
1328                    .addDef(Reg32)
1329                    .addReg(DstReg, RegState::Kill)
1330                    .addReg(DstReg, DstFlags | RegState::Implicit);
1331       } else {
1332         Register DstReg = MI.getOperand(0).getReg();
1333         MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
1334                    .add(MI.getOperand(0))
1335                    .addUse(DstReg, RegState::Kill);
1336       }
1337 
1338       if (MO1.isGlobal()) {
1339         MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
1340         MIB2.addGlobalAddress(MO1.getGlobal(), 0,
1341                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1342       } else if (MO1.isSymbol()) {
1343         MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
1344         MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
1345                                                         AArch64II::MO_PAGEOFF |
1346                                                         AArch64II::MO_NC);
1347       } else {
1348         assert(MO1.isCPI() &&
1349                "Only expect globals, externalsymbols, or constant pools");
1350         MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1351                                   Flags | AArch64II::MO_PAGE);
1352         MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1353                                   Flags | AArch64II::MO_PAGEOFF |
1354                                       AArch64II::MO_NC);
1355       }
1356 
1357       transferImpOps(MI, MIB1, MIB2);
1358     }
1359     MI.eraseFromParent();
1360     return true;
1361   }
1362   case AArch64::MOVaddrBA: {
1363     MachineFunction &MF = *MI.getParent()->getParent();
1364     if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1365       // blockaddress expressions have to come from a constant pool because the
1366       // largest addend (and hence offset within a function) allowed for ADRP is
1367       // only 8MB.
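      // The emitted sequence is roughly:
      //   adrp xD, <cp-entry>@PAGE
      //   ldr  xD, [xD, <cp-entry>@PAGEOFF]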
1368       const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
1369       assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1370 
1371       MachineConstantPool *MCP = MF.getConstantPool();
1372       unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));
1373 
1374       Register DstReg = MI.getOperand(0).getReg();
1375       auto MIB1 =
1376           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1377               .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
1378       auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1379                           TII->get(AArch64::LDRXui), DstReg)
1380                       .addUse(DstReg)
1381                       .addConstantPoolIndex(
1382                           CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1383       transferImpOps(MI, MIB1, MIB2);
1384       MI.eraseFromParent();
1385       return true;
1386     }
1387   }
1388     [[fallthrough]];
1389   case AArch64::MOVaddr:
1390   case AArch64::MOVaddrJT:
1391   case AArch64::MOVaddrCP:
1392   case AArch64::MOVaddrTLS:
1393   case AArch64::MOVaddrEXT: {
1394     // Expand into ADRP + ADD.
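    // e.g. (roughly, for an ELF global sym):
    //   adrp xD, sym
    //   add  xD, xD, :lo12:sym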
1395     Register DstReg = MI.getOperand(0).getReg();
1396     assert(DstReg != AArch64::XZR);
1397     MachineInstrBuilder MIB1 =
1398         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1399             .add(MI.getOperand(1));
1400 
1401     if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
1402       // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1403       // We do so by creating a MOVK that sets bits 48-63 of the register to
1404       // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1405       // the small code model so we can assume a binary size of <= 4GB, which
1406       // makes the untagged PC relative offset positive. The binary must also be
1407       // loaded into address range [0, 2^48). Both of these properties need to
1408       // be ensured at runtime when using tagged addresses.
1409       auto Tag = MI.getOperand(1);
1410       Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1411       Tag.setOffset(0x100000000);
1412       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
1413           .addReg(DstReg)
1414           .add(Tag)
1415           .addImm(48);
1416     }
1417 
1418     MachineInstrBuilder MIB2 =
1419         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1420             .add(MI.getOperand(0))
1421             .addReg(DstReg)
1422             .add(MI.getOperand(2))
1423             .addImm(0);
1424 
1425     transferImpOps(MI, MIB1, MIB2);
1426     MI.eraseFromParent();
1427     return true;
1428   }
1429   case AArch64::ADDlowTLS:
1430     // Produce a plain ADD
1431     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1432         .add(MI.getOperand(0))
1433         .add(MI.getOperand(1))
1434         .add(MI.getOperand(2))
1435         .addImm(0);
1436     MI.eraseFromParent();
1437     return true;
1438 
1439   case AArch64::MOVbaseTLS: {
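    // The TLS base is read from a thread-pointer system register via MRS;
    // which TPIDR* register is used depends on the subtarget, selected below.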
1440     Register DstReg = MI.getOperand(0).getReg();
1441     auto SysReg = AArch64SysReg::TPIDR_EL0;
1442     MachineFunction *MF = MBB.getParent();
1443     if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1444       SysReg = AArch64SysReg::TPIDR_EL3;
1445     else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1446       SysReg = AArch64SysReg::TPIDR_EL2;
1447     else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1448       SysReg = AArch64SysReg::TPIDR_EL1;
1449     else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
1450       SysReg = AArch64SysReg::TPIDRRO_EL0;
1451     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
1452         .addImm(SysReg);
1453     MI.eraseFromParent();
1454     return true;
1455   }
1456 
1457   case AArch64::MOVi32imm:
1458     return expandMOVImm(MBB, MBBI, 32);
1459   case AArch64::MOVi64imm:
1460     return expandMOVImm(MBB, MBBI, 64);
1461   case AArch64::RET_ReallyLR: {
1462     // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1463     // function and missing live-ins. We are fine in practice because callee
1464     // saved register handling ensures the register value is restored before
1465     // RET, but we need the undef flag here to appease the MachineVerifier
1466     // liveness checks.
1467     MachineInstrBuilder MIB =
1468         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
1469           .addReg(AArch64::LR, RegState::Undef);
1470     transferImpOps(MI, MIB, MIB);
1471     MI.eraseFromParent();
1472     return true;
1473   }
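  // The CMP_SWAP_* pseudos are turned into load-exclusive/store-exclusive
  // compare-and-swap loops by expandCMP_SWAP: the 8/16-bit variants compare
  // with an extending SUBSWrx (UXTB/UXTH), the 32/64-bit ones with
  // SUBSWrs/SUBSXrs and an LSL #0 shift.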
1474   case AArch64::CMP_SWAP_8:
1475     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1476                           AArch64::SUBSWrx,
1477                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
1478                           AArch64::WZR, NextMBBI);
1479   case AArch64::CMP_SWAP_16:
1480     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1481                           AArch64::SUBSWrx,
1482                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
1483                           AArch64::WZR, NextMBBI);
1484   case AArch64::CMP_SWAP_32:
1485     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1486                           AArch64::SUBSWrs,
1487                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1488                           AArch64::WZR, NextMBBI);
1489   case AArch64::CMP_SWAP_64:
1490     return expandCMP_SWAP(MBB, MBBI,
1491                           AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1492                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1493                           AArch64::XZR, NextMBBI);
1494   case AArch64::CMP_SWAP_128:
1495   case AArch64::CMP_SWAP_128_RELEASE:
1496   case AArch64::CMP_SWAP_128_ACQUIRE:
1497   case AArch64::CMP_SWAP_128_MONOTONIC:
1498     return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1499 
1500   case AArch64::AESMCrrTied:
1501   case AArch64::AESIMCrrTied: {
1502     MachineInstrBuilder MIB =
1503     BuildMI(MBB, MBBI, MI.getDebugLoc(),
1504             TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1505                                                       AArch64::AESIMCrr))
1506       .add(MI.getOperand(0))
1507       .add(MI.getOperand(1));
1508     transferImpOps(MI, MIB, MIB);
1509     MI.eraseFromParent();
1510     return true;
1511    }
1512    case AArch64::IRGstack: {
1513      MachineFunction &MF = *MBB.getParent();
1514      const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1515      const AArch64FrameLowering *TFI =
1516          MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1517 
1518      // IRG does not allow an immediate offset. getTaggedBasePointerOffset should
1519      // almost always point to SP-after-prologue; if not, emit a longer
1520      // instruction sequence.
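     // In that case the base plus offset is first materialized into the
     // destination register (via emitFrameOffset), and IRG then tags that
     // value, excluding the tags given by operand 2.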
1521      int BaseOffset = -AFI->getTaggedBasePointerOffset();
1522      Register FrameReg;
1523      StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1524          MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
1525          /*PreferFP=*/false,
1526          /*ForSimm=*/true);
1527      Register SrcReg = FrameReg;
1528      if (FrameRegOffset) {
1529        // Use output register as temporary.
1530        SrcReg = MI.getOperand(0).getReg();
1531        emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
1532                        FrameRegOffset, TII);
1533      }
1534      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
1535          .add(MI.getOperand(0))
1536          .addUse(SrcReg)
1537          .add(MI.getOperand(2));
1538      MI.eraseFromParent();
1539      return true;
1540    }
1541    case AArch64::TAGPstack: {
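     // ADDG/SUBG encode only a non-negative immediate, so a negative offset is
     // emitted as SUBG with the offset's magnitude.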
1542      int64_t Offset = MI.getOperand(2).getImm();
1543      BuildMI(MBB, MBBI, MI.getDebugLoc(),
1544              TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1545          .add(MI.getOperand(0))
1546          .add(MI.getOperand(1))
1547          .addImm(std::abs(Offset))
1548          .add(MI.getOperand(4));
1549      MI.eraseFromParent();
1550      return true;
1551    }
1552    case AArch64::STGloop_wback:
1553    case AArch64::STZGloop_wback:
1554      return expandSetTagLoop(MBB, MBBI, NextMBBI);
1555    case AArch64::STGloop:
1556    case AArch64::STZGloop:
1557      report_fatal_error(
1558          "Non-writeback variants of STGloop / STZGloop should not "
1559          "survive past PrologEpilogInserter.");
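   // The SVE spill/fill pseudos below store or reload a register tuple as N
   // consecutive single-vector (or single-predicate) transfers; see
   // expandSVESpillFill.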
1560    case AArch64::STR_ZZZZXI:
1561      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
1562    case AArch64::STR_ZZZXI:
1563      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
1564    case AArch64::STR_ZZXI:
1565      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
1566    case AArch64::STR_PPXI:
1567      return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
1568    case AArch64::LDR_ZZZZXI:
1569      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
1570    case AArch64::LDR_ZZZXI:
1571      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
1572    case AArch64::LDR_ZZXI:
1573      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
1574    case AArch64::LDR_PPXI:
1575      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
1576    case AArch64::BLR_RVMARKER:
1577    case AArch64::BLRA_RVMARKER:
1578      return expandCALL_RVMARKER(MBB, MBBI);
1579    case AArch64::BLR_BTI:
1580      return expandCALL_BTI(MBB, MBBI);
1581    case AArch64::StoreSwiftAsyncContext:
1582      return expandStoreSwiftAsyncContext(MBB, MBBI);
1583    case AArch64::RestoreZAPseudo: {
1584      auto *NewMBB = expandRestoreZA(MBB, MBBI);
1585      if (NewMBB != &MBB)
1586        NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1587      return true;
1588    }
1589    case AArch64::MSRpstatePseudo: {
1590      auto *NewMBB = expandCondSMToggle(MBB, MBBI);
1591      if (NewMBB != &MBB)
1592        NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1593      return true;
1594    }
1595    case AArch64::COALESCER_BARRIER_FPR16:
1596    case AArch64::COALESCER_BARRIER_FPR32:
1597    case AArch64::COALESCER_BARRIER_FPR64:
1598    case AArch64::COALESCER_BARRIER_FPR128:
1599      MI.eraseFromParent();
1600      return true;
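   // Each multi-vector load pseudo below resolves to either the contiguous or
   // the strided real opcode, depending on which register class the
   // destination tuple was allocated to; see expandMultiVecPseudo.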
1601    case AArch64::LD1B_2Z_IMM_PSEUDO:
1602      return expandMultiVecPseudo(
1603          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1604          AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
1605    case AArch64::LD1H_2Z_IMM_PSEUDO:
1606      return expandMultiVecPseudo(
1607          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1608          AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
1609    case AArch64::LD1W_2Z_IMM_PSEUDO:
1610      return expandMultiVecPseudo(
1611          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1612          AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
1613    case AArch64::LD1D_2Z_IMM_PSEUDO:
1614      return expandMultiVecPseudo(
1615          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1616          AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
1617    case AArch64::LDNT1B_2Z_IMM_PSEUDO:
1618      return expandMultiVecPseudo(
1619          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1620          AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
1621    case AArch64::LDNT1H_2Z_IMM_PSEUDO:
1622      return expandMultiVecPseudo(
1623          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1624          AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
1625    case AArch64::LDNT1W_2Z_IMM_PSEUDO:
1626      return expandMultiVecPseudo(
1627          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1628          AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
1629    case AArch64::LDNT1D_2Z_IMM_PSEUDO:
1630      return expandMultiVecPseudo(
1631          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1632          AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
1633    case AArch64::LD1B_2Z_PSEUDO:
1634      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1635                                  AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
1636                                  AArch64::LD1B_2Z_STRIDED);
1637    case AArch64::LD1H_2Z_PSEUDO:
1638      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1639                                  AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
1640                                  AArch64::LD1H_2Z_STRIDED);
1641    case AArch64::LD1W_2Z_PSEUDO:
1642      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1643                                  AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
1644                                  AArch64::LD1W_2Z_STRIDED);
1645    case AArch64::LD1D_2Z_PSEUDO:
1646      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1647                                  AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
1648                                  AArch64::LD1D_2Z_STRIDED);
1649    case AArch64::LDNT1B_2Z_PSEUDO:
1650      return expandMultiVecPseudo(
1651          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1652          AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
1653    case AArch64::LDNT1H_2Z_PSEUDO:
1654      return expandMultiVecPseudo(
1655          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1656          AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
1657    case AArch64::LDNT1W_2Z_PSEUDO:
1658      return expandMultiVecPseudo(
1659          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1660          AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
1661    case AArch64::LDNT1D_2Z_PSEUDO:
1662      return expandMultiVecPseudo(
1663          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1664          AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
1665    case AArch64::LD1B_4Z_IMM_PSEUDO:
1666      return expandMultiVecPseudo(
1667          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1668          AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
1669    case AArch64::LD1H_4Z_IMM_PSEUDO:
1670      return expandMultiVecPseudo(
1671          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1672          AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
1673    case AArch64::LD1W_4Z_IMM_PSEUDO:
1674      return expandMultiVecPseudo(
1675          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1676          AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
1677    case AArch64::LD1D_4Z_IMM_PSEUDO:
1678      return expandMultiVecPseudo(
1679          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1680          AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
1681    case AArch64::LDNT1B_4Z_IMM_PSEUDO:
1682      return expandMultiVecPseudo(
1683          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1684          AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
1685    case AArch64::LDNT1H_4Z_IMM_PSEUDO:
1686      return expandMultiVecPseudo(
1687          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1688          AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
1689    case AArch64::LDNT1W_4Z_IMM_PSEUDO:
1690      return expandMultiVecPseudo(
1691          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1692          AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
1693    case AArch64::LDNT1D_4Z_IMM_PSEUDO:
1694      return expandMultiVecPseudo(
1695          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1696          AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
1697    case AArch64::LD1B_4Z_PSEUDO:
1698      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1699                                  AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
1700                                  AArch64::LD1B_4Z_STRIDED);
1701    case AArch64::LD1H_4Z_PSEUDO:
1702      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1703                                  AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
1704                                  AArch64::LD1H_4Z_STRIDED);
1705    case AArch64::LD1W_4Z_PSEUDO:
1706      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1707                                  AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
1708                                  AArch64::LD1W_4Z_STRIDED);
1709    case AArch64::LD1D_4Z_PSEUDO:
1710      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1711                                  AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
1712                                  AArch64::LD1D_4Z_STRIDED);
1713    case AArch64::LDNT1B_4Z_PSEUDO:
1714      return expandMultiVecPseudo(
1715          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1716          AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
1717    case AArch64::LDNT1H_4Z_PSEUDO:
1718      return expandMultiVecPseudo(
1719          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1720          AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
1721    case AArch64::LDNT1W_4Z_PSEUDO:
1722      return expandMultiVecPseudo(
1723          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1724          AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
1725    case AArch64::LDNT1D_4Z_PSEUDO:
1726      return expandMultiVecPseudo(
1727          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1728          AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
1729   }
1730   return false;
1731 }
1732 
1733 /// Iterate over the instructions in basic block MBB and expand any
1734 /// pseudo instructions.  Return true if anything was modified.
1735 bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
1736   bool Modified = false;
1737 
1738   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1739   while (MBBI != E) {
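    // Capture the next iterator first: expandMI may erase the current
    // instruction and can update NMBBI when an expansion splits the block.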
1740     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
1741     Modified |= expandMI(MBB, MBBI, NMBBI);
1742     MBBI = NMBBI;
1743   }
1744 
1745   return Modified;
1746 }
1747 
1748 bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
1749   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
1750 
1751   bool Modified = false;
1752   for (auto &MBB : MF)
1753     Modified |= expandMBB(MBB);
1754   return Modified;
1755 }
1756 
1757 /// Returns an instance of the pseudo instruction expansion pass.
1758 FunctionPass *llvm::createAArch64ExpandPseudoPass() {
1759   return new AArch64ExpandPseudo();
1760 }
1761