1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of the TargetInstrInfo class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64InstrInfo.h"
14 #include "AArch64MachineFunctionInfo.h"
15 #include "AArch64Subtarget.h"
16 #include "MCTargetDesc/AArch64AddressingModes.h"
17 #include "Utils/AArch64BaseInfo.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/CodeGen/MachineBasicBlock.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineMemOperand.h"
27 #include "llvm/CodeGen/MachineModuleInfo.h"
28 #include "llvm/CodeGen/MachineOperand.h"
29 #include "llvm/CodeGen/MachineRegisterInfo.h"
30 #include "llvm/CodeGen/StackMaps.h"
31 #include "llvm/CodeGen/TargetRegisterInfo.h"
32 #include "llvm/CodeGen/TargetSubtargetInfo.h"
33 #include "llvm/IR/DebugInfoMetadata.h"
34 #include "llvm/IR/DebugLoc.h"
35 #include "llvm/IR/GlobalValue.h"
36 #include "llvm/MC/MCAsmInfo.h"
37 #include "llvm/MC/MCInst.h"
38 #include "llvm/MC/MCInstBuilder.h"
39 #include "llvm/MC/MCInstrDesc.h"
40 #include "llvm/Support/Casting.h"
41 #include "llvm/Support/CodeGen.h"
42 #include "llvm/Support/CommandLine.h"
43 #include "llvm/Support/Compiler.h"
44 #include "llvm/Support/ErrorHandling.h"
45 #include "llvm/Support/MathExtras.h"
46 #include "llvm/Target/TargetMachine.h"
47 #include "llvm/Target/TargetOptions.h"
48 #include <cassert>
49 #include <cstdint>
50 #include <iterator>
51 #include <utility>
52 
53 using namespace llvm;
54 
55 #define GET_INSTRINFO_CTOR_DTOR
56 #include "AArch64GenInstrInfo.inc"
57 
58 static cl::opt<unsigned> TBZDisplacementBits(
59     "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
60     cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
61 
62 static cl::opt<unsigned> CBZDisplacementBits(
63     "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
64     cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
65 
66 static cl::opt<unsigned>
67     BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
68                         cl::desc("Restrict range of Bcc instructions (DEBUG)"));
69 
70 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
71     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
72                           AArch64::CATCHRET),
73       RI(STI.getTargetTriple()), Subtarget(STI) {}
74 
75 /// getInstSizeInBytes - Return the number of bytes of code the specified
76 /// instruction may occupy.  This returns the maximum number of bytes.
77 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
78   const MachineBasicBlock &MBB = *MI.getParent();
79   const MachineFunction *MF = MBB.getParent();
80   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
81 
82   {
83     auto Op = MI.getOpcode();
84     if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
85       return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
86   }
87 
88   // Meta-instructions emit no code.
89   if (MI.isMetaInstruction())
90     return 0;
91 
92   // FIXME: We currently only handle pseudoinstructions that don't get expanded
93   //        before the assembly printer.
94   unsigned NumBytes = 0;
95   const MCInstrDesc &Desc = MI.getDesc();
96 
97   // The size should preferably be set in
98   // llvm/lib/Target/AArch64/AArch64InstrInfo.td (the default case).
99   // The specific cases below handle instructions of variable size.
100   switch (Desc.getOpcode()) {
101   default:
102     if (Desc.getSize())
103       return Desc.getSize();
104 
105     // Anything not explicitly designated otherwise (i.e. pseudo-instructions
106     // with fixed constant size but not specified in .td file) is a normal
107     // 4-byte insn.
108     NumBytes = 4;
109     break;
110   case TargetOpcode::STACKMAP:
111     // The upper bound for a stackmap intrinsic is the full length of its shadow
112     NumBytes = StackMapOpers(&MI).getNumPatchBytes();
113     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
114     break;
115   case TargetOpcode::PATCHPOINT:
116     // The size of the patchpoint intrinsic is the number of bytes requested
117     NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
118     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
119     break;
120   case TargetOpcode::STATEPOINT:
121     NumBytes = StatepointOpers(&MI).getNumPatchBytes();
122     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
123     // No patch bytes means a normal call inst is emitted
124     if (NumBytes == 0)
125       NumBytes = 4;
126     break;
127   case AArch64::SPACE:
128     NumBytes = MI.getOperand(1).getImm();
129     break;
130   case TargetOpcode::BUNDLE:
131     NumBytes = getInstBundleLength(MI);
132     break;
133   }
134 
135   return NumBytes;
136 }
137 
138 unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
139   unsigned Size = 0;
140   MachineBasicBlock::const_instr_iterator I = MI.getIterator();
141   MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
142   while (++I != E && I->isInsideBundle()) {
143     assert(!I->isBundle() && "No nested bundle!");
144     Size += getInstSizeInBytes(*I);
145   }
146   return Size;
147 }
148 
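// Condition operand encoding shared by parseCondBranch below and its
// consumers (reverseBranchCondition, instantiateCondBranch, insertSelect):
//   Bcc:                 Cond = { condition code }
//   CBZ/CBNZ (W and X):  Cond = { -1, opcode, source register }
//   TBZ/TBNZ (W and X):  Cond = { -1, opcode, source register, bit number }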
149 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
150                             SmallVectorImpl<MachineOperand> &Cond) {
151   // Block ends with fall-through condbranch.
152   switch (LastInst->getOpcode()) {
153   default:
154     llvm_unreachable("Unknown branch instruction?");
155   case AArch64::Bcc:
156     Target = LastInst->getOperand(1).getMBB();
157     Cond.push_back(LastInst->getOperand(0));
158     break;
159   case AArch64::CBZW:
160   case AArch64::CBZX:
161   case AArch64::CBNZW:
162   case AArch64::CBNZX:
163     Target = LastInst->getOperand(1).getMBB();
164     Cond.push_back(MachineOperand::CreateImm(-1));
165     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
166     Cond.push_back(LastInst->getOperand(0));
167     break;
168   case AArch64::TBZW:
169   case AArch64::TBZX:
170   case AArch64::TBNZW:
171   case AArch64::TBNZX:
172     Target = LastInst->getOperand(2).getMBB();
173     Cond.push_back(MachineOperand::CreateImm(-1));
174     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
175     Cond.push_back(LastInst->getOperand(0));
176     Cond.push_back(LastInst->getOperand(1));
177   }
178 }
179 
180 static unsigned getBranchDisplacementBits(unsigned Opc) {
181   switch (Opc) {
182   default:
183     llvm_unreachable("unexpected opcode!");
184   case AArch64::B:
185     return 64;
186   case AArch64::TBNZW:
187   case AArch64::TBZW:
188   case AArch64::TBNZX:
189   case AArch64::TBZX:
190     return TBZDisplacementBits;
191   case AArch64::CBNZW:
192   case AArch64::CBZW:
193   case AArch64::CBNZX:
194   case AArch64::CBZX:
195     return CBZDisplacementBits;
196   case AArch64::Bcc:
197     return BCCDisplacementBits;
198   }
199 }
200 
201 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
202                                              int64_t BrOffset) const {
203   unsigned Bits = getBranchDisplacementBits(BranchOp);
204   assert(Bits >= 3 && "max branch displacement must be enough to jump "
205                       "over conditional branch expansion");
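  // Branch offsets are in bytes but encoded as multiples of 4-byte
  // instructions, hence the scaling before the signed-range check.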
206   return isIntN(Bits, BrOffset / 4);
207 }
208 
209 MachineBasicBlock *
210 AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
211   switch (MI.getOpcode()) {
212   default:
213     llvm_unreachable("unexpected opcode!");
214   case AArch64::B:
215     return MI.getOperand(0).getMBB();
216   case AArch64::TBZW:
217   case AArch64::TBNZW:
218   case AArch64::TBZX:
219   case AArch64::TBNZX:
220     return MI.getOperand(2).getMBB();
221   case AArch64::CBZW:
222   case AArch64::CBNZW:
223   case AArch64::CBZX:
224   case AArch64::CBNZX:
225   case AArch64::Bcc:
226     return MI.getOperand(1).getMBB();
227   }
228 }
229 
230 // Branch analysis.
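// analyzeBranch returns false when the terminator sequence was understood,
// filling in TBB, FBB and Cond as appropriate, and true when the block ends
// in something it cannot reason about, such as an indirect branch.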
231 bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
232                                      MachineBasicBlock *&TBB,
233                                      MachineBasicBlock *&FBB,
234                                      SmallVectorImpl<MachineOperand> &Cond,
235                                      bool AllowModify) const {
236   // If the block has no terminators, it just falls into the block after it.
237   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
238   if (I == MBB.end())
239     return false;
240 
241   // Skip over SpeculationBarrierEndBB terminators
242   if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
243       I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
244     --I;
245   }
246 
247   if (!isUnpredicatedTerminator(*I))
248     return false;
249 
250   // Get the last instruction in the block.
251   MachineInstr *LastInst = &*I;
252 
253   // If there is only one terminator instruction, process it.
254   unsigned LastOpc = LastInst->getOpcode();
255   if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
256     if (isUncondBranchOpcode(LastOpc)) {
257       TBB = LastInst->getOperand(0).getMBB();
258       return false;
259     }
260     if (isCondBranchOpcode(LastOpc)) {
261       // Block ends with fall-through condbranch.
262       parseCondBranch(LastInst, TBB, Cond);
263       return false;
264     }
265     return true; // Can't handle indirect branch.
266   }
267 
268   // Get the instruction before it if it is a terminator.
269   MachineInstr *SecondLastInst = &*I;
270   unsigned SecondLastOpc = SecondLastInst->getOpcode();
271 
272   // If AllowModify is true and the block ends with two or more unconditional
273   // branches, delete all but the first unconditional branch.
274   if (AllowModify && isUncondBranchOpcode(LastOpc)) {
275     while (isUncondBranchOpcode(SecondLastOpc)) {
276       LastInst->eraseFromParent();
277       LastInst = SecondLastInst;
278       LastOpc = LastInst->getOpcode();
279       if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
280         // Return now; the only remaining terminator is an unconditional branch.
281         TBB = LastInst->getOperand(0).getMBB();
282         return false;
283       } else {
284         SecondLastInst = &*I;
285         SecondLastOpc = SecondLastInst->getOpcode();
286       }
287     }
288   }
289 
290   // If we're allowed to modify and the block ends in an unconditional branch
291   // which could simply fallthrough, remove the branch.  (Note: This case only
292   // matters when we can't understand the whole sequence, otherwise it's also
293   // handled by BranchFolding.cpp.)
294   if (AllowModify && isUncondBranchOpcode(LastOpc) &&
295       MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
296     LastInst->eraseFromParent();
297     LastInst = SecondLastInst;
298     LastOpc = LastInst->getOpcode();
299     if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
300       assert(!isUncondBranchOpcode(LastOpc) &&
301              "unreachable unconditional branches removed above");
302 
303       if (isCondBranchOpcode(LastOpc)) {
304         // Block ends with fall-through condbranch.
305         parseCondBranch(LastInst, TBB, Cond);
306         return false;
307       }
308       return true; // Can't handle indirect branch.
309     } else {
310       SecondLastInst = &*I;
311       SecondLastOpc = SecondLastInst->getOpcode();
312     }
313   }
314 
315   // If there are three terminators, we don't know what sort of block this is.
316   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
317     return true;
318 
319   // If the block ends with a B and a Bcc, handle it.
320   if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
321     parseCondBranch(SecondLastInst, TBB, Cond);
322     FBB = LastInst->getOperand(0).getMBB();
323     return false;
324   }
325 
326   // If the block ends with two unconditional branches, handle it.  The second
327   // one is not executed, so remove it.
328   if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
329     TBB = SecondLastInst->getOperand(0).getMBB();
330     I = LastInst;
331     if (AllowModify)
332       I->eraseFromParent();
333     return false;
334   }
335 
336   // ...likewise if it ends with an indirect branch followed by an unconditional
337   // branch.
338   if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
339     I = LastInst;
340     if (AllowModify)
341       I->eraseFromParent();
342     return true;
343   }
344 
345   // Otherwise, can't handle this.
346   return true;
347 }
348 
349 bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
350                                               MachineBranchPredicate &MBP,
351                                               bool AllowModify) const {
352   // For the moment, handle only a block which ends with a cb(n)zx followed by
353   // a fallthrough.  Why this?  Because it is a common form.
354   // TODO: Should we handle b.cc?
355 
356   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
357   if (I == MBB.end())
358     return true;
359 
360   // Skip over SpeculationBarrierEndBB terminators
361   if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
362       I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
363     --I;
364   }
365 
366   if (!isUnpredicatedTerminator(*I))
367     return true;
368 
369   // Get the last instruction in the block.
370   MachineInstr *LastInst = &*I;
371   unsigned LastOpc = LastInst->getOpcode();
372   if (!isCondBranchOpcode(LastOpc))
373     return true;
374 
375   switch (LastOpc) {
376   default:
377     return true;
378   case AArch64::CBZW:
379   case AArch64::CBZX:
380   case AArch64::CBNZW:
381   case AArch64::CBNZX:
382     break;
383   }
384 
385   MBP.TrueDest = LastInst->getOperand(1).getMBB();
386   assert(MBP.TrueDest && "expected!");
387   MBP.FalseDest = MBB.getNextNode();
388 
389   MBP.ConditionDef = nullptr;
390   MBP.SingleUseCondition = false;
391 
392   MBP.LHS = LastInst->getOperand(0);
393   MBP.RHS = MachineOperand::CreateImm(0);
394   MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
395                                             : MachineBranchPredicate::PRED_EQ;
396   return false;
397 }
398 
399 bool AArch64InstrInfo::reverseBranchCondition(
400     SmallVectorImpl<MachineOperand> &Cond) const {
401   if (Cond[0].getImm() != -1) {
402     // Regular Bcc
403     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
404     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
405   } else {
406     // Folded compare-and-branch
407     switch (Cond[1].getImm()) {
408     default:
409       llvm_unreachable("Unknown conditional branch!");
410     case AArch64::CBZW:
411       Cond[1].setImm(AArch64::CBNZW);
412       break;
413     case AArch64::CBNZW:
414       Cond[1].setImm(AArch64::CBZW);
415       break;
416     case AArch64::CBZX:
417       Cond[1].setImm(AArch64::CBNZX);
418       break;
419     case AArch64::CBNZX:
420       Cond[1].setImm(AArch64::CBZX);
421       break;
422     case AArch64::TBZW:
423       Cond[1].setImm(AArch64::TBNZW);
424       break;
425     case AArch64::TBNZW:
426       Cond[1].setImm(AArch64::TBZW);
427       break;
428     case AArch64::TBZX:
429       Cond[1].setImm(AArch64::TBNZX);
430       break;
431     case AArch64::TBNZX:
432       Cond[1].setImm(AArch64::TBZX);
433       break;
434     }
435   }
436 
437   return false;
438 }
439 
440 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
441                                         int *BytesRemoved) const {
442   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
443   if (I == MBB.end())
444     return 0;
445 
446   if (!isUncondBranchOpcode(I->getOpcode()) &&
447       !isCondBranchOpcode(I->getOpcode()))
448     return 0;
449 
450   // Remove the branch.
451   I->eraseFromParent();
452 
453   I = MBB.end();
454 
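  // If the block is now empty, the erased branch was its only instruction.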
455   if (I == MBB.begin()) {
456     if (BytesRemoved)
457       *BytesRemoved = 4;
458     return 1;
459   }
460   --I;
461   if (!isCondBranchOpcode(I->getOpcode())) {
462     if (BytesRemoved)
463       *BytesRemoved = 4;
464     return 1;
465   }
466 
467   // Remove the branch.
468   I->eraseFromParent();
469   if (BytesRemoved)
470     *BytesRemoved = 8;
471 
472   return 2;
473 }
474 
475 void AArch64InstrInfo::instantiateCondBranch(
476     MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
477     ArrayRef<MachineOperand> Cond) const {
478   if (Cond[0].getImm() != -1) {
479     // Regular Bcc
480     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
481   } else {
482     // Folded compare-and-branch
483     // Note that we use MachineInstrBuilder::add instead of addReg to keep the flags.
484     const MachineInstrBuilder MIB =
485         BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
486     if (Cond.size() > 3)
487       MIB.addImm(Cond[3].getImm());
488     MIB.addMBB(TBB);
489   }
490 }
491 
492 unsigned AArch64InstrInfo::insertBranch(
493     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
494     ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
495   // Shouldn't be a fall through.
496   assert(TBB && "insertBranch must not be told to insert a fallthrough");
497 
498   if (!FBB) {
499     if (Cond.empty()) // Unconditional branch?
500       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
501     else
502       instantiateCondBranch(MBB, DL, TBB, Cond);
503 
504     if (BytesAdded)
505       *BytesAdded = 4;
506 
507     return 1;
508   }
509 
510   // Two-way conditional branch.
511   instantiateCondBranch(MBB, DL, TBB, Cond);
512   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
513 
514   if (BytesAdded)
515     *BytesAdded = 8;
516 
517   return 2;
518 }
519 
520 // Find the original register that VReg is copied from.
521 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
522   while (Register::isVirtualRegister(VReg)) {
523     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
524     if (!DefMI->isFullCopy())
525       return VReg;
526     VReg = DefMI->getOperand(1).getReg();
527   }
528   return VReg;
529 }
530 
531 // Determine if VReg is defined by an instruction that can be folded into a
532 // csel instruction. If so, return the folded opcode, and the replacement
533 // register.
534 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
535                                 unsigned *NewVReg = nullptr) {
536   VReg = removeCopies(MRI, VReg);
537   if (!Register::isVirtualRegister(VReg))
538     return 0;
539 
540   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
541   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
542   unsigned Opc = 0;
543   unsigned SrcOpNum = 0;
544   switch (DefMI->getOpcode()) {
545   case AArch64::ADDSXri:
546   case AArch64::ADDSWri:
547     // if NZCV is used, do not fold.
548     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
549       return 0;
550     // fall-through to ADDXri and ADDWri.
551     LLVM_FALLTHROUGH;
552   case AArch64::ADDXri:
553   case AArch64::ADDWri:
554     // add x, 1 -> csinc.
555     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
556         DefMI->getOperand(3).getImm() != 0)
557       return 0;
558     SrcOpNum = 1;
559     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
560     break;
561 
562   case AArch64::ORNXrr:
563   case AArch64::ORNWrr: {
564     // not x -> csinv, represented as orn dst, xzr, src.
565     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
566     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
567       return 0;
568     SrcOpNum = 2;
569     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
570     break;
571   }
572 
573   case AArch64::SUBSXrr:
574   case AArch64::SUBSWrr:
575     // if NZCV is used, do not fold.
576     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
577       return 0;
578     // fall-through to SUBXrr and SUBWrr.
579     LLVM_FALLTHROUGH;
580   case AArch64::SUBXrr:
581   case AArch64::SUBWrr: {
582     // neg x -> csneg, represented as sub dst, xzr, src.
583     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
584     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
585       return 0;
586     SrcOpNum = 2;
587     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
588     break;
589   }
590   default:
591     return 0;
592   }
593   assert(Opc && SrcOpNum && "Missing parameters");
594 
595   if (NewVReg)
596     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
597   return Opc;
598 }
599 
600 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
601                                        ArrayRef<MachineOperand> Cond,
602                                        Register DstReg, Register TrueReg,
603                                        Register FalseReg, int &CondCycles,
604                                        int &TrueCycles,
605                                        int &FalseCycles) const {
606   // Check register classes.
607   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
608   const TargetRegisterClass *RC =
609       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
610   if (!RC)
611     return false;
612 
613   // Also need to check the dest regclass, in case we're trying to optimize
614   // something like:
615   // %1(gpr) = PHI %2(fpr), bb1, %3(fpr), bb2
616   if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
617     return false;
618 
619   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
620   unsigned ExtraCondLat = Cond.size() != 1;
621 
622   // GPRs are handled by csel.
623   // FIXME: Fold in x+1, -x, and ~x when applicable.
624   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
625       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
626     // Single-cycle csel, csinc, csinv, and csneg.
627     CondCycles = 1 + ExtraCondLat;
628     TrueCycles = FalseCycles = 1;
629     if (canFoldIntoCSel(MRI, TrueReg))
630       TrueCycles = 0;
631     else if (canFoldIntoCSel(MRI, FalseReg))
632       FalseCycles = 0;
633     return true;
634   }
635 
636   // Scalar floating point is handled by fcsel.
637   // FIXME: Form fabs, fmin, and fmax when applicable.
638   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
639       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
640     CondCycles = 5 + ExtraCondLat;
641     TrueCycles = FalseCycles = 2;
642     return true;
643   }
644 
645   // Can't do vectors.
646   return false;
647 }
648 
649 void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
650                                     MachineBasicBlock::iterator I,
651                                     const DebugLoc &DL, Register DstReg,
652                                     ArrayRef<MachineOperand> Cond,
653                                     Register TrueReg, Register FalseReg) const {
654   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
655 
656   // Parse the condition code, see parseCondBranch() above.
657   AArch64CC::CondCode CC;
658   switch (Cond.size()) {
659   default:
660     llvm_unreachable("Unknown condition opcode in Cond");
661   case 1: // b.cc
662     CC = AArch64CC::CondCode(Cond[0].getImm());
663     break;
664   case 3: { // cbz/cbnz
665     // We must insert a compare against 0.
666     bool Is64Bit;
667     switch (Cond[1].getImm()) {
668     default:
669       llvm_unreachable("Unknown branch opcode in Cond");
670     case AArch64::CBZW:
671       Is64Bit = false;
672       CC = AArch64CC::EQ;
673       break;
674     case AArch64::CBZX:
675       Is64Bit = true;
676       CC = AArch64CC::EQ;
677       break;
678     case AArch64::CBNZW:
679       Is64Bit = false;
680       CC = AArch64CC::NE;
681       break;
682     case AArch64::CBNZX:
683       Is64Bit = true;
684       CC = AArch64CC::NE;
685       break;
686     }
687     Register SrcReg = Cond[2].getReg();
688     if (Is64Bit) {
689       // cmp reg, #0 is actually subs xzr, reg, #0.
690       MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
691       BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
692           .addReg(SrcReg)
693           .addImm(0)
694           .addImm(0);
695     } else {
696       MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
697       BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
698           .addReg(SrcReg)
699           .addImm(0)
700           .addImm(0);
701     }
702     break;
703   }
704   case 4: { // tbz/tbnz
705     // We must insert a tst instruction.
706     switch (Cond[1].getImm()) {
707     default:
708       llvm_unreachable("Unknown branch opcode in Cond");
709     case AArch64::TBZW:
710     case AArch64::TBZX:
711       CC = AArch64CC::EQ;
712       break;
713     case AArch64::TBNZW:
714     case AArch64::TBNZX:
715       CC = AArch64CC::NE;
716       break;
717     }
718     // tst reg, #(1 << foo) is actually ands xzr, reg, #(1 << foo).
719     if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
720       BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
721           .addReg(Cond[2].getReg())
722           .addImm(
723               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
724     else
725       BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
726           .addReg(Cond[2].getReg())
727           .addImm(
728               AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
729     break;
730   }
731   }
732 
733   unsigned Opc = 0;
734   const TargetRegisterClass *RC = nullptr;
735   bool TryFold = false;
736   if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
737     RC = &AArch64::GPR64RegClass;
738     Opc = AArch64::CSELXr;
739     TryFold = true;
740   } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
741     RC = &AArch64::GPR32RegClass;
742     Opc = AArch64::CSELWr;
743     TryFold = true;
744   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
745     RC = &AArch64::FPR64RegClass;
746     Opc = AArch64::FCSELDrrr;
747   } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
748     RC = &AArch64::FPR32RegClass;
749     Opc = AArch64::FCSELSrrr;
750   }
751   assert(RC && "Unsupported regclass");
752 
753   // Try folding simple instructions into the csel.
754   if (TryFold) {
755     unsigned NewVReg = 0;
756     unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
757     if (FoldedOpc) {
758       // The folded opcodes csinc, csinv and csneg apply the operation to
759       // FalseReg, so we need to invert the condition.
760       CC = AArch64CC::getInvertedCondCode(CC);
761       TrueReg = FalseReg;
762     } else
763       FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
764 
765     // Fold the operation. Leave any dead instructions for DCE to clean up.
766     if (FoldedOpc) {
767       FalseReg = NewVReg;
768       Opc = FoldedOpc;
769       // This extends the live range of NewVReg.
770       MRI.clearKillFlags(NewVReg);
771     }
772   }
773 
774   // Pull all virtual registers into the appropriate class.
775   MRI.constrainRegClass(TrueReg, RC);
776   MRI.constrainRegClass(FalseReg, RC);
777 
778   // Insert the csel.
779   BuildMI(MBB, I, DL, get(Opc), DstReg)
780       .addReg(TrueReg)
781       .addReg(FalseReg)
782       .addImm(CC);
783 }
784 
785 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
786 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
787   uint64_t Imm = MI.getOperand(1).getImm();
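  // Shift up and back down to clear any bits above BitSize, so that only the
  // low 32 or 64 bits are tested as a logical immediate.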
788   uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
789   uint64_t Encoding;
790   return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
791 }
792 
793 // FIXME: this implementation should be micro-architecture dependent, so a
794 // micro-architecture target hook should be introduced here in future.
795 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
796   if (!Subtarget.hasCustomCheapAsMoveHandling())
797     return MI.isAsCheapAsAMove();
798 
799   const unsigned Opcode = MI.getOpcode();
800 
801   // Firstly, check cases gated by features.
802 
803   if (Subtarget.hasZeroCycleZeroingFP()) {
804     if (Opcode == AArch64::FMOVH0 ||
805         Opcode == AArch64::FMOVS0 ||
806         Opcode == AArch64::FMOVD0)
807       return true;
808   }
809 
810   if (Subtarget.hasZeroCycleZeroingGP()) {
811     if (Opcode == TargetOpcode::COPY &&
812         (MI.getOperand(1).getReg() == AArch64::WZR ||
813          MI.getOperand(1).getReg() == AArch64::XZR))
814       return true;
815   }
816 
817   // Secondly, check cases specific to sub-targets.
818 
819   if (Subtarget.hasExynosCheapAsMoveHandling()) {
820     if (isExynosCheapAsMove(MI))
821       return true;
822 
823     return MI.isAsCheapAsAMove();
824   }
825 
826   // Finally, check generic cases.
827 
828   switch (Opcode) {
829   default:
830     return false;
831 
832   // add/sub with immediate and no shift
833   case AArch64::ADDWri:
834   case AArch64::ADDXri:
835   case AArch64::SUBWri:
836   case AArch64::SUBXri:
837     return (MI.getOperand(3).getImm() == 0);
838 
839   // logical ops on immediate
840   case AArch64::ANDWri:
841   case AArch64::ANDXri:
842   case AArch64::EORWri:
843   case AArch64::EORXri:
844   case AArch64::ORRWri:
845   case AArch64::ORRXri:
846     return true;
847 
848   // logical ops on register without shift
849   case AArch64::ANDWrr:
850   case AArch64::ANDXrr:
851   case AArch64::BICWrr:
852   case AArch64::BICXrr:
853   case AArch64::EONWrr:
854   case AArch64::EONXrr:
855   case AArch64::EORWrr:
856   case AArch64::EORXrr:
857   case AArch64::ORNWrr:
858   case AArch64::ORNXrr:
859   case AArch64::ORRWrr:
860   case AArch64::ORRXrr:
861     return true;
862 
863   // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
864   // ORRXri, it is as cheap as MOV
865   case AArch64::MOVi32imm:
866     return canBeExpandedToORR(MI, 32);
867   case AArch64::MOVi64imm:
868     return canBeExpandedToORR(MI, 64);
869   }
870 
871   llvm_unreachable("Unknown opcode to check as cheap as a move!");
872 }
873 
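// Falkor-specific check: return true when the shifted or extended operand
// form of MI is not expected to be slower than the plain register form.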
874 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
875   switch (MI.getOpcode()) {
876   default:
877     return false;
878 
879   case AArch64::ADDWrs:
880   case AArch64::ADDXrs:
881   case AArch64::ADDSWrs:
882   case AArch64::ADDSXrs: {
883     unsigned Imm = MI.getOperand(3).getImm();
884     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
885     if (ShiftVal == 0)
886       return true;
887     return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
888   }
889 
890   case AArch64::ADDWrx:
891   case AArch64::ADDXrx:
892   case AArch64::ADDXrx64:
893   case AArch64::ADDSWrx:
894   case AArch64::ADDSXrx:
895   case AArch64::ADDSXrx64: {
896     unsigned Imm = MI.getOperand(3).getImm();
897     switch (AArch64_AM::getArithExtendType(Imm)) {
898     default:
899       return false;
900     case AArch64_AM::UXTB:
901     case AArch64_AM::UXTH:
902     case AArch64_AM::UXTW:
903     case AArch64_AM::UXTX:
904       return AArch64_AM::getArithShiftValue(Imm) <= 4;
905     }
906   }
907 
908   case AArch64::SUBWrs:
909   case AArch64::SUBSWrs: {
910     unsigned Imm = MI.getOperand(3).getImm();
911     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
912     return ShiftVal == 0 ||
913            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
914   }
915 
916   case AArch64::SUBXrs:
917   case AArch64::SUBSXrs: {
918     unsigned Imm = MI.getOperand(3).getImm();
919     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
920     return ShiftVal == 0 ||
921            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
922   }
923 
924   case AArch64::SUBWrx:
925   case AArch64::SUBXrx:
926   case AArch64::SUBXrx64:
927   case AArch64::SUBSWrx:
928   case AArch64::SUBSXrx:
929   case AArch64::SUBSXrx64: {
930     unsigned Imm = MI.getOperand(3).getImm();
931     switch (AArch64_AM::getArithExtendType(Imm)) {
932     default:
933       return false;
934     case AArch64_AM::UXTB:
935     case AArch64_AM::UXTH:
936     case AArch64_AM::UXTW:
937     case AArch64_AM::UXTX:
938       return AArch64_AM::getArithShiftValue(Imm) == 0;
939     }
940   }
941 
942   case AArch64::LDRBBroW:
943   case AArch64::LDRBBroX:
944   case AArch64::LDRBroW:
945   case AArch64::LDRBroX:
946   case AArch64::LDRDroW:
947   case AArch64::LDRDroX:
948   case AArch64::LDRHHroW:
949   case AArch64::LDRHHroX:
950   case AArch64::LDRHroW:
951   case AArch64::LDRHroX:
952   case AArch64::LDRQroW:
953   case AArch64::LDRQroX:
954   case AArch64::LDRSBWroW:
955   case AArch64::LDRSBWroX:
956   case AArch64::LDRSBXroW:
957   case AArch64::LDRSBXroX:
958   case AArch64::LDRSHWroW:
959   case AArch64::LDRSHWroX:
960   case AArch64::LDRSHXroW:
961   case AArch64::LDRSHXroX:
962   case AArch64::LDRSWroW:
963   case AArch64::LDRSWroX:
964   case AArch64::LDRSroW:
965   case AArch64::LDRSroX:
966   case AArch64::LDRWroW:
967   case AArch64::LDRWroX:
968   case AArch64::LDRXroW:
969   case AArch64::LDRXroX:
970   case AArch64::PRFMroW:
971   case AArch64::PRFMroX:
972   case AArch64::STRBBroW:
973   case AArch64::STRBBroX:
974   case AArch64::STRBroW:
975   case AArch64::STRBroX:
976   case AArch64::STRDroW:
977   case AArch64::STRDroX:
978   case AArch64::STRHHroW:
979   case AArch64::STRHHroX:
980   case AArch64::STRHroW:
981   case AArch64::STRHroX:
982   case AArch64::STRQroW:
983   case AArch64::STRQroX:
984   case AArch64::STRSroW:
985   case AArch64::STRSroX:
986   case AArch64::STRWroW:
987   case AArch64::STRWroX:
988   case AArch64::STRXroW:
989   case AArch64::STRXroX: {
990     unsigned IsSigned = MI.getOperand(3).getImm();
991     return !IsSigned;
992   }
993   }
994 }
995 
996 bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
997   unsigned Opc = MI.getOpcode();
998   switch (Opc) {
999     default:
1000       return false;
1001     case AArch64::SEH_StackAlloc:
1002     case AArch64::SEH_SaveFPLR:
1003     case AArch64::SEH_SaveFPLR_X:
1004     case AArch64::SEH_SaveReg:
1005     case AArch64::SEH_SaveReg_X:
1006     case AArch64::SEH_SaveRegP:
1007     case AArch64::SEH_SaveRegP_X:
1008     case AArch64::SEH_SaveFReg:
1009     case AArch64::SEH_SaveFReg_X:
1010     case AArch64::SEH_SaveFRegP:
1011     case AArch64::SEH_SaveFRegP_X:
1012     case AArch64::SEH_SetFP:
1013     case AArch64::SEH_AddFP:
1014     case AArch64::SEH_Nop:
1015     case AArch64::SEH_PrologEnd:
1016     case AArch64::SEH_EpilogStart:
1017     case AArch64::SEH_EpilogEnd:
1018       return true;
1019   }
1020 }
1021 
1022 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1023                                              Register &SrcReg, Register &DstReg,
1024                                              unsigned &SubIdx) const {
1025   switch (MI.getOpcode()) {
1026   default:
1027     return false;
1028   case AArch64::SBFMXri: // aka sxtw
1029   case AArch64::UBFMXri: // aka uxtw
1030     // Check for the 32 -> 64 bit extension case, these instructions can do
1031     // much more.
1032     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1033       return false;
1034     // This is a signed or unsigned 32 -> 64 bit extension.
1035     SrcReg = MI.getOperand(1).getReg();
1036     DstReg = MI.getOperand(0).getReg();
1037     SubIdx = AArch64::sub_32;
1038     return true;
1039   }
1040 }
1041 
1042 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1043     const MachineInstr &MIa, const MachineInstr &MIb) const {
1044   const TargetRegisterInfo *TRI = &getRegisterInfo();
1045   const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1046   int64_t OffsetA = 0, OffsetB = 0;
1047   unsigned WidthA = 0, WidthB = 0;
1048   bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1049 
1050   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1051   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1052 
1053   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1054       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1055     return false;
1056 
1057   // Retrieve the base register, the offset from the base and the width. The
1058   // width is the size of the memory access (e.g. 1, 2, 4, 8 bytes). If the
1059   // bases are identical and the offset of the lower memory access plus its
1060   // width does not reach the offset of the higher memory access, then the
1061   // memory accesses are disjoint.
1062   // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1063   // are assumed to have the same scale (vscale).
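  // For example, with identical bases a 4-byte access at offset 0 and an
  // 8-byte access at offset 4 are disjoint.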
1064   if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1065                                    WidthA, TRI) &&
1066       getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1067                                    WidthB, TRI)) {
1068     if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1069         OffsetAIsScalable == OffsetBIsScalable) {
1070       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1071       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1072       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1073       if (LowOffset + LowWidth <= HighOffset)
1074         return true;
1075     }
1076   }
1077   return false;
1078 }
1079 
1080 bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1081                                             const MachineBasicBlock *MBB,
1082                                             const MachineFunction &MF) const {
1083   if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1084     return true;
1085   switch (MI.getOpcode()) {
1086   case AArch64::HINT:
1087     // CSDB hints are scheduling barriers.
1088     if (MI.getOperand(0).getImm() == 0x14)
1089       return true;
1090     break;
1091   case AArch64::DSB:
1092   case AArch64::ISB:
1093     // DSB and ISB also are scheduling barriers.
1094     return true;
1095   default:;
1096   }
1097   return isSEHInstruction(MI);
1098 }
1099 
1100 /// analyzeCompare - For a comparison instruction, return the source registers
1101 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1102 /// Return true if the comparison instruction can be analyzed.
1103 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1104                                       Register &SrcReg2, int64_t &CmpMask,
1105                                       int64_t &CmpValue) const {
1106   // The first operand can be a frame index where we'd normally expect a
1107   // register.
1108   assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1109   if (!MI.getOperand(1).isReg())
1110     return false;
1111 
1112   switch (MI.getOpcode()) {
1113   default:
1114     break;
1115   case AArch64::PTEST_PP:
1116     SrcReg = MI.getOperand(0).getReg();
1117     SrcReg2 = MI.getOperand(1).getReg();
1118     // Not sure about the mask and value for now...
1119     CmpMask = ~0;
1120     CmpValue = 0;
1121     return true;
1122   case AArch64::SUBSWrr:
1123   case AArch64::SUBSWrs:
1124   case AArch64::SUBSWrx:
1125   case AArch64::SUBSXrr:
1126   case AArch64::SUBSXrs:
1127   case AArch64::SUBSXrx:
1128   case AArch64::ADDSWrr:
1129   case AArch64::ADDSWrs:
1130   case AArch64::ADDSWrx:
1131   case AArch64::ADDSXrr:
1132   case AArch64::ADDSXrs:
1133   case AArch64::ADDSXrx:
1134     // Replace SUBSWrr with SUBWrr if NZCV is not used.
1135     SrcReg = MI.getOperand(1).getReg();
1136     SrcReg2 = MI.getOperand(2).getReg();
1137     CmpMask = ~0;
1138     CmpValue = 0;
1139     return true;
1140   case AArch64::SUBSWri:
1141   case AArch64::ADDSWri:
1142   case AArch64::SUBSXri:
1143   case AArch64::ADDSXri:
1144     SrcReg = MI.getOperand(1).getReg();
1145     SrcReg2 = 0;
1146     CmpMask = ~0;
1147     CmpValue = MI.getOperand(2).getImm();
1148     return true;
1149   case AArch64::ANDSWri:
1150   case AArch64::ANDSXri:
1151     // ANDS does not use the same encoding scheme as the other xxxS
1152     // instructions.
1153     SrcReg = MI.getOperand(1).getReg();
1154     SrcReg2 = 0;
1155     CmpMask = ~0;
1156     CmpValue = AArch64_AM::decodeLogicalImmediate(
1157                    MI.getOperand(2).getImm(),
1158                    MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1159     return true;
1160   }
1161 
1162   return false;
1163 }
1164 
1165 static bool UpdateOperandRegClass(MachineInstr &Instr) {
1166   MachineBasicBlock *MBB = Instr.getParent();
1167   assert(MBB && "Can't get MachineBasicBlock here");
1168   MachineFunction *MF = MBB->getParent();
1169   assert(MF && "Can't get MachineFunction here");
1170   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1171   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1172   MachineRegisterInfo *MRI = &MF->getRegInfo();
1173 
1174   for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1175        ++OpIdx) {
1176     MachineOperand &MO = Instr.getOperand(OpIdx);
1177     const TargetRegisterClass *OpRegCstraints =
1178         Instr.getRegClassConstraint(OpIdx, TII, TRI);
1179 
1180     // If there's no constraint, there's nothing to do.
1181     if (!OpRegCstraints)
1182       continue;
1183     // If the operand is a frame index, there's nothing to do here.
1184     // A frame index operand will resolve correctly during PEI.
1185     if (MO.isFI())
1186       continue;
1187 
1188     assert(MO.isReg() &&
1189            "Operand has register constraints without being a register!");
1190 
1191     Register Reg = MO.getReg();
1192     if (Register::isPhysicalRegister(Reg)) {
1193       if (!OpRegCstraints->contains(Reg))
1194         return false;
1195     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1196                !MRI->constrainRegClass(Reg, OpRegCstraints))
1197       return false;
1198   }
1199 
1200   return true;
1201 }
1202 
1203 /// Return the opcode that does not set flags when possible - otherwise
1204 /// return the original opcode. The caller is responsible for doing the actual
1205 /// substitution and legality checking.
1206 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1207   // Don't convert all compare instructions, because for some the zero register
1208   // encoding becomes the sp register.
1209   bool MIDefinesZeroReg = false;
1210   if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1211     MIDefinesZeroReg = true;
1212 
1213   switch (MI.getOpcode()) {
1214   default:
1215     return MI.getOpcode();
1216   case AArch64::ADDSWrr:
1217     return AArch64::ADDWrr;
1218   case AArch64::ADDSWri:
1219     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1220   case AArch64::ADDSWrs:
1221     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1222   case AArch64::ADDSWrx:
1223     return AArch64::ADDWrx;
1224   case AArch64::ADDSXrr:
1225     return AArch64::ADDXrr;
1226   case AArch64::ADDSXri:
1227     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1228   case AArch64::ADDSXrs:
1229     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1230   case AArch64::ADDSXrx:
1231     return AArch64::ADDXrx;
1232   case AArch64::SUBSWrr:
1233     return AArch64::SUBWrr;
1234   case AArch64::SUBSWri:
1235     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1236   case AArch64::SUBSWrs:
1237     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1238   case AArch64::SUBSWrx:
1239     return AArch64::SUBWrx;
1240   case AArch64::SUBSXrr:
1241     return AArch64::SUBXrr;
1242   case AArch64::SUBSXri:
1243     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1244   case AArch64::SUBSXrs:
1245     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1246   case AArch64::SUBSXrx:
1247     return AArch64::SUBXrx;
1248   }
1249 }
1250 
1251 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1252 
1253 /// True when condition flags are accessed (either by writing or reading)
1254 /// on the instruction trace starting at From and ending at To.
1255 ///
1256 /// Note: If From and To are in different blocks, the condition flags are
1257 ///       assumed to be accessed on the path.
1258 static bool areCFlagsAccessedBetweenInstrs(
1259     MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1260     const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1261   // Early exit if To is at the beginning of the BB; conservatively assume access.
1262   if (To == To->getParent()->begin())
1263     return true;
1264 
1265   // Check whether the instructions are in the same basic block
1266   // If not, assume the condition flags might get modified somewhere.
1267   if (To->getParent() != From->getParent())
1268     return true;
1269 
1270   // From must be above To.
1271   assert(std::any_of(
1272       ++To.getReverse(), To->getParent()->rend(),
1273       [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1274 
1275   // We iterate backward starting at \p To until we hit \p From.
1276   for (const MachineInstr &Instr :
1277        instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1278     if (((AccessToCheck & AK_Write) &&
1279          Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1280         ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1281       return true;
1282   }
1283   return false;
1284 }
1285 
1286 /// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1287 /// operation which could set the flags in an identical manner
1288 bool AArch64InstrInfo::optimizePTestInstr(
1289     MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1290     const MachineRegisterInfo *MRI) const {
1291   auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1292   auto *Pred = MRI->getUniqueVRegDef(PredReg);
1293   auto NewOp = Pred->getOpcode();
1294   bool OpChanged = false;
1295 
1296   unsigned MaskOpcode = Mask->getOpcode();
1297   unsigned PredOpcode = Pred->getOpcode();
1298   bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1299   bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1300 
1301   if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
1302     // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
1303     // deactivate any lanes OTHER_INST might set.
1304     uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
1305     uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1306 
1307     // Must be an all active predicate of matching element size.
1308     if ((PredElementSize != MaskElementSize) ||
1309         (Mask->getOperand(1).getImm() != 31))
1310       return false;
1311 
1312     // Fallthrough to simply remove the PTEST.
1313   } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
1314     // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1315     // instruction that sets the flags as PTEST would.
1316 
1317     // Fallthrough to simply remove the PTEST.
1318   } else if (PredIsPTestLike) {
1319     // For PTEST(PG1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
1320     // instructions use the same predicate.
1321     auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1322     if (Mask != PTestLikeMask)
1323       return false;
1324 
1325     // Fallthrough to simply remove the PTEST.
1326   } else {
1327     switch (Pred->getOpcode()) {
1328     case AArch64::BRKB_PPzP:
1329     case AArch64::BRKPB_PPzPP: {
1330       // Op 0 is chain, 1 is the mask, 2 the previous predicate to
1331       // propagate, 3 the new predicate.
1332 
1333       // Check to see if our mask is the same as the brkpb's. If
1334       // not the resulting flag bits may be different and we
1335       // can't remove the ptest.
1336       auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1337       if (Mask != PredMask)
1338         return false;
1339 
1340       // Switch to the new opcode
1341       NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
1342                                                       : AArch64::BRKPBS_PPzPP;
1343       OpChanged = true;
1344       break;
1345     }
1346     case AArch64::BRKN_PPzP: {
1347       auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1348       if (Mask != PredMask)
1349         return false;
1350 
1351       NewOp = AArch64::BRKNS_PPzP;
1352       OpChanged = true;
1353       break;
1354     }
1355     case AArch64::RDFFR_PPz: {
1356       // rdffr   p1.b, PredMask=p0/z <--- Definition of Pred
1357       // ptest   Mask=p0, Pred=p1.b  <--- If equal masks, remove this and use
1358       //                                  `rdffrs p1.b, p0/z` above.
1359       auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1360       if (Mask != PredMask)
1361         return false;
1362 
1363       NewOp = AArch64::RDFFRS_PPz;
1364       OpChanged = true;
1365       break;
1366     }
1367     default:
1368       // Bail out if we don't recognize the input
1369       return false;
1370     }
1371   }
1372 
1373   const TargetRegisterInfo *TRI = &getRegisterInfo();
1374 
1375   // If another instruction between Pred and PTest accesses flags, don't remove
1376   // the ptest or update the earlier instruction to modify them.
1377   if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1378     return false;
1379 
1380   // If we pass all the checks, it's safe to remove the PTEST and use the flags
1381   // as they are prior to PTEST. Sometimes this requires the tested PTEST
1382   // operand to be replaced with an equivalent instruction that also sets the
1383   // flags.
1384   Pred->setDesc(get(NewOp));
1385   PTest->eraseFromParent();
1386   if (OpChanged) {
1387     bool succeeded = UpdateOperandRegClass(*Pred);
1388     (void)succeeded;
1389     assert(succeeded && "Operands have incompatible register classes!");
1390     Pred->addRegisterDefined(AArch64::NZCV, TRI);
1391   }
1392 
1393   // Ensure that the flags def is live.
1394   if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1395     unsigned i = 0, e = Pred->getNumOperands();
1396     for (; i != e; ++i) {
1397       MachineOperand &MO = Pred->getOperand(i);
1398       if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1399         MO.setIsDead(false);
1400         break;
1401       }
1402     }
1403   }
1404   return true;
1405 }
1406 
1407 /// Try to optimize a compare instruction. A compare instruction is an
1408 /// instruction which produces AArch64::NZCV. It is a true compare instruction
1409 /// when there are no uses of its destination register.
1411 ///
1412 /// The following steps are tried in order:
1413 /// 1. Convert CmpInstr into an unconditional version.
1414 /// 2. Remove CmpInstr if there is an earlier instruction producing a needed
1415 ///    condition code, or an instruction which can be converted into such an
1416 ///    instruction.
1417 ///    Only comparison with zero is supported.
1418 bool AArch64InstrInfo::optimizeCompareInstr(
1419     MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1420     int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1421   assert(CmpInstr.getParent());
1422   assert(MRI);
1423 
1424   // Replace SUBSWrr with SUBWrr if NZCV is not used.
1425   int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1426   if (DeadNZCVIdx != -1) {
1427     if (CmpInstr.definesRegister(AArch64::WZR) ||
1428         CmpInstr.definesRegister(AArch64::XZR)) {
1429       CmpInstr.eraseFromParent();
1430       return true;
1431     }
1432     unsigned Opc = CmpInstr.getOpcode();
1433     unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1434     if (NewOpc == Opc)
1435       return false;
1436     const MCInstrDesc &MCID = get(NewOpc);
1437     CmpInstr.setDesc(MCID);
1438     CmpInstr.RemoveOperand(DeadNZCVIdx);
1439     bool succeeded = UpdateOperandRegClass(CmpInstr);
1440     (void)succeeded;
1441     assert(succeeded && "Some operands reg class are incompatible!");
1442     return true;
1443   }
1444 
1445   if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
1446     return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1447 
1448   if (SrcReg2 != 0)
1449     return false;
1450 
1451   // CmpInstr is a compare instruction if the destination register is not used.
1452   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1453     return false;
1454 
1455   if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1456     return true;
1457   return (CmpValue == 0 || CmpValue == 1) &&
1458          removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1459 }
1460 
1461 /// Get the opcode of the S (flag-setting) version of Instr.
1462 /// If Instr is already an S version, its opcode is returned.
1463 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
1464 /// version or we are not interested in it.
1465 static unsigned sForm(MachineInstr &Instr) {
1466   switch (Instr.getOpcode()) {
1467   default:
1468     return AArch64::INSTRUCTION_LIST_END;
1469 
1470   case AArch64::ADDSWrr:
1471   case AArch64::ADDSWri:
1472   case AArch64::ADDSXrr:
1473   case AArch64::ADDSXri:
1474   case AArch64::SUBSWrr:
1475   case AArch64::SUBSWri:
1476   case AArch64::SUBSXrr:
1477   case AArch64::SUBSXri:
1478     return Instr.getOpcode();
1479 
1480   case AArch64::ADDWrr:
1481     return AArch64::ADDSWrr;
1482   case AArch64::ADDWri:
1483     return AArch64::ADDSWri;
1484   case AArch64::ADDXrr:
1485     return AArch64::ADDSXrr;
1486   case AArch64::ADDXri:
1487     return AArch64::ADDSXri;
1488   case AArch64::ADCWr:
1489     return AArch64::ADCSWr;
1490   case AArch64::ADCXr:
1491     return AArch64::ADCSXr;
1492   case AArch64::SUBWrr:
1493     return AArch64::SUBSWrr;
1494   case AArch64::SUBWri:
1495     return AArch64::SUBSWri;
1496   case AArch64::SUBXrr:
1497     return AArch64::SUBSXrr;
1498   case AArch64::SUBXri:
1499     return AArch64::SUBSXri;
1500   case AArch64::SBCWr:
1501     return AArch64::SBCSWr;
1502   case AArch64::SBCXr:
1503     return AArch64::SBCSXr;
1504   case AArch64::ANDWri:
1505     return AArch64::ANDSWri;
1506   case AArch64::ANDXri:
1507     return AArch64::ANDSXri;
1508   }
1509 }
1510 
1511 /// Check if AArch64::NZCV should be alive in successors of MBB.
1512 static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1513   for (auto *BB : MBB->successors())
1514     if (BB->isLiveIn(AArch64::NZCV))
1515       return true;
1516   return false;
1517 }
1518 
1519 /// \returns The condition code operand index for \p Instr if it is a branch
1520 /// or select and -1 otherwise.
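/// In both cases the condition code immediate sits at a fixed offset before
/// the implicit NZCV use operand, which is what the index arithmetic below
/// relies on.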
1521 static int
1522 findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1523   switch (Instr.getOpcode()) {
1524   default:
1525     return -1;
1526 
1527   case AArch64::Bcc: {
1528     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1529     assert(Idx >= 2);
1530     return Idx - 2;
1531   }
1532 
1533   case AArch64::CSINVWr:
1534   case AArch64::CSINVXr:
1535   case AArch64::CSINCWr:
1536   case AArch64::CSINCXr:
1537   case AArch64::CSELWr:
1538   case AArch64::CSELXr:
1539   case AArch64::CSNEGWr:
1540   case AArch64::CSNEGXr:
1541   case AArch64::FCSELSrrr:
1542   case AArch64::FCSELDrrr: {
1543     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1544     assert(Idx >= 1);
1545     return Idx - 1;
1546   }
1547   }
1548 }
1549 
1550 namespace {
1551 
1552 struct UsedNZCV {
1553   bool N = false;
1554   bool Z = false;
1555   bool C = false;
1556   bool V = false;
1557 
1558   UsedNZCV() = default;
1559 
1560   UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1561     this->N |= UsedFlags.N;
1562     this->Z |= UsedFlags.Z;
1563     this->C |= UsedFlags.C;
1564     this->V |= UsedFlags.V;
1565     return *this;
1566   }
1567 };
1568 
1569 } // end anonymous namespace
1570 
1571 /// Find a condition code used by the instruction.
1572 /// Returns AArch64CC::Invalid if either the instruction does not use condition
1573 /// codes or we don't optimize CmpInstr in the presence of such instructions.
1574 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1575   int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1576   return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1577                           Instr.getOperand(CCIdx).getImm())
1578                     : AArch64CC::Invalid;
1579 }
1580 
1581 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1582   assert(CC != AArch64CC::Invalid);
1583   UsedNZCV UsedFlags;
1584   switch (CC) {
1585   default:
1586     break;
1587 
1588   case AArch64CC::EQ: // Z set
1589   case AArch64CC::NE: // Z clear
1590     UsedFlags.Z = true;
1591     break;
1592 
1593   case AArch64CC::HI: // Z clear and C set
1594   case AArch64CC::LS: // Z set   or  C clear
1595     UsedFlags.Z = true;
1596     LLVM_FALLTHROUGH;
1597   case AArch64CC::HS: // C set
1598   case AArch64CC::LO: // C clear
1599     UsedFlags.C = true;
1600     break;
1601 
1602   case AArch64CC::MI: // N set
1603   case AArch64CC::PL: // N clear
1604     UsedFlags.N = true;
1605     break;
1606 
1607   case AArch64CC::VS: // V set
1608   case AArch64CC::VC: // V clear
1609     UsedFlags.V = true;
1610     break;
1611 
1612   case AArch64CC::GT: // Z clear, N and V the same
1613   case AArch64CC::LE: // Z set,   N and V differ
1614     UsedFlags.Z = true;
1615     LLVM_FALLTHROUGH;
1616   case AArch64CC::GE: // N and V the same
1617   case AArch64CC::LT: // N and V differ
1618     UsedFlags.N = true;
1619     UsedFlags.V = true;
1620     break;
1621   }
1622   return UsedFlags;
1623 }
1624 
1625 /// \returns the condition flags used after \p CmpInstr in its MachineBB if
1626 /// they do not include the C or V flags and NZCV is not alive in successors
1627 /// of the block containing both \p CmpInstr and \p MI. \returns None otherwise.
1628 ///
1629 /// Collects the instructions using those flags in \p CCUseInstrs if provided.
1630 static Optional<UsedNZCV>
1631 examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1632                  const TargetRegisterInfo &TRI,
1633                  SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr) {
1634   MachineBasicBlock *CmpParent = CmpInstr.getParent();
1635   if (MI.getParent() != CmpParent)
1636     return None;
1637 
1638   if (areCFlagsAliveInSuccessors(CmpParent))
1639     return None;
1640 
1641   UsedNZCV NZCVUsedAfterCmp;
1642   for (MachineInstr &Instr : instructionsWithoutDebug(
1643            std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1644     if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1645       AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1646       if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1647         return None;
1648       NZCVUsedAfterCmp |= getUsedNZCV(CC);
1649       if (CCUseInstrs)
1650         CCUseInstrs->push_back(&Instr);
1651     }
1652     if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1653       break;
1654   }
1655   if (NZCVUsedAfterCmp.C || NZCVUsedAfterCmp.V)
1656     return None;
1657   return NZCVUsedAfterCmp;
1658 }
1659 
1660 static bool isADDSRegImm(unsigned Opcode) {
1661   return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1662 }
1663 
1664 static bool isSUBSRegImm(unsigned Opcode) {
1665   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1666 }
1667 
1668 /// Check if CmpInstr can be substituted by MI.
1669 ///
1670 /// CmpInstr can be substituted when:
1671 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1672 /// - and MI and CmpInstr are in the same MachineBB
1673 /// - and condition flags are not alive in successors of the CmpInstr parent
1674 /// - and, if MI opcode is already the S form, there are no defs of flags
1675 ///        between MI and CmpInstr,
1676 ///        or, if MI opcode is not the S form, there are neither defs nor uses
1677 ///        of flags between MI and CmpInstr
1678 /// - and the C and V flags are not used after CmpInstr.
1679 static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1680                                        const TargetRegisterInfo &TRI) {
1681   assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1682 
1683   const unsigned CmpOpcode = CmpInstr.getOpcode();
1684   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1685     return false;
1686 
1687   if (!examineCFlagsUse(MI, CmpInstr, TRI))
1688     return false;
1689 
1690   AccessKind AccessToCheck = AK_Write;
1691   if (sForm(MI) != MI.getOpcode())
1692     AccessToCheck = AK_All;
1693   return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1694 }
1695 
1696 /// Substitute an instruction comparing to zero with another instruction
1697 /// which produces the needed condition flags.
1698 ///
1699 /// Return true on success.
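///
/// For example (illustrative; registers chosen arbitrarily), the compare in
///    \code
///   sub  w8, w0, w1
///   cmp  w8, #0
///   b.eq <target>
///    \endcode
/// can be folded into the flag-setting form
///    \code
///   subs w8, w0, w1
///   b.eq <target>
///    \endcode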
1700 bool AArch64InstrInfo::substituteCmpToZero(
1701     MachineInstr &CmpInstr, unsigned SrcReg,
1702     const MachineRegisterInfo &MRI) const {
1703   // Get the unique definition of SrcReg.
1704   MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1705   if (!MI)
1706     return false;
1707 
1708   const TargetRegisterInfo &TRI = getRegisterInfo();
1709 
1710   unsigned NewOpc = sForm(*MI);
1711   if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1712     return false;
1713 
1714   if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1715     return false;
1716 
1717   // Update the instruction to set NZCV.
1718   MI->setDesc(get(NewOpc));
1719   CmpInstr.eraseFromParent();
1720   bool succeeded = UpdateOperandRegClass(*MI);
1721   (void)succeeded;
1722   assert(succeeded && "Some operands reg class are incompatible!");
1723   MI->addRegisterDefined(AArch64::NZCV, &TRI);
1724   return true;
1725 }
1726 
1727 /// \returns True if \p CmpInstr can be removed.
1728 ///
1729 /// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1730 /// codes used in \p CCUseInstrs must be inverted.
1731 static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1732                                  int CmpValue, const TargetRegisterInfo &TRI,
1733                                  SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1734                                  bool &IsInvertCC) {
1735   assert((CmpValue == 0 || CmpValue == 1) &&
1736          "Only comparisons to 0 or 1 considered for removal!");
1737 
1738   // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1739   unsigned MIOpc = MI.getOpcode();
1740   if (MIOpc == AArch64::CSINCWr) {
1741     if (MI.getOperand(1).getReg() != AArch64::WZR ||
1742         MI.getOperand(2).getReg() != AArch64::WZR)
1743       return false;
1744   } else if (MIOpc == AArch64::CSINCXr) {
1745     if (MI.getOperand(1).getReg() != AArch64::XZR ||
1746         MI.getOperand(2).getReg() != AArch64::XZR)
1747       return false;
1748   } else {
1749     return false;
1750   }
1751   AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1752   if (MICC == AArch64CC::Invalid)
1753     return false;
1754 
1755   // NZCV needs to be defined
1756   if (MI.findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
1757     return false;
1758 
1759   // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1760   const unsigned CmpOpcode = CmpInstr.getOpcode();
1761   bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1762   if (CmpValue && !IsSubsRegImm)
1763     return false;
1764   if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1765     return false;
1766 
1767   // MI conditions allowed: eq, ne, mi, pl
1768   UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1769   if (MIUsedNZCV.C || MIUsedNZCV.V)
1770     return false;
1771 
1772   Optional<UsedNZCV> NZCVUsedAfterCmp =
1773       examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1774   // Condition flags are not used in CmpInstr's basic block successors, and only
1775   // the Z or N flags are allowed to be used after CmpInstr within its basic block.
1776   if (!NZCVUsedAfterCmp)
1777     return false;
1778   // Z or N flag used after CmpInstr must correspond to the flag used in MI
1779   if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1780       (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1781     return false;
1782   // If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne.
1783   if (MIUsedNZCV.N && !CmpValue)
1784     return false;
1785 
1786   // There must be no defs of flags between MI and CmpInstr
1787   if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1788     return false;
1789 
1790   // Condition code is inverted in the following cases:
1791   // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1792   // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1793   IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1794                (!CmpValue && MICC == AArch64CC::NE);
1795   return true;
1796 }
1797 
1798 /// Remove comparison in csinc-cmp sequence
1799 ///
1800 /// Examples:
1801 /// 1. \code
1802 ///   csinc w9, wzr, wzr, ne
1803 ///   cmp   w9, #0
1804 ///   b.eq
1805 ///    \endcode
1806 /// to
1807 ///    \code
1808 ///   csinc w9, wzr, wzr, ne
1809 ///   b.ne
1810 ///    \endcode
1811 ///
1812 /// 2. \code
1813 ///   csinc x2, xzr, xzr, mi
1814 ///   cmp   x2, #1
1815 ///   b.pl
1816 ///    \endcode
1817 /// to
1818 ///    \code
1819 ///   csinc x2, xzr, xzr, mi
1820 ///   b.pl
1821 ///    \endcode
1822 ///
1823 /// \param  CmpInstr comparison instruction
1824 /// \return True when comparison removed
1825 bool AArch64InstrInfo::removeCmpToZeroOrOne(
1826     MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1827     const MachineRegisterInfo &MRI) const {
1828   MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1829   if (!MI)
1830     return false;
1831   const TargetRegisterInfo &TRI = getRegisterInfo();
1832   SmallVector<MachineInstr *, 4> CCUseInstrs;
1833   bool IsInvertCC = false;
1834   if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1835                             IsInvertCC))
1836     return false;
1837   // Make transformation
1838   CmpInstr.eraseFromParent();
1839   if (IsInvertCC) {
1840     // Invert condition codes in CmpInstr CC users
1841     for (MachineInstr *CCUseInstr : CCUseInstrs) {
1842       int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
1843       assert(Idx >= 0 && "Unexpected instruction using CC.");
1844       MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1845       AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1846           static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1847       CCOperand.setImm(CCUse);
1848     }
1849   }
1850   return true;
1851 }
1852 
1853 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1854   if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1855       MI.getOpcode() != AArch64::CATCHRET)
1856     return false;
1857 
1858   MachineBasicBlock &MBB = *MI.getParent();
1859   auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1860   auto TRI = Subtarget.getRegisterInfo();
1861   DebugLoc DL = MI.getDebugLoc();
1862 
1863   if (MI.getOpcode() == AArch64::CATCHRET) {
1864     // Skip to the first instruction before the epilog.
1865     const TargetInstrInfo *TII =
1866       MBB.getParent()->getSubtarget().getInstrInfo();
1867     MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1868     auto MBBI = MachineBasicBlock::iterator(MI);
1869     MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1870     while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1871            FirstEpilogSEH != MBB.begin())
1872       FirstEpilogSEH = std::prev(FirstEpilogSEH);
1873     if (FirstEpilogSEH != MBB.begin())
1874       FirstEpilogSEH = std::next(FirstEpilogSEH);
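    // Materialize the address of TargetMBB into X0 with an ADRP + ADDXri pair,
    // inserted just before the first FrameDestroy instruction of the epilogue.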
1875     BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1876         .addReg(AArch64::X0, RegState::Define)
1877         .addMBB(TargetMBB);
1878     BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1879         .addReg(AArch64::X0, RegState::Define)
1880         .addReg(AArch64::X0)
1881         .addMBB(TargetMBB)
1882         .addImm(0);
1883     return true;
1884   }
1885 
1886   Register Reg = MI.getOperand(0).getReg();
1887   Module &M = *MBB.getParent()->getFunction().getParent();
1888   if (M.getStackProtectorGuard() == "sysreg") {
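    // Expand LOAD_STACK_GUARD as "mrs xN, <sysreg>" followed by a load of the
    // guard value at [xN + offset]; the cases below pick ldr, ldur, or an
    // add/sub plus ldr depending on how the offset encodes.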
1889     const AArch64SysReg::SysReg *SrcReg =
1890         AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
1891     if (!SrcReg)
1892       report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
1893 
1894     // mrs xN, sysreg
1895     BuildMI(MBB, MI, DL, get(AArch64::MRS))
1896         .addDef(Reg, RegState::Renamable)
1897         .addImm(SrcReg->Encoding);
1898     int Offset = M.getStackProtectorGuardOffset();
1899     if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
1900       // ldr xN, [xN, #offset]
1901       BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
1902           .addDef(Reg)
1903           .addUse(Reg, RegState::Kill)
1904           .addImm(Offset / 8);
1905     } else if (Offset >= -256 && Offset <= 255) {
1906       // ldur xN, [xN, #offset]
1907       BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
1908           .addDef(Reg)
1909           .addUse(Reg, RegState::Kill)
1910           .addImm(Offset);
1911     } else if (Offset >= -4095 && Offset <= 4095) {
1912       if (Offset > 0) {
1913         // add xN, xN, #offset
1914         BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
1915             .addDef(Reg)
1916             .addUse(Reg, RegState::Kill)
1917             .addImm(Offset)
1918             .addImm(0);
1919       } else {
1920         // sub xN, xN, #offset
1921         BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
1922             .addDef(Reg)
1923             .addUse(Reg, RegState::Kill)
1924             .addImm(-Offset)
1925             .addImm(0);
1926       }
1927       // ldr xN, [xN]
1928       BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
1929           .addDef(Reg)
1930           .addUse(Reg, RegState::Kill)
1931           .addImm(0);
1932     } else {
1933       // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
1934       // than 32760.
1935       // It might be nice to use AArch64::MOVi32imm here, which would get
1936       // expanded in PreSched2 after PostRA, but our lone scratch Reg already
1937       // contains the MRS result. findScratchNonCalleeSaveRegister() in
1938       // AArch64FrameLowering might help us find such a scratch register
1939       // though. If we failed to find a scratch register, we could emit a
1940       // stream of add instructions to build up the immediate. Or, we could try
1941       // to insert a AArch64::MOVi32imm before register allocation so that we
1942       // didn't need to scavenge for a scratch register.
1943       report_fatal_error("Unable to encode Stack Protector Guard Offset");
1944     }
1945     MBB.erase(MI);
1946     return true;
1947   }
1948 
1949   const GlobalValue *GV =
1950       cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1951   const TargetMachine &TM = MBB.getParent()->getTarget();
1952   unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1953   const unsigned char MO_NC = AArch64II::MO_NC;
1954 
1955   if ((OpFlags & AArch64II::MO_GOT) != 0) {
1956     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1957         .addGlobalAddress(GV, 0, OpFlags);
1958     if (Subtarget.isTargetILP32()) {
1959       unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
1960       BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
1961           .addDef(Reg32, RegState::Dead)
1962           .addUse(Reg, RegState::Kill)
1963           .addImm(0)
1964           .addMemOperand(*MI.memoperands_begin())
1965           .addDef(Reg, RegState::Implicit);
1966     } else {
1967       BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1968           .addReg(Reg, RegState::Kill)
1969           .addImm(0)
1970           .addMemOperand(*MI.memoperands_begin());
1971     }
1972   } else if (TM.getCodeModel() == CodeModel::Large) {
1973     assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
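    // Materialize the full 64-bit address with a movz/movk chain and load
    // through it; roughly (illustrative):
    //   movz x0, #:abs_g0_nc:sym
    //   movk x0, #:abs_g1_nc:sym, lsl #16
    //   movk x0, #:abs_g2_nc:sym, lsl #32
    //   movk x0, #:abs_g3:sym,    lsl #48
    //   ldr  x0, [x0]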
1974     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1975         .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1976         .addImm(0);
1977     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1978         .addReg(Reg, RegState::Kill)
1979         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1980         .addImm(16);
1981     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1982         .addReg(Reg, RegState::Kill)
1983         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1984         .addImm(32);
1985     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1986         .addReg(Reg, RegState::Kill)
1987         .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1988         .addImm(48);
1989     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1990         .addReg(Reg, RegState::Kill)
1991         .addImm(0)
1992         .addMemOperand(*MI.memoperands_begin());
1993   } else if (TM.getCodeModel() == CodeModel::Tiny) {
1994     BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
1995         .addGlobalAddress(GV, 0, OpFlags);
1996   } else {
1997     BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1998         .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1999     unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2000     if (Subtarget.isTargetILP32()) {
2001       unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2002       BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2003           .addDef(Reg32, RegState::Dead)
2004           .addUse(Reg, RegState::Kill)
2005           .addGlobalAddress(GV, 0, LoFlags)
2006           .addMemOperand(*MI.memoperands_begin())
2007           .addDef(Reg, RegState::Implicit);
2008     } else {
2009       BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2010           .addReg(Reg, RegState::Kill)
2011           .addGlobalAddress(GV, 0, LoFlags)
2012           .addMemOperand(*MI.memoperands_begin());
2013     }
2014   }
2015 
2016   MBB.erase(MI);
2017 
2018   return true;
2019 }
2020 
2021 // Return true if this instruction simply sets its single destination register
2022 // to zero. This is equivalent to a register rename of the zero-register.
2023 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2024   switch (MI.getOpcode()) {
2025   default:
2026     break;
2027   case AArch64::MOVZWi:
2028   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2029     if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2030       assert(MI.getDesc().getNumOperands() == 3 &&
2031              MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2032       return true;
2033     }
2034     break;
2035   case AArch64::ANDWri: // and Rd, Rzr, #imm
2036     return MI.getOperand(1).getReg() == AArch64::WZR;
2037   case AArch64::ANDXri:
2038     return MI.getOperand(1).getReg() == AArch64::XZR;
2039   case TargetOpcode::COPY:
2040     return MI.getOperand(1).getReg() == AArch64::WZR;
2041   }
2042   return false;
2043 }
2044 
2045 // Return true if this instruction simply renames a general register without
2046 // modifying bits.
2047 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2048   switch (MI.getOpcode()) {
2049   default:
2050     break;
2051   case TargetOpcode::COPY: {
2052     // GPR32 copies will be lowered to ORRXrs
2053     Register DstReg = MI.getOperand(0).getReg();
2054     return (AArch64::GPR32RegClass.contains(DstReg) ||
2055             AArch64::GPR64RegClass.contains(DstReg));
2056   }
2057   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2058     if (MI.getOperand(1).getReg() == AArch64::XZR) {
2059       assert(MI.getDesc().getNumOperands() == 4 &&
2060              MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2061       return true;
2062     }
2063     break;
2064   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2065     if (MI.getOperand(2).getImm() == 0) {
2066       assert(MI.getDesc().getNumOperands() == 4 &&
2067              MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2068       return true;
2069     }
2070     break;
2071   }
2072   return false;
2073 }
2074 
2075 // Return true if this instruction simply renames a general register without
2076 // modifying bits.
2077 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2078   switch (MI.getOpcode()) {
2079   default:
2080     break;
2081   case TargetOpcode::COPY: {
2082     Register DstReg = MI.getOperand(0).getReg();
2083     return AArch64::FPR128RegClass.contains(DstReg);
2084   }
2085   case AArch64::ORRv16i8:
2086     if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2087       assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2088              "invalid ORRv16i8 operands");
2089       return true;
2090     }
2091     break;
2092   }
2093   return false;
2094 }
2095 
2096 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2097                                                int &FrameIndex) const {
2098   switch (MI.getOpcode()) {
2099   default:
2100     break;
2101   case AArch64::LDRWui:
2102   case AArch64::LDRXui:
2103   case AArch64::LDRBui:
2104   case AArch64::LDRHui:
2105   case AArch64::LDRSui:
2106   case AArch64::LDRDui:
2107   case AArch64::LDRQui:
2108     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2109         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2110       FrameIndex = MI.getOperand(1).getIndex();
2111       return MI.getOperand(0).getReg();
2112     }
2113     break;
2114   }
2115 
2116   return 0;
2117 }
2118 
2119 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2120                                               int &FrameIndex) const {
2121   switch (MI.getOpcode()) {
2122   default:
2123     break;
2124   case AArch64::STRWui:
2125   case AArch64::STRXui:
2126   case AArch64::STRBui:
2127   case AArch64::STRHui:
2128   case AArch64::STRSui:
2129   case AArch64::STRDui:
2130   case AArch64::STRQui:
2131   case AArch64::LDR_PXI:
2132   case AArch64::STR_PXI:
2133     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2134         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2135       FrameIndex = MI.getOperand(1).getIndex();
2136       return MI.getOperand(0).getReg();
2137     }
2138     break;
2139   }
2140   return 0;
2141 }
2142 
2143 /// Check all MachineMemOperands for a hint to suppress pairing.
2144 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2145   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2146     return MMO->getFlags() & MOSuppressPair;
2147   });
2148 }
2149 
2150 /// Set a flag on the first MachineMemOperand to suppress pairing.
2151 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2152   if (MI.memoperands_empty())
2153     return;
2154   (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2155 }
2156 
2157 /// Check all MachineMemOperands for a hint that the load/store is strided.
2158 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2159   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2160     return MMO->getFlags() & MOStridedAccess;
2161   });
2162 }
2163 
2164 bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2165   switch (Opc) {
2166   default:
2167     return false;
2168   case AArch64::STURSi:
2169   case AArch64::STRSpre:
2170   case AArch64::STURDi:
2171   case AArch64::STRDpre:
2172   case AArch64::STURQi:
2173   case AArch64::STRQpre:
2174   case AArch64::STURBBi:
2175   case AArch64::STURHHi:
2176   case AArch64::STURWi:
2177   case AArch64::STRWpre:
2178   case AArch64::STURXi:
2179   case AArch64::STRXpre:
2180   case AArch64::LDURSi:
2181   case AArch64::LDRSpre:
2182   case AArch64::LDURDi:
2183   case AArch64::LDRDpre:
2184   case AArch64::LDURQi:
2185   case AArch64::LDRQpre:
2186   case AArch64::LDURWi:
2187   case AArch64::LDRWpre:
2188   case AArch64::LDURXi:
2189   case AArch64::LDRXpre:
2190   case AArch64::LDURSWi:
2191   case AArch64::LDURHHi:
2192   case AArch64::LDURBBi:
2193   case AArch64::LDURSBWi:
2194   case AArch64::LDURSHWi:
2195     return true;
2196   }
2197 }
2198 
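// Map a scaled, unsigned-immediate load/store (or prefetch) opcode to its
// unscaled counterpart (LDUR/STUR/PRFUM), if one exists.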
2199 Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2200   switch (Opc) {
2201   default: return {};
2202   case AArch64::PRFMui: return AArch64::PRFUMi;
2203   case AArch64::LDRXui: return AArch64::LDURXi;
2204   case AArch64::LDRWui: return AArch64::LDURWi;
2205   case AArch64::LDRBui: return AArch64::LDURBi;
2206   case AArch64::LDRHui: return AArch64::LDURHi;
2207   case AArch64::LDRSui: return AArch64::LDURSi;
2208   case AArch64::LDRDui: return AArch64::LDURDi;
2209   case AArch64::LDRQui: return AArch64::LDURQi;
2210   case AArch64::LDRBBui: return AArch64::LDURBBi;
2211   case AArch64::LDRHHui: return AArch64::LDURHHi;
2212   case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2213   case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2214   case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2215   case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2216   case AArch64::LDRSWui: return AArch64::LDURSWi;
2217   case AArch64::STRXui: return AArch64::STURXi;
2218   case AArch64::STRWui: return AArch64::STURWi;
2219   case AArch64::STRBui: return AArch64::STURBi;
2220   case AArch64::STRHui: return AArch64::STURHi;
2221   case AArch64::STRSui: return AArch64::STURSi;
2222   case AArch64::STRDui: return AArch64::STURDi;
2223   case AArch64::STRQui: return AArch64::STURQi;
2224   case AArch64::STRBBui: return AArch64::STURBBi;
2225   case AArch64::STRHHui: return AArch64::STURHHi;
2226   }
2227 }
2228 
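// Return the operand index of the immediate offset operand for the given
// load/store opcode: 3 for the opcodes grouped below, 2 for everything else.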
2229 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2230   switch (Opc) {
2231   default:
2232     return 2;
2233   case AArch64::LDPXi:
2234   case AArch64::LDPDi:
2235   case AArch64::STPXi:
2236   case AArch64::STPDi:
2237   case AArch64::LDNPXi:
2238   case AArch64::LDNPDi:
2239   case AArch64::STNPXi:
2240   case AArch64::STNPDi:
2241   case AArch64::LDPQi:
2242   case AArch64::STPQi:
2243   case AArch64::LDNPQi:
2244   case AArch64::STNPQi:
2245   case AArch64::LDPWi:
2246   case AArch64::LDPSi:
2247   case AArch64::STPWi:
2248   case AArch64::STPSi:
2249   case AArch64::LDNPWi:
2250   case AArch64::LDNPSi:
2251   case AArch64::STNPWi:
2252   case AArch64::STNPSi:
2253   case AArch64::LDG:
2254   case AArch64::STGPi:
2255 
2256   case AArch64::LD1B_IMM:
2257   case AArch64::LD1B_H_IMM:
2258   case AArch64::LD1B_S_IMM:
2259   case AArch64::LD1B_D_IMM:
2260   case AArch64::LD1SB_H_IMM:
2261   case AArch64::LD1SB_S_IMM:
2262   case AArch64::LD1SB_D_IMM:
2263   case AArch64::LD1H_IMM:
2264   case AArch64::LD1H_S_IMM:
2265   case AArch64::LD1H_D_IMM:
2266   case AArch64::LD1SH_S_IMM:
2267   case AArch64::LD1SH_D_IMM:
2268   case AArch64::LD1W_IMM:
2269   case AArch64::LD1W_D_IMM:
2270   case AArch64::LD1SW_D_IMM:
2271   case AArch64::LD1D_IMM:
2272 
2273   case AArch64::ST1B_IMM:
2274   case AArch64::ST1B_H_IMM:
2275   case AArch64::ST1B_S_IMM:
2276   case AArch64::ST1B_D_IMM:
2277   case AArch64::ST1H_IMM:
2278   case AArch64::ST1H_S_IMM:
2279   case AArch64::ST1H_D_IMM:
2280   case AArch64::ST1W_IMM:
2281   case AArch64::ST1W_D_IMM:
2282   case AArch64::ST1D_IMM:
2283 
2284   case AArch64::LD1RB_IMM:
2285   case AArch64::LD1RB_H_IMM:
2286   case AArch64::LD1RB_S_IMM:
2287   case AArch64::LD1RB_D_IMM:
2288   case AArch64::LD1RSB_H_IMM:
2289   case AArch64::LD1RSB_S_IMM:
2290   case AArch64::LD1RSB_D_IMM:
2291   case AArch64::LD1RH_IMM:
2292   case AArch64::LD1RH_S_IMM:
2293   case AArch64::LD1RH_D_IMM:
2294   case AArch64::LD1RSH_S_IMM:
2295   case AArch64::LD1RSH_D_IMM:
2296   case AArch64::LD1RW_IMM:
2297   case AArch64::LD1RW_D_IMM:
2298   case AArch64::LD1RSW_IMM:
2299   case AArch64::LD1RD_IMM:
2300 
2301   case AArch64::LDNT1B_ZRI:
2302   case AArch64::LDNT1H_ZRI:
2303   case AArch64::LDNT1W_ZRI:
2304   case AArch64::LDNT1D_ZRI:
2305   case AArch64::STNT1B_ZRI:
2306   case AArch64::STNT1H_ZRI:
2307   case AArch64::STNT1W_ZRI:
2308   case AArch64::STNT1D_ZRI:
2309 
2310   case AArch64::LDNF1B_IMM:
2311   case AArch64::LDNF1B_H_IMM:
2312   case AArch64::LDNF1B_S_IMM:
2313   case AArch64::LDNF1B_D_IMM:
2314   case AArch64::LDNF1SB_H_IMM:
2315   case AArch64::LDNF1SB_S_IMM:
2316   case AArch64::LDNF1SB_D_IMM:
2317   case AArch64::LDNF1H_IMM:
2318   case AArch64::LDNF1H_S_IMM:
2319   case AArch64::LDNF1H_D_IMM:
2320   case AArch64::LDNF1SH_S_IMM:
2321   case AArch64::LDNF1SH_D_IMM:
2322   case AArch64::LDNF1W_IMM:
2323   case AArch64::LDNF1W_D_IMM:
2324   case AArch64::LDNF1SW_D_IMM:
2325   case AArch64::LDNF1D_IMM:
2326     return 3;
2327   case AArch64::ADDG:
2328   case AArch64::STGOffset:
2329   case AArch64::LDR_PXI:
2330   case AArch64::STR_PXI:
2331     return 2;
2332   }
2333 }
2334 
2335 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2336   switch (MI.getOpcode()) {
2337   default:
2338     return false;
2339   // Scaled instructions.
2340   case AArch64::STRSui:
2341   case AArch64::STRDui:
2342   case AArch64::STRQui:
2343   case AArch64::STRXui:
2344   case AArch64::STRWui:
2345   case AArch64::LDRSui:
2346   case AArch64::LDRDui:
2347   case AArch64::LDRQui:
2348   case AArch64::LDRXui:
2349   case AArch64::LDRWui:
2350   case AArch64::LDRSWui:
2351   // Unscaled instructions.
2352   case AArch64::STURSi:
2353   case AArch64::STRSpre:
2354   case AArch64::STURDi:
2355   case AArch64::STRDpre:
2356   case AArch64::STURQi:
2357   case AArch64::STRQpre:
2358   case AArch64::STURWi:
2359   case AArch64::STRWpre:
2360   case AArch64::STURXi:
2361   case AArch64::STRXpre:
2362   case AArch64::LDURSi:
2363   case AArch64::LDRSpre:
2364   case AArch64::LDURDi:
2365   case AArch64::LDRDpre:
2366   case AArch64::LDURQi:
2367   case AArch64::LDRQpre:
2368   case AArch64::LDURWi:
2369   case AArch64::LDRWpre:
2370   case AArch64::LDURXi:
2371   case AArch64::LDRXpre:
2372   case AArch64::LDURSWi:
2373     return true;
2374   }
2375 }
2376 
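// Map an ADD/SUB/AND/BIC opcode to its flag-setting (S) form, reporting in
// Is64Bit whether the 64-bit variant was selected.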
2377 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
2378                                                    bool &Is64Bit) {
2379   switch (Opc) {
2380   default:
2381     llvm_unreachable("Opcode has no flag setting equivalent!");
2382   // 32-bit cases:
2383   case AArch64::ADDWri:
2384     Is64Bit = false;
2385     return AArch64::ADDSWri;
2386   case AArch64::ADDWrr:
2387     Is64Bit = false;
2388     return AArch64::ADDSWrr;
2389   case AArch64::ADDWrs:
2390     Is64Bit = false;
2391     return AArch64::ADDSWrs;
2392   case AArch64::ADDWrx:
2393     Is64Bit = false;
2394     return AArch64::ADDSWrx;
2395   case AArch64::ANDWri:
2396     Is64Bit = false;
2397     return AArch64::ANDSWri;
2398   case AArch64::ANDWrr:
2399     Is64Bit = false;
2400     return AArch64::ANDSWrr;
2401   case AArch64::ANDWrs:
2402     Is64Bit = false;
2403     return AArch64::ANDSWrs;
2404   case AArch64::BICWrr:
2405     Is64Bit = false;
2406     return AArch64::BICSWrr;
2407   case AArch64::BICWrs:
2408     Is64Bit = false;
2409     return AArch64::BICSWrs;
2410   case AArch64::SUBWri:
2411     Is64Bit = false;
2412     return AArch64::SUBSWri;
2413   case AArch64::SUBWrr:
2414     Is64Bit = false;
2415     return AArch64::SUBSWrr;
2416   case AArch64::SUBWrs:
2417     Is64Bit = false;
2418     return AArch64::SUBSWrs;
2419   case AArch64::SUBWrx:
2420     Is64Bit = false;
2421     return AArch64::SUBSWrx;
2422   // 64-bit cases:
2423   case AArch64::ADDXri:
2424     Is64Bit = true;
2425     return AArch64::ADDSXri;
2426   case AArch64::ADDXrr:
2427     Is64Bit = true;
2428     return AArch64::ADDSXrr;
2429   case AArch64::ADDXrs:
2430     Is64Bit = true;
2431     return AArch64::ADDSXrs;
2432   case AArch64::ADDXrx:
2433     Is64Bit = true;
2434     return AArch64::ADDSXrx;
2435   case AArch64::ANDXri:
2436     Is64Bit = true;
2437     return AArch64::ANDSXri;
2438   case AArch64::ANDXrr:
2439     Is64Bit = true;
2440     return AArch64::ANDSXrr;
2441   case AArch64::ANDXrs:
2442     Is64Bit = true;
2443     return AArch64::ANDSXrs;
2444   case AArch64::BICXrr:
2445     Is64Bit = true;
2446     return AArch64::BICSXrr;
2447   case AArch64::BICXrs:
2448     Is64Bit = true;
2449     return AArch64::BICSXrs;
2450   case AArch64::SUBXri:
2451     Is64Bit = true;
2452     return AArch64::SUBSXri;
2453   case AArch64::SUBXrr:
2454     Is64Bit = true;
2455     return AArch64::SUBSXrr;
2456   case AArch64::SUBXrs:
2457     Is64Bit = true;
2458     return AArch64::SUBSXrs;
2459   case AArch64::SUBXrx:
2460     Is64Bit = true;
2461     return AArch64::SUBSXrx;
2462   }
2463 }
2464 
2465 // Is this a candidate for ld/st merging or pairing?  For example, we don't
2466 // touch volatiles or load/stores that have a hint to avoid pair formation.
2467 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2468 
2469   bool IsPreLdSt = isPreLdSt(MI);
2470 
2471   // If this is a volatile load/store, don't mess with it.
2472   if (MI.hasOrderedMemoryRef())
2473     return false;
2474 
2475   // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2476   // For Pre-inc LD/ST, the operand is shifted by one.
2477   assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2478           MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2479          "Expected a reg or frame index operand.");
2480 
2481   // For Pre-indexed addressing quadword instructions, the third operand is the
2482   // immediate value.
2483   bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2484 
2485   if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2486     return false;
2487 
2488   // Can't merge/pair if the instruction modifies the base register.
2489   // e.g., ldr x0, [x0]
2490   // This case will never occur with an FI base.
2491   // However, if the instruction is an LDR/STR<S,D,Q,W,X>pre, it can be merged.
2492   // For example:
2493   //   ldr q0, [x11, #32]!
2494   //   ldr q1, [x11, #16]
2495   //   to
2496   //   ldp q0, q1, [x11, #32]!
2497   if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2498     Register BaseReg = MI.getOperand(1).getReg();
2499     const TargetRegisterInfo *TRI = &getRegisterInfo();
2500     if (MI.modifiesRegister(BaseReg, TRI))
2501       return false;
2502   }
2503 
2504   // Check if this load/store has a hint to avoid pair formation.
2505   // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2506   if (isLdStPairSuppressed(MI))
2507     return false;
2508 
2509   // Do not pair any callee-save store/reload instructions in the
2510   // prologue/epilogue if the CFI information encoded the operations as separate
2511   // instructions, as that would cause the size of the actual prologue to differ
2512   // from the prologue size recorded in the Windows CFI.
2513   const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2514   bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2515                      MI.getMF()->getFunction().needsUnwindTableEntry();
2516   if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2517                       MI.getFlag(MachineInstr::FrameDestroy)))
2518     return false;
2519 
2520   // On some CPUs quad load/store pairs are slower than two single load/stores.
2521   if (Subtarget.isPaired128Slow()) {
2522     switch (MI.getOpcode()) {
2523     default:
2524       break;
2525     case AArch64::LDURQi:
2526     case AArch64::STURQi:
2527     case AArch64::LDRQui:
2528     case AArch64::STRQui:
2529       return false;
2530     }
2531   }
2532 
2533   return true;
2534 }
2535 
2536 bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2537     const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2538     int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
2539     const TargetRegisterInfo *TRI) const {
2540   if (!LdSt.mayLoadOrStore())
2541     return false;
2542 
2543   const MachineOperand *BaseOp;
2544   if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2545                                     Width, TRI))
2546     return false;
2547   BaseOps.push_back(BaseOp);
2548   return true;
2549 }
2550 
2551 Optional<ExtAddrMode>
2552 AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2553                                           const TargetRegisterInfo *TRI) const {
2554   const MachineOperand *Base; // Filled with the base operand of MI.
2555   int64_t Offset;             // Filled with the offset of MI.
2556   bool OffsetIsScalable;
2557   if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2558     return None;
2559 
2560   if (!Base->isReg())
2561     return None;
2562   ExtAddrMode AM;
2563   AM.BaseReg = Base->getReg();
2564   AM.Displacement = Offset;
2565   AM.ScaledReg = 0;
2566   AM.Scale = 0;
2567   return AM;
2568 }
2569 
2570 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
2571     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
2572     bool &OffsetIsScalable, unsigned &Width,
2573     const TargetRegisterInfo *TRI) const {
2574   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2575   // Handle only loads/stores with base register followed by immediate offset.
2576   if (LdSt.getNumExplicitOperands() == 3) {
2577     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
2578     if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
2579         !LdSt.getOperand(2).isImm())
2580       return false;
2581   } else if (LdSt.getNumExplicitOperands() == 4) {
2582     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
2583     if (!LdSt.getOperand(1).isReg() ||
2584         (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
2585         !LdSt.getOperand(3).isImm())
2586       return false;
2587   } else
2588     return false;
2589 
2590   // Get the scaling factor for the instruction and set the width for the
2591   // instruction.
2592   TypeSize Scale(0U, false);
2593   int64_t Dummy1, Dummy2;
2594 
2595   // If this returns false, then it's an instruction we don't want to handle.
2596   if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
2597     return false;
2598 
2599   // Compute the offset. Offset is calculated as the immediate operand
2600   // multiplied by the scaling factor. Unscaled instructions have scaling factor
2601   // set to 1.
2602   if (LdSt.getNumExplicitOperands() == 3) {
2603     BaseOp = &LdSt.getOperand(1);
2604     Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize();
2605   } else {
2606     assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
2607     BaseOp = &LdSt.getOperand(2);
2608     Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize();
2609   }
2610   OffsetIsScalable = Scale.isScalable();
2611 
2612   if (!BaseOp->isReg() && !BaseOp->isFI())
2613     return false;
2614 
2615   return true;
2616 }
2617 
2618 MachineOperand &
2619 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
2620   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2621   MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2622   assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2623   return OfsOp;
2624 }
2625 
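// Report, for a memory opcode, the scaling applied to its immediate offset
// operand (Scale), the number of bytes accessed (Width, zero for non-memory
// opcodes such as ADDG), and the valid immediate range [MinOffset, MaxOffset].
// Returns false for opcodes this table does not describe.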
2626 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
2627                                     unsigned &Width, int64_t &MinOffset,
2628                                     int64_t &MaxOffset) {
2629   const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
2630   switch (Opcode) {
2631   // Not a memory operation or something we want to handle.
2632   default:
2633     Scale = TypeSize::Fixed(0);
2634     Width = 0;
2635     MinOffset = MaxOffset = 0;
2636     return false;
2637   case AArch64::STRWpost:
2638   case AArch64::LDRWpost:
2639     Width = 32;
2640     Scale = TypeSize::Fixed(4);
2641     MinOffset = -256;
2642     MaxOffset = 255;
2643     break;
2644   case AArch64::LDURQi:
2645   case AArch64::STURQi:
2646     Width = 16;
2647     Scale = TypeSize::Fixed(1);
2648     MinOffset = -256;
2649     MaxOffset = 255;
2650     break;
2651   case AArch64::PRFUMi:
2652   case AArch64::LDURXi:
2653   case AArch64::LDURDi:
2654   case AArch64::STURXi:
2655   case AArch64::STURDi:
2656     Width = 8;
2657     Scale = TypeSize::Fixed(1);
2658     MinOffset = -256;
2659     MaxOffset = 255;
2660     break;
2661   case AArch64::LDURWi:
2662   case AArch64::LDURSi:
2663   case AArch64::LDURSWi:
2664   case AArch64::STURWi:
2665   case AArch64::STURSi:
2666     Width = 4;
2667     Scale = TypeSize::Fixed(1);
2668     MinOffset = -256;
2669     MaxOffset = 255;
2670     break;
2671   case AArch64::LDURHi:
2672   case AArch64::LDURHHi:
2673   case AArch64::LDURSHXi:
2674   case AArch64::LDURSHWi:
2675   case AArch64::STURHi:
2676   case AArch64::STURHHi:
2677     Width = 2;
2678     Scale = TypeSize::Fixed(1);
2679     MinOffset = -256;
2680     MaxOffset = 255;
2681     break;
2682   case AArch64::LDURBi:
2683   case AArch64::LDURBBi:
2684   case AArch64::LDURSBXi:
2685   case AArch64::LDURSBWi:
2686   case AArch64::STURBi:
2687   case AArch64::STURBBi:
2688     Width = 1;
2689     Scale = TypeSize::Fixed(1);
2690     MinOffset = -256;
2691     MaxOffset = 255;
2692     break;
2693   case AArch64::LDPQi:
2694   case AArch64::LDNPQi:
2695   case AArch64::STPQi:
2696   case AArch64::STNPQi:
2697     Scale = TypeSize::Fixed(16);
2698     Width = 32;
2699     MinOffset = -64;
2700     MaxOffset = 63;
2701     break;
2702   case AArch64::LDRQui:
2703   case AArch64::STRQui:
2704     Scale = TypeSize::Fixed(16);
2705     Width = 16;
2706     MinOffset = 0;
2707     MaxOffset = 4095;
2708     break;
2709   case AArch64::LDPXi:
2710   case AArch64::LDPDi:
2711   case AArch64::LDNPXi:
2712   case AArch64::LDNPDi:
2713   case AArch64::STPXi:
2714   case AArch64::STPDi:
2715   case AArch64::STNPXi:
2716   case AArch64::STNPDi:
2717     Scale = TypeSize::Fixed(8);
2718     Width = 16;
2719     MinOffset = -64;
2720     MaxOffset = 63;
2721     break;
2722   case AArch64::PRFMui:
2723   case AArch64::LDRXui:
2724   case AArch64::LDRDui:
2725   case AArch64::STRXui:
2726   case AArch64::STRDui:
2727     Scale = TypeSize::Fixed(8);
2728     Width = 8;
2729     MinOffset = 0;
2730     MaxOffset = 4095;
2731     break;
2732   case AArch64::StoreSwiftAsyncContext:
2733     // Store is an STRXui, but there might be an ADDXri in the expansion too.
2734     Scale = TypeSize::Fixed(1);
2735     Width = 8;
2736     MinOffset = 0;
2737     MaxOffset = 4095;
2738     break;
2739   case AArch64::LDPWi:
2740   case AArch64::LDPSi:
2741   case AArch64::LDNPWi:
2742   case AArch64::LDNPSi:
2743   case AArch64::STPWi:
2744   case AArch64::STPSi:
2745   case AArch64::STNPWi:
2746   case AArch64::STNPSi:
2747     Scale = TypeSize::Fixed(4);
2748     Width = 8;
2749     MinOffset = -64;
2750     MaxOffset = 63;
2751     break;
2752   case AArch64::LDRWui:
2753   case AArch64::LDRSui:
2754   case AArch64::LDRSWui:
2755   case AArch64::STRWui:
2756   case AArch64::STRSui:
2757     Scale = TypeSize::Fixed(4);
2758     Width = 4;
2759     MinOffset = 0;
2760     MaxOffset = 4095;
2761     break;
2762   case AArch64::LDRHui:
2763   case AArch64::LDRHHui:
2764   case AArch64::LDRSHWui:
2765   case AArch64::LDRSHXui:
2766   case AArch64::STRHui:
2767   case AArch64::STRHHui:
2768     Scale = TypeSize::Fixed(2);
2769     Width = 2;
2770     MinOffset = 0;
2771     MaxOffset = 4095;
2772     break;
2773   case AArch64::LDRBui:
2774   case AArch64::LDRBBui:
2775   case AArch64::LDRSBWui:
2776   case AArch64::LDRSBXui:
2777   case AArch64::STRBui:
2778   case AArch64::STRBBui:
2779     Scale = TypeSize::Fixed(1);
2780     Width = 1;
2781     MinOffset = 0;
2782     MaxOffset = 4095;
2783     break;
2784   case AArch64::STPXpre:
2785   case AArch64::LDPXpost:
2786   case AArch64::STPDpre:
2787   case AArch64::LDPDpost:
2788     Scale = TypeSize::Fixed(8);
2789     Width = 8;
2790     MinOffset = -512;
2791     MaxOffset = 504;
2792     break;
2793   case AArch64::STPQpre:
2794   case AArch64::LDPQpost:
2795     Scale = TypeSize::Fixed(16);
2796     Width = 16;
2797     MinOffset = -1024;
2798     MaxOffset = 1008;
2799     break;
2800   case AArch64::STRXpre:
2801   case AArch64::STRDpre:
2802   case AArch64::LDRXpost:
2803   case AArch64::LDRDpost:
2804     Scale = TypeSize::Fixed(1);
2805     Width = 8;
2806     MinOffset = -256;
2807     MaxOffset = 255;
2808     break;
2809   case AArch64::STRQpre:
2810   case AArch64::LDRQpost:
2811     Scale = TypeSize::Fixed(1);
2812     Width = 16;
2813     MinOffset = -256;
2814     MaxOffset = 255;
2815     break;
2816   case AArch64::ADDG:
2817     Scale = TypeSize::Fixed(16);
2818     Width = 0;
2819     MinOffset = 0;
2820     MaxOffset = 63;
2821     break;
2822   case AArch64::TAGPstack:
2823     Scale = TypeSize::Fixed(16);
2824     Width = 0;
2825     // TAGP with a negative offset turns into SUBP, which has a maximum offset
2826     // of 63 (not 64!).
2827     MinOffset = -63;
2828     MaxOffset = 63;
2829     break;
2830   case AArch64::LDG:
2831   case AArch64::STGOffset:
2832   case AArch64::STZGOffset:
2833     Scale = TypeSize::Fixed(16);
2834     Width = 16;
2835     MinOffset = -256;
2836     MaxOffset = 255;
2837     break;
2838   case AArch64::STR_ZZZZXI:
2839   case AArch64::LDR_ZZZZXI:
2840     Scale = TypeSize::Scalable(16);
2841     Width = SVEMaxBytesPerVector * 4;
2842     MinOffset = -256;
2843     MaxOffset = 252;
2844     break;
2845   case AArch64::STR_ZZZXI:
2846   case AArch64::LDR_ZZZXI:
2847     Scale = TypeSize::Scalable(16);
2848     Width = SVEMaxBytesPerVector * 3;
2849     MinOffset = -256;
2850     MaxOffset = 253;
2851     break;
2852   case AArch64::STR_ZZXI:
2853   case AArch64::LDR_ZZXI:
2854     Scale = TypeSize::Scalable(16);
2855     Width = SVEMaxBytesPerVector * 2;
2856     MinOffset = -256;
2857     MaxOffset = 254;
2858     break;
2859   case AArch64::LDR_PXI:
2860   case AArch64::STR_PXI:
2861     Scale = TypeSize::Scalable(2);
2862     Width = SVEMaxBytesPerVector / 8;
2863     MinOffset = -256;
2864     MaxOffset = 255;
2865     break;
2866   case AArch64::LDR_ZXI:
2867   case AArch64::STR_ZXI:
2868     Scale = TypeSize::Scalable(16);
2869     Width = SVEMaxBytesPerVector;
2870     MinOffset = -256;
2871     MaxOffset = 255;
2872     break;
2873   case AArch64::LD1B_IMM:
2874   case AArch64::LD1H_IMM:
2875   case AArch64::LD1W_IMM:
2876   case AArch64::LD1D_IMM:
2877   case AArch64::LDNT1B_ZRI:
2878   case AArch64::LDNT1H_ZRI:
2879   case AArch64::LDNT1W_ZRI:
2880   case AArch64::LDNT1D_ZRI:
2881   case AArch64::ST1B_IMM:
2882   case AArch64::ST1H_IMM:
2883   case AArch64::ST1W_IMM:
2884   case AArch64::ST1D_IMM:
2885   case AArch64::STNT1B_ZRI:
2886   case AArch64::STNT1H_ZRI:
2887   case AArch64::STNT1W_ZRI:
2888   case AArch64::STNT1D_ZRI:
2889   case AArch64::LDNF1B_IMM:
2890   case AArch64::LDNF1H_IMM:
2891   case AArch64::LDNF1W_IMM:
2892   case AArch64::LDNF1D_IMM:
2893     // A full vector's worth of data
2894     // Width = mbytes * elements
2895     Scale = TypeSize::Scalable(16);
2896     Width = SVEMaxBytesPerVector;
2897     MinOffset = -8;
2898     MaxOffset = 7;
2899     break;
2900   case AArch64::LD1B_H_IMM:
2901   case AArch64::LD1SB_H_IMM:
2902   case AArch64::LD1H_S_IMM:
2903   case AArch64::LD1SH_S_IMM:
2904   case AArch64::LD1W_D_IMM:
2905   case AArch64::LD1SW_D_IMM:
2906   case AArch64::ST1B_H_IMM:
2907   case AArch64::ST1H_S_IMM:
2908   case AArch64::ST1W_D_IMM:
2909   case AArch64::LDNF1B_H_IMM:
2910   case AArch64::LDNF1SB_H_IMM:
2911   case AArch64::LDNF1H_S_IMM:
2912   case AArch64::LDNF1SH_S_IMM:
2913   case AArch64::LDNF1W_D_IMM:
2914   case AArch64::LDNF1SW_D_IMM:
2915     // A half vector's worth of data
2916     // Width = mbytes * elements
2917     Scale = TypeSize::Scalable(8);
2918     Width = SVEMaxBytesPerVector / 2;
2919     MinOffset = -8;
2920     MaxOffset = 7;
2921     break;
2922   case AArch64::LD1B_S_IMM:
2923   case AArch64::LD1SB_S_IMM:
2924   case AArch64::LD1H_D_IMM:
2925   case AArch64::LD1SH_D_IMM:
2926   case AArch64::ST1B_S_IMM:
2927   case AArch64::ST1H_D_IMM:
2928   case AArch64::LDNF1B_S_IMM:
2929   case AArch64::LDNF1SB_S_IMM:
2930   case AArch64::LDNF1H_D_IMM:
2931   case AArch64::LDNF1SH_D_IMM:
2932     // A quarter vector's worth of data
2933     // Width = mbytes * elements
2934     Scale = TypeSize::Scalable(4);
2935     Width = SVEMaxBytesPerVector / 4;
2936     MinOffset = -8;
2937     MaxOffset = 7;
2938     break;
2939   case AArch64::LD1B_D_IMM:
2940   case AArch64::LD1SB_D_IMM:
2941   case AArch64::ST1B_D_IMM:
2942   case AArch64::LDNF1B_D_IMM:
2943   case AArch64::LDNF1SB_D_IMM:
2944     // An eighth vector's worth of data
2945     // Width = mbytes * elements
2946     Scale = TypeSize::Scalable(2);
2947     Width = SVEMaxBytesPerVector / 8;
2948     MinOffset = -8;
2949     MaxOffset = 7;
2950     break;
2951   case AArch64::ST2GOffset:
2952   case AArch64::STZ2GOffset:
2953     Scale = TypeSize::Fixed(16);
2954     Width = 32;
2955     MinOffset = -256;
2956     MaxOffset = 255;
2957     break;
2958   case AArch64::STGPi:
2959     Scale = TypeSize::Fixed(16);
2960     Width = 16;
2961     MinOffset = -64;
2962     MaxOffset = 63;
2963     break;
2964   case AArch64::LD1RB_IMM:
2965   case AArch64::LD1RB_H_IMM:
2966   case AArch64::LD1RB_S_IMM:
2967   case AArch64::LD1RB_D_IMM:
2968   case AArch64::LD1RSB_H_IMM:
2969   case AArch64::LD1RSB_S_IMM:
2970   case AArch64::LD1RSB_D_IMM:
2971     Scale = TypeSize::Fixed(1);
2972     Width = 1;
2973     MinOffset = 0;
2974     MaxOffset = 63;
2975     break;
2976   case AArch64::LD1RH_IMM:
2977   case AArch64::LD1RH_S_IMM:
2978   case AArch64::LD1RH_D_IMM:
2979   case AArch64::LD1RSH_S_IMM:
2980   case AArch64::LD1RSH_D_IMM:
2981     Scale = TypeSize::Fixed(2);
2982     Width = 2;
2983     MinOffset = 0;
2984     MaxOffset = 63;
2985     break;
2986   case AArch64::LD1RW_IMM:
2987   case AArch64::LD1RW_D_IMM:
2988   case AArch64::LD1RSW_IMM:
2989     Scale = TypeSize::Fixed(4);
2990     Width = 4;
2991     MinOffset = 0;
2992     MaxOffset = 63;
2993     break;
2994   case AArch64::LD1RD_IMM:
2995     Scale = TypeSize::Fixed(8);
2996     Width = 8;
2997     MinOffset = 0;
2998     MaxOffset = 63;
2999     break;
3000   }
3001 
3002   return true;
3003 }
3004 
3005 // Scaling factor for unscaled load or store.
3006 int AArch64InstrInfo::getMemScale(unsigned Opc) {
3007   switch (Opc) {
3008   default:
3009     llvm_unreachable("Opcode has unknown scale!");
3010   case AArch64::LDRBBui:
3011   case AArch64::LDURBBi:
3012   case AArch64::LDRSBWui:
3013   case AArch64::LDURSBWi:
3014   case AArch64::STRBBui:
3015   case AArch64::STURBBi:
3016     return 1;
3017   case AArch64::LDRHHui:
3018   case AArch64::LDURHHi:
3019   case AArch64::LDRSHWui:
3020   case AArch64::LDURSHWi:
3021   case AArch64::STRHHui:
3022   case AArch64::STURHHi:
3023     return 2;
3024   case AArch64::LDRSui:
3025   case AArch64::LDURSi:
3026   case AArch64::LDRSpre:
3027   case AArch64::LDRSWui:
3028   case AArch64::LDURSWi:
3029   case AArch64::LDRWpre:
3030   case AArch64::LDRWui:
3031   case AArch64::LDURWi:
3032   case AArch64::STRSui:
3033   case AArch64::STURSi:
3034   case AArch64::STRSpre:
3035   case AArch64::STRWui:
3036   case AArch64::STURWi:
3037   case AArch64::STRWpre:
3038   case AArch64::LDPSi:
3039   case AArch64::LDPSWi:
3040   case AArch64::LDPWi:
3041   case AArch64::STPSi:
3042   case AArch64::STPWi:
3043     return 4;
3044   case AArch64::LDRDui:
3045   case AArch64::LDURDi:
3046   case AArch64::LDRDpre:
3047   case AArch64::LDRXui:
3048   case AArch64::LDURXi:
3049   case AArch64::LDRXpre:
3050   case AArch64::STRDui:
3051   case AArch64::STURDi:
3052   case AArch64::STRDpre:
3053   case AArch64::STRXui:
3054   case AArch64::STURXi:
3055   case AArch64::STRXpre:
3056   case AArch64::LDPDi:
3057   case AArch64::LDPXi:
3058   case AArch64::STPDi:
3059   case AArch64::STPXi:
3060     return 8;
3061   case AArch64::LDRQui:
3062   case AArch64::LDURQi:
3063   case AArch64::STRQui:
3064   case AArch64::STURQi:
3065   case AArch64::STRQpre:
3066   case AArch64::LDPQi:
3067   case AArch64::LDRQpre:
3068   case AArch64::STPQi:
3069   case AArch64::STGOffset:
3070   case AArch64::STZGOffset:
3071   case AArch64::ST2GOffset:
3072   case AArch64::STZ2GOffset:
3073   case AArch64::STGPi:
3074     return 16;
3075   }
3076 }
3077 
3078 bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
3079   switch (MI.getOpcode()) {
3080   default:
3081     return false;
3082   case AArch64::LDRWpre:
3083   case AArch64::LDRXpre:
3084   case AArch64::LDRSpre:
3085   case AArch64::LDRDpre:
3086   case AArch64::LDRQpre:
3087     return true;
3088   }
3089 }
3090 
3091 bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
3092   switch (MI.getOpcode()) {
3093   default:
3094     return false;
3095   case AArch64::STRWpre:
3096   case AArch64::STRXpre:
3097   case AArch64::STRSpre:
3098   case AArch64::STRDpre:
3099   case AArch64::STRQpre:
3100     return true;
3101   }
3102 }
3103 
3104 bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
3105   return isPreLd(MI) || isPreSt(MI);
3106 }
3107 
3108 // Scale the unscaled offsets.  Returns false if the unscaled offset can't be
3109 // scaled.
3110 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
3111   int Scale = AArch64InstrInfo::getMemScale(Opc);
3112 
3113   // If the byte-offset isn't a multiple of the stride, we can't scale this
3114   // offset.
3115   if (Offset % Scale != 0)
3116     return false;
3117 
3118   // Convert the byte offset used by unscaled instructions into an "element"
3119   // offset used by the scaled pair load/store instructions.
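  // For example, an LDURXi (scale 8) byte offset of 16 becomes an element
  // offset of 2.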
3120   Offset /= Scale;
3121   return true;
3122 }
3123 
3124 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
3125   if (FirstOpc == SecondOpc)
3126     return true;
3127   // We can also pair sign-ext and zero-ext instructions.
3128   switch (FirstOpc) {
3129   default:
3130     return false;
3131   case AArch64::LDRWui:
3132   case AArch64::LDURWi:
3133     return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
3134   case AArch64::LDRSWui:
3135   case AArch64::LDURSWi:
3136     return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
3137   }
3138   // These instructions can't be paired based on their opcodes.
3139   return false;
3140 }
3141 
3142 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
3143                             int64_t Offset1, unsigned Opcode1, int FI2,
3144                             int64_t Offset2, unsigned Opcode2) {
3145   // Accesses through fixed stack object frame indices may access a different
3146   // fixed stack slot. Check that the object offsets + offsets match.
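  // For example (illustrative), two 8-byte accesses to fixed objects at object
  // offsets 16 and 24, each with an instruction offset of 0, scale to 2 and 3,
  // which are consecutive and therefore clusterable.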
3147   if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
3148     int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
3149     int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
3150     assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
3151     // Convert to scaled object offsets.
3152     int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
3153     if (ObjectOffset1 % Scale1 != 0)
3154       return false;
3155     ObjectOffset1 /= Scale1;
3156     int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
3157     if (ObjectOffset2 % Scale2 != 0)
3158       return false;
3159     ObjectOffset2 /= Scale2;
3160     ObjectOffset1 += Offset1;
3161     ObjectOffset2 += Offset2;
3162     return ObjectOffset1 + 1 == ObjectOffset2;
3163   }
3164 
3165   return FI1 == FI2;
3166 }
3167 
3168 /// Detect opportunities for ldp/stp formation.
3169 ///
3170 /// Only called for LdSt for which getMemOperandWithOffset returns true.
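///
/// For example (illustrative), "ldr x0, [x8, #8]" followed by
/// "ldr x1, [x8, #16]" share the same base register and have consecutive
/// scaled offsets (1 and 2), so they are clustered to encourage ldp formation.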
3171 bool AArch64InstrInfo::shouldClusterMemOps(
3172     ArrayRef<const MachineOperand *> BaseOps1,
3173     ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
3174     unsigned NumBytes) const {
3175   assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
3176   const MachineOperand &BaseOp1 = *BaseOps1.front();
3177   const MachineOperand &BaseOp2 = *BaseOps2.front();
3178   const MachineInstr &FirstLdSt = *BaseOp1.getParent();
3179   const MachineInstr &SecondLdSt = *BaseOp2.getParent();
3180   if (BaseOp1.getType() != BaseOp2.getType())
3181     return false;
3182 
3183   assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
3184          "Only base registers and frame indices are supported.");
3185 
3186   // Check for both base regs and base FI.
3187   if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
3188     return false;
3189 
3190   // Only cluster up to a single pair.
3191   if (NumLoads > 2)
3192     return false;
3193 
3194   if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
3195     return false;
3196 
3197   // Can we pair these instructions based on their opcodes?
3198   unsigned FirstOpc = FirstLdSt.getOpcode();
3199   unsigned SecondOpc = SecondLdSt.getOpcode();
3200   if (!canPairLdStOpc(FirstOpc, SecondOpc))
3201     return false;
3202 
3203   // Can't merge volatiles or load/stores that have a hint to avoid pair
3204   // formation, for example.
3205   if (!isCandidateToMergeOrPair(FirstLdSt) ||
3206       !isCandidateToMergeOrPair(SecondLdSt))
3207     return false;
3208 
3209   // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
3210   int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
3211   if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
3212     return false;
3213 
3214   int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
3215   if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
3216     return false;
3217 
3218   // Pairwise instructions have a 7-bit signed offset field.
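  // That is, the scaled element offset must fit in [-64, 63].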
3219   if (Offset1 > 63 || Offset1 < -64)
3220     return false;
3221 
3222   // The caller should already have ordered First/SecondLdSt by offset.
3223   // Note: this need not hold when the frame index bases differ.
3224   if (BaseOp1.isFI()) {
3225     assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
3226            "Caller should have ordered offsets.");
3227 
3228     const MachineFrameInfo &MFI =
3229         FirstLdSt.getParent()->getParent()->getFrameInfo();
3230     return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
3231                            BaseOp2.getIndex(), Offset2, SecondOpc);
3232   }
3233 
3234   assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
3235 
3236   return Offset1 + 1 == Offset2;
3237 }
3238 
3239 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
3240                                             unsigned Reg, unsigned SubIdx,
3241                                             unsigned State,
3242                                             const TargetRegisterInfo *TRI) {
3243   if (!SubIdx)
3244     return MIB.addReg(Reg, State);
3245 
3246   if (Register::isPhysicalRegister(Reg))
3247     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
3248   return MIB.addReg(Reg, State, SubIdx);
3249 }
3250 
3251 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
3252                                         unsigned NumRegs) {
3253   // We really want the positive remainder mod 32 here, which happens to be
3254   // easily obtainable with a mask.
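  // For example, copying a 3-register tuple starting at D1 into one starting
  // at D2 in forward order would overwrite D2 and D3 before they are read, so
  // the caller copies the sub-registers in reverse order in that case.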
3255   return ((DestReg - SrcReg) & 0x1f) < NumRegs;
3256 }
3257 
3258 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
3259                                         MachineBasicBlock::iterator I,
3260                                         const DebugLoc &DL, MCRegister DestReg,
3261                                         MCRegister SrcReg, bool KillSrc,
3262                                         unsigned Opcode,
3263                                         ArrayRef<unsigned> Indices) const {
3264   assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
3265   const TargetRegisterInfo *TRI = &getRegisterInfo();
3266   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
3267   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
3268   unsigned NumRegs = Indices.size();
3269 
3270   int SubReg = 0, End = NumRegs, Incr = 1;
3271   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
3272     SubReg = NumRegs - 1;
3273     End = -1;
3274     Incr = -1;
3275   }
3276 
3277   for (; SubReg != End; SubReg += Incr) {
3278     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
3279     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
3280     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
3281     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
3282   }
3283 }
3284 
3285 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
3286                                        MachineBasicBlock::iterator I,
3287                                        DebugLoc DL, unsigned DestReg,
3288                                        unsigned SrcReg, bool KillSrc,
3289                                        unsigned Opcode, unsigned ZeroReg,
3290                                        llvm::ArrayRef<unsigned> Indices) const {
3291   const TargetRegisterInfo *TRI = &getRegisterInfo();
3292   unsigned NumRegs = Indices.size();
3293 
3294 #ifndef NDEBUG
3295   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
3296   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
3297   assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
3298          "GPR reg sequences should not be able to overlap");
3299 #endif
3300 
3301   for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
3302     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
3303     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
3304     MIB.addReg(ZeroReg);
3305     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
3306     MIB.addImm(0);
3307   }
3308 }
3309 
3310 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
3311                                    MachineBasicBlock::iterator I,
3312                                    const DebugLoc &DL, MCRegister DestReg,
3313                                    MCRegister SrcReg, bool KillSrc) const {
3314   if (AArch64::GPR32spRegClass.contains(DestReg) &&
3315       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
3316     const TargetRegisterInfo *TRI = &getRegisterInfo();
3317 
3318     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
3319       // If either operand is WSP, expand to ADD #0.
3320       if (Subtarget.hasZeroCycleRegMove()) {
3321         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
3322         MCRegister DestRegX = TRI->getMatchingSuperReg(
3323             DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
3324         MCRegister SrcRegX = TRI->getMatchingSuperReg(
3325             SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
3326         // This instruction is reading and writing X registers.  This may upset
3327         // the register scavenger and machine verifier, so we need to indicate
3328         // that we are reading an undefined value from SrcRegX, but a proper
3329         // value from SrcReg.
3330         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
3331             .addReg(SrcRegX, RegState::Undef)
3332             .addImm(0)
3333             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
3334             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
3335       } else {
3336         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
3337             .addReg(SrcReg, getKillRegState(KillSrc))
3338             .addImm(0)
3339             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
3340       }
3341     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
3342       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
3343           .addImm(0)
3344           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
3345     } else {
3346       if (Subtarget.hasZeroCycleRegMove()) {
3347         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
3348         MCRegister DestRegX = TRI->getMatchingSuperReg(
3349             DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
3350         MCRegister SrcRegX = TRI->getMatchingSuperReg(
3351             SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
3352         // This instruction is reading and writing X registers.  This may upset
3353         // the register scavenger and machine verifier, so we need to indicate
3354         // that we are reading an undefined value from SrcRegX, but a proper
3355         // value from SrcReg.
3356         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
3357             .addReg(AArch64::XZR)
3358             .addReg(SrcRegX, RegState::Undef)
3359             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
3360       } else {
3361         // Otherwise, expand to ORR WZR.
3362         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
3363             .addReg(AArch64::WZR)
3364             .addReg(SrcReg, getKillRegState(KillSrc));
3365       }
3366     }
3367     return;
3368   }
3369 
3370   // Copy a Predicate register by ORRing with itself.
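  // (i.e. "orr pD.b, pS/z, pS.b, pS.b", with the source also acting as the
  // governing predicate.)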
3371   if (AArch64::PPRRegClass.contains(DestReg) &&
3372       AArch64::PPRRegClass.contains(SrcReg)) {
3373     assert(Subtarget.hasSVE() && "Unexpected SVE register.");
3374     BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
3375       .addReg(SrcReg) // Pg
3376       .addReg(SrcReg)
3377       .addReg(SrcReg, getKillRegState(KillSrc));
3378     return;
3379   }
3380 
3381   // Copy a Z register by ORRing with itself.
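  // (i.e. an unpredicated "orr zD.d, zS.d, zS.d".)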
3382   if (AArch64::ZPRRegClass.contains(DestReg) &&
3383       AArch64::ZPRRegClass.contains(SrcReg)) {
3384     assert(Subtarget.hasSVE() && "Unexpected SVE register.");
3385     BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
3386       .addReg(SrcReg)
3387       .addReg(SrcReg, getKillRegState(KillSrc));
3388     return;
3389   }
3390 
3391   // Copy a Z register pair by copying the individual sub-registers.
3392   if (AArch64::ZPR2RegClass.contains(DestReg) &&
3393       AArch64::ZPR2RegClass.contains(SrcReg)) {
3394     static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
3395     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
3396                      Indices);
3397     return;
3398   }
3399 
3400   // Copy a Z register triple by copying the individual sub-registers.
3401   if (AArch64::ZPR3RegClass.contains(DestReg) &&
3402       AArch64::ZPR3RegClass.contains(SrcReg)) {
3403     static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
3404                                        AArch64::zsub2};
3405     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
3406                      Indices);
3407     return;
3408   }
3409 
3410   // Copy a Z register quad by copying the individual sub-registers.
3411   if (AArch64::ZPR4RegClass.contains(DestReg) &&
3412       AArch64::ZPR4RegClass.contains(SrcReg)) {
3413     static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
3414                                        AArch64::zsub2, AArch64::zsub3};
3415     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
3416                      Indices);
3417     return;
3418   }
3419 
3420   if (AArch64::GPR64spRegClass.contains(DestReg) &&
3421       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
3422     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
3423       // If either operand is SP, expand to ADD #0.
3424       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
3425           .addReg(SrcReg, getKillRegState(KillSrc))
3426           .addImm(0)
3427           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
3428     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
3429       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
3430           .addImm(0)
3431           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
3432     } else {
3433       // Otherwise, expand to ORR XZR.
3434       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
3435           .addReg(AArch64::XZR)
3436           .addReg(SrcReg, getKillRegState(KillSrc));
3437     }
3438     return;
3439   }
3440 
3441   // Copy a DDDD register quad by copying the individual sub-registers.
3442   if (AArch64::DDDDRegClass.contains(DestReg) &&
3443       AArch64::DDDDRegClass.contains(SrcReg)) {
3444     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
3445                                        AArch64::dsub2, AArch64::dsub3};
3446     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
3447                      Indices);
3448     return;
3449   }
3450 
3451   // Copy a DDD register triple by copying the individual sub-registers.
3452   if (AArch64::DDDRegClass.contains(DestReg) &&
3453       AArch64::DDDRegClass.contains(SrcReg)) {
3454     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
3455                                        AArch64::dsub2};
3456     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
3457                      Indices);
3458     return;
3459   }
3460 
3461   // Copy a DD register pair by copying the individual sub-registers.
3462   if (AArch64::DDRegClass.contains(DestReg) &&
3463       AArch64::DDRegClass.contains(SrcReg)) {
3464     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
3465     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
3466                      Indices);
3467     return;
3468   }
3469 
3470   // Copy a QQQQ register quad by copying the individual sub-registers.
3471   if (AArch64::QQQQRegClass.contains(DestReg) &&
3472       AArch64::QQQQRegClass.contains(SrcReg)) {
3473     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
3474                                        AArch64::qsub2, AArch64::qsub3};
3475     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
3476                      Indices);
3477     return;
3478   }
3479 
3480   // Copy a QQQ register triple by copying the individual sub-registers.
3481   if (AArch64::QQQRegClass.contains(DestReg) &&
3482       AArch64::QQQRegClass.contains(SrcReg)) {
3483     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
3484                                        AArch64::qsub2};
3485     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
3486                      Indices);
3487     return;
3488   }
3489 
3490   // Copy a QQ register pair by copying the individual sub-registers.
3491   if (AArch64::QQRegClass.contains(DestReg) &&
3492       AArch64::QQRegClass.contains(SrcReg)) {
3493     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
3494     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
3495                      Indices);
3496     return;
3497   }
3498 
3499   if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
3500       AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
3501     static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
3502     copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
3503                     AArch64::XZR, Indices);
3504     return;
3505   }
3506 
3507   if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
3508       AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
3509     static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
3510     copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
3511                     AArch64::WZR, Indices);
3512     return;
3513   }
3514 
3515   if (AArch64::FPR128RegClass.contains(DestReg) &&
3516       AArch64::FPR128RegClass.contains(SrcReg)) {
3517     if (Subtarget.hasNEON()) {
3518       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
3519           .addReg(SrcReg)
3520           .addReg(SrcReg, getKillRegState(KillSrc));
3521     } else {
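      // Without NEON there is no 128-bit register-to-register move, so bounce
      // the value through the stack: push it with a pre-indexed store to
      // [sp, #-16]! and pop it with a post-indexed load that restores SP.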
3522       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
3523           .addReg(AArch64::SP, RegState::Define)
3524           .addReg(SrcReg, getKillRegState(KillSrc))
3525           .addReg(AArch64::SP)
3526           .addImm(-16);
3527       BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
3528           .addReg(AArch64::SP, RegState::Define)
3529           .addReg(DestReg, RegState::Define)
3530           .addReg(AArch64::SP)
3531           .addImm(16);
3532     }
3533     return;
3534   }
3535 
3536   if (AArch64::FPR64RegClass.contains(DestReg) &&
3537       AArch64::FPR64RegClass.contains(SrcReg)) {
3538     BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
3539         .addReg(SrcReg, getKillRegState(KillSrc));
3540     return;
3541   }
3542 
3543   if (AArch64::FPR32RegClass.contains(DestReg) &&
3544       AArch64::FPR32RegClass.contains(SrcReg)) {
3545     BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
3546         .addReg(SrcReg, getKillRegState(KillSrc));
3547     return;
3548   }
3549 
3550   if (AArch64::FPR16RegClass.contains(DestReg) &&
3551       AArch64::FPR16RegClass.contains(SrcReg)) {
3552     DestReg =
3553         RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
3554     SrcReg =
3555         RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
3556     BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
3557         .addReg(SrcReg, getKillRegState(KillSrc));
3558     return;
3559   }
3560 
3561   if (AArch64::FPR8RegClass.contains(DestReg) &&
3562       AArch64::FPR8RegClass.contains(SrcReg)) {
3563     DestReg =
3564         RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
3565     SrcReg =
3566         RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
3567     BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
3568         .addReg(SrcReg, getKillRegState(KillSrc));
3569     return;
3570   }
3571 
3572   // Copies between GPR64 and FPR64.
3573   if (AArch64::FPR64RegClass.contains(DestReg) &&
3574       AArch64::GPR64RegClass.contains(SrcReg)) {
3575     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
3576         .addReg(SrcReg, getKillRegState(KillSrc));
3577     return;
3578   }
3579   if (AArch64::GPR64RegClass.contains(DestReg) &&
3580       AArch64::FPR64RegClass.contains(SrcReg)) {
3581     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
3582         .addReg(SrcReg, getKillRegState(KillSrc));
3583     return;
3584   }
3585   // Copies between GPR32 and FPR32.
3586   if (AArch64::FPR32RegClass.contains(DestReg) &&
3587       AArch64::GPR32RegClass.contains(SrcReg)) {
3588     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
3589         .addReg(SrcReg, getKillRegState(KillSrc));
3590     return;
3591   }
3592   if (AArch64::GPR32RegClass.contains(DestReg) &&
3593       AArch64::FPR32RegClass.contains(SrcReg)) {
3594     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
3595         .addReg(SrcReg, getKillRegState(KillSrc));
3596     return;
3597   }
3598 
3599   if (DestReg == AArch64::NZCV) {
3600     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
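    // Write the flags from the GPR, i.e. "msr nzcv, xN".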
3601     BuildMI(MBB, I, DL, get(AArch64::MSR))
3602         .addImm(AArch64SysReg::NZCV)
3603         .addReg(SrcReg, getKillRegState(KillSrc))
3604         .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
3605     return;
3606   }
3607 
3608   if (SrcReg == AArch64::NZCV) {
3609     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
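    // Read the flags into the GPR, i.e. "mrs xN, nzcv".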
3610     BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
3611         .addImm(AArch64SysReg::NZCV)
3612         .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
3613     return;
3614   }
3615 
3616 #ifndef NDEBUG
3617   const TargetRegisterInfo &TRI = getRegisterInfo();
3618   errs() << TRI.getRegAsmName(DestReg) << " = COPY "
3619          << TRI.getRegAsmName(SrcReg) << "\n";
3620 #endif
3621   llvm_unreachable("unimplemented reg-to-reg copy");
3622 }
3623 
3624 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
3625                                     MachineBasicBlock &MBB,
3626                                     MachineBasicBlock::iterator InsertBefore,
3627                                     const MCInstrDesc &MCID,
3628                                     Register SrcReg, bool IsKill,
3629                                     unsigned SubIdx0, unsigned SubIdx1, int FI,
3630                                     MachineMemOperand *MMO) {
3631   Register SrcReg0 = SrcReg;
3632   Register SrcReg1 = SrcReg;
3633   if (Register::isPhysicalRegister(SrcReg)) {
3634     SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
3635     SubIdx0 = 0;
3636     SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
3637     SubIdx1 = 0;
3638   }
3639   BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
3640       .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
3641       .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
3642       .addFrameIndex(FI)
3643       .addImm(0)
3644       .addMemOperand(MMO);
3645 }
3646 
3647 void AArch64InstrInfo::storeRegToStackSlot(
3648     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
3649     bool isKill, int FI, const TargetRegisterClass *RC,
3650     const TargetRegisterInfo *TRI) const {
3651   MachineFunction &MF = *MBB.getParent();
3652   MachineFrameInfo &MFI = MF.getFrameInfo();
3653 
3654   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
3655   MachineMemOperand *MMO =
3656       MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
3657                               MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
3658   unsigned Opc = 0;
3659   bool Offset = true;
3660   unsigned StackID = TargetStackID::Default;
3661   switch (TRI->getSpillSize(*RC)) {
3662   case 1:
3663     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
3664       Opc = AArch64::STRBui;
3665     break;
3666   case 2:
3667     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
3668       Opc = AArch64::STRHui;
3669     else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
3670       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3671       Opc = AArch64::STR_PXI;
3672       StackID = TargetStackID::ScalableVector;
3673     }
3674     break;
3675   case 4:
3676     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
3677       Opc = AArch64::STRWui;
3678       if (Register::isVirtualRegister(SrcReg))
3679         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
3680       else
3681         assert(SrcReg != AArch64::WSP);
3682     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
3683       Opc = AArch64::STRSui;
3684     break;
3685   case 8:
3686     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
3687       Opc = AArch64::STRXui;
3688       if (Register::isVirtualRegister(SrcReg))
3689         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3690       else
3691         assert(SrcReg != AArch64::SP);
3692     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
3693       Opc = AArch64::STRDui;
3694     } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
3695       storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
3696                               get(AArch64::STPWi), SrcReg, isKill,
3697                               AArch64::sube32, AArch64::subo32, FI, MMO);
3698       return;
3699     }
3700     break;
3701   case 16:
3702     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
3703       Opc = AArch64::STRQui;
3704     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
3705       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3706       Opc = AArch64::ST1Twov1d;
3707       Offset = false;
3708     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
3709       storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
3710                               get(AArch64::STPXi), SrcReg, isKill,
3711                               AArch64::sube64, AArch64::subo64, FI, MMO);
3712       return;
3713     } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
3714       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3715       Opc = AArch64::STR_ZXI;
3716       StackID = TargetStackID::ScalableVector;
3717     }
3718     break;
3719   case 24:
3720     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
3721       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3722       Opc = AArch64::ST1Threev1d;
3723       Offset = false;
3724     }
3725     break;
3726   case 32:
3727     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
3728       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3729       Opc = AArch64::ST1Fourv1d;
3730       Offset = false;
3731     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
3732       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3733       Opc = AArch64::ST1Twov2d;
3734       Offset = false;
3735     } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
3736       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3737       Opc = AArch64::STR_ZZXI;
3738       StackID = TargetStackID::ScalableVector;
3739     }
3740     break;
3741   case 48:
3742     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
3743       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3744       Opc = AArch64::ST1Threev2d;
3745       Offset = false;
3746     } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
3747       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3748       Opc = AArch64::STR_ZZZXI;
3749       StackID = TargetStackID::ScalableVector;
3750     }
3751     break;
3752   case 64:
3753     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
3754       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
3755       Opc = AArch64::ST1Fourv2d;
3756       Offset = false;
3757     } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
3758       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
3759       Opc = AArch64::STR_ZZZZXI;
3760       StackID = TargetStackID::ScalableVector;
3761     }
3762     break;
3763   }
3764   assert(Opc && "Unknown register class");
3765   MFI.setStackID(FI, StackID);
3766 
3767   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
3768                                      .addReg(SrcReg, getKillRegState(isKill))
3769                                      .addFrameIndex(FI);
3770 
3771   if (Offset)
3772     MI.addImm(0);
3773   MI.addMemOperand(MMO);
3774 }
3775 
3776 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
3777                                      MachineBasicBlock &MBB,
3778                                      MachineBasicBlock::iterator InsertBefore,
3779                                      const MCInstrDesc &MCID,
3780                                      Register DestReg, unsigned SubIdx0,
3781                                      unsigned SubIdx1, int FI,
3782                                      MachineMemOperand *MMO) {
3783   Register DestReg0 = DestReg;
3784   Register DestReg1 = DestReg;
3785   bool IsUndef = true;
3786   if (Register::isPhysicalRegister(DestReg)) {
3787     DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
3788     SubIdx0 = 0;
3789     DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
3790     SubIdx1 = 0;
3791     IsUndef = false;
3792   }
3793   BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
3794       .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
3795       .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
3796       .addFrameIndex(FI)
3797       .addImm(0)
3798       .addMemOperand(MMO);
3799 }
3800 
3801 void AArch64InstrInfo::loadRegFromStackSlot(
3802     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
3803     int FI, const TargetRegisterClass *RC,
3804     const TargetRegisterInfo *TRI) const {
3805   MachineFunction &MF = *MBB.getParent();
3806   MachineFrameInfo &MFI = MF.getFrameInfo();
3807   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
3808   MachineMemOperand *MMO =
3809       MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
3810                               MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
3811 
3812   unsigned Opc = 0;
3813   bool Offset = true;
3814   unsigned StackID = TargetStackID::Default;
3815   switch (TRI->getSpillSize(*RC)) {
3816   case 1:
3817     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
3818       Opc = AArch64::LDRBui;
3819     break;
3820   case 2:
3821     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
3822       Opc = AArch64::LDRHui;
3823     else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
3824       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3825       Opc = AArch64::LDR_PXI;
3826       StackID = TargetStackID::ScalableVector;
3827     }
3828     break;
3829   case 4:
3830     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
3831       Opc = AArch64::LDRWui;
3832       if (Register::isVirtualRegister(DestReg))
3833         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
3834       else
3835         assert(DestReg != AArch64::WSP);
3836     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
3837       Opc = AArch64::LDRSui;
3838     break;
3839   case 8:
3840     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
3841       Opc = AArch64::LDRXui;
3842       if (Register::isVirtualRegister(DestReg))
3843         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
3844       else
3845         assert(DestReg != AArch64::SP);
3846     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
3847       Opc = AArch64::LDRDui;
3848     } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
3849       loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3850                                get(AArch64::LDPWi), DestReg, AArch64::sube32,
3851                                AArch64::subo32, FI, MMO);
3852       return;
3853     }
3854     break;
3855   case 16:
3856     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
3857       Opc = AArch64::LDRQui;
3858     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
3859       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3860       Opc = AArch64::LD1Twov1d;
3861       Offset = false;
3862     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
3863       loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
3864                                get(AArch64::LDPXi), DestReg, AArch64::sube64,
3865                                AArch64::subo64, FI, MMO);
3866       return;
3867     } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
3868       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3869       Opc = AArch64::LDR_ZXI;
3870       StackID = TargetStackID::ScalableVector;
3871     }
3872     break;
3873   case 24:
3874     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
3875       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3876       Opc = AArch64::LD1Threev1d;
3877       Offset = false;
3878     }
3879     break;
3880   case 32:
3881     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
3882       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3883       Opc = AArch64::LD1Fourv1d;
3884       Offset = false;
3885     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
3886       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3887       Opc = AArch64::LD1Twov2d;
3888       Offset = false;
3889     } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
3890       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3891       Opc = AArch64::LDR_ZZXI;
3892       StackID = TargetStackID::ScalableVector;
3893     }
3894     break;
3895   case 48:
3896     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
3897       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3898       Opc = AArch64::LD1Threev2d;
3899       Offset = false;
3900     } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
3901       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3902       Opc = AArch64::LDR_ZZZXI;
3903       StackID = TargetStackID::ScalableVector;
3904     }
3905     break;
3906   case 64:
3907     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
3908       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
3909       Opc = AArch64::LD1Fourv2d;
3910       Offset = false;
3911     } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
3912       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
3913       Opc = AArch64::LDR_ZZZZXI;
3914       StackID = TargetStackID::ScalableVector;
3915     }
3916     break;
3917   }
3918 
3919   assert(Opc && "Unknown register class");
3920   MFI.setStackID(FI, StackID);
3921 
3922   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
3923                                      .addReg(DestReg, getDefRegState(true))
3924                                      .addFrameIndex(FI);
3925   if (Offset)
3926     MI.addImm(0);
3927   MI.addMemOperand(MMO);
3928 }
3929 
3930 bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
3931                                            const MachineInstr &UseMI,
3932                                            const TargetRegisterInfo *TRI) {
3933   return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
3934                                          UseMI.getIterator()),
3935                 [TRI](const MachineInstr &I) {
3936                   return I.modifiesRegister(AArch64::NZCV, TRI) ||
3937                          I.readsRegister(AArch64::NZCV, TRI);
3938                 });
3939 }
3940 
3941 void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
3942     const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
3943   // The smallest scalable element supported by scaled SVE addressing
3944   // modes is a predicate, which is 2 scalable bytes in size. So the scalable
3945   // byte offset must always be a multiple of 2.
3946   assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
3947 
3948   // VGSized offsets are divided by '2', because the VG register is the
3949   // number of 64bit granules as opposed to 128bit vector chunks,
3950   // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
3951   // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
3952   // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
3953   ByteSized = Offset.getFixed();
3954   VGSized = Offset.getScalable() / 2;
3955 }
3956 
3957 /// Returns the offset in parts to which this frame offset can be
3958 /// decomposed for the purpose of describing a frame offset.
3959 /// For a non-scalable offset this is simply its byte size.
3960 void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
3961     const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
3962     int64_t &NumDataVectors) {
3963   // The smallest scalable element supported by scaled SVE addressing
3964   // modes are predicates, which are 2 scalable bytes in size. So the scalable
3965   // byte offset must always be a multiple of 2.
3966   assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
3967 
3968   NumBytes = Offset.getFixed();
3969   NumDataVectors = 0;
3970   NumPredicateVectors = Offset.getScalable() / 2;
3971   // This method is used to get the offsets to adjust the frame offset.
3972   // If the function requires ADDPL to be used and needs more than two ADDPL
3973   // instructions, part of the offset is folded into NumDataVectors so that it
3974   // uses ADDVL for part of it, reducing the number of ADDPL instructions.
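  // For example, a scalable offset of 16 bytes (8 predicate increments) folds
  // entirely into a single ADDVL #1, whereas 18 bytes (9 increments) is left
  // as a single ADDPL #9.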
3975   if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
3976       NumPredicateVectors > 62) {
3977     NumDataVectors = NumPredicateVectors / 8;
3978     NumPredicateVectors -= NumDataVectors * 8;
3979   }
3980 }
3981 
3982 // Helper function to emit a frame offset adjustment from a given
3983 // pointer (SrcReg), stored into DestReg. This function is explicit
3984 // in that it requires the opcode.
3985 static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
3986                                MachineBasicBlock::iterator MBBI,
3987                                const DebugLoc &DL, unsigned DestReg,
3988                                unsigned SrcReg, int64_t Offset, unsigned Opc,
3989                                const TargetInstrInfo *TII,
3990                                MachineInstr::MIFlag Flag, bool NeedsWinCFI,
3991                                bool *HasWinCFI) {
3992   int Sign = 1;
3993   unsigned MaxEncoding, ShiftSize;
3994   switch (Opc) {
3995   case AArch64::ADDXri:
3996   case AArch64::ADDSXri:
3997   case AArch64::SUBXri:
3998   case AArch64::SUBSXri:
3999     MaxEncoding = 0xfff;
4000     ShiftSize = 12;
4001     break;
4002   case AArch64::ADDVL_XXI:
4003   case AArch64::ADDPL_XXI:
4004     MaxEncoding = 31;
4005     ShiftSize = 0;
4006     if (Offset < 0) {
4007       MaxEncoding = 32;
4008       Sign = -1;
4009       Offset = -Offset;
4010     }
4011     break;
4012   default:
4013     llvm_unreachable("Unsupported opcode");
4014   }
4015 
4016   // FIXME: If the offset won't fit in 24-bits, compute the offset into a
4017   // scratch register.  If DestReg is a virtual register, use it as the
4018   // scratch register; otherwise, create a new virtual register (to be
4019   // replaced by the scavenger at the end of PEI).  That case can be optimized
4020   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
4021   // register can be loaded with offset%8 and the add/sub can use an extending
4022   // instruction with LSL#3.
4023   // Currently the function handles any offsets but generates a poor sequence
4024   // of code.
4025   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
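  // For example, with ADDXri and an offset of 0x12345, the loop below emits
  // "add xD, xS, #0x12, lsl #12" followed by "add xD, xD, #0x345".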
4026 
4027   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
4028   Register TmpReg = DestReg;
4029   if (TmpReg == AArch64::XZR)
4030     TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
4031         &AArch64::GPR64RegClass);
4032   do {
4033     uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
4034     unsigned LocalShiftSize = 0;
4035     if (ThisVal > MaxEncoding) {
4036       ThisVal = ThisVal >> ShiftSize;
4037       LocalShiftSize = ShiftSize;
4038     }
4039     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
4040            "Encoding cannot handle value that big");
4041 
4042     Offset -= ThisVal << LocalShiftSize;
4043     if (Offset == 0)
4044       TmpReg = DestReg;
4045     auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
4046                    .addReg(SrcReg)
4047                    .addImm(Sign * (int)ThisVal);
4048     if (ShiftSize)
4049       MBI = MBI.addImm(
4050           AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
4051     MBI = MBI.setMIFlag(Flag);
4052 
4053     if (NeedsWinCFI) {
4054       assert(Sign == 1 && "SEH directives should always have a positive sign");
4055       int Imm = (int)(ThisVal << LocalShiftSize);
4056       if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
4057           (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
4058         if (HasWinCFI)
4059           *HasWinCFI = true;
4060         if (Imm == 0)
4061           BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
4062         else
4063           BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
4064               .addImm(Imm)
4065               .setMIFlag(Flag);
4066         assert(Offset == 0 && "Expected remaining offset to be zero to "
4067                               "emit a single SEH directive");
4068       } else if (DestReg == AArch64::SP) {
4069         if (HasWinCFI)
4070           *HasWinCFI = true;
4071         assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
4072         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
4073             .addImm(Imm)
4074             .setMIFlag(Flag);
4075       }
4076       if (HasWinCFI)
4077         *HasWinCFI = true;
4078     }
4079 
4080     SrcReg = TmpReg;
4081   } while (Offset);
4082 }
4083 
4084 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
4085                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
4086                            unsigned DestReg, unsigned SrcReg,
4087                            StackOffset Offset, const TargetInstrInfo *TII,
4088                            MachineInstr::MIFlag Flag, bool SetNZCV,
4089                            bool NeedsWinCFI, bool *HasWinCFI) {
4090   int64_t Bytes, NumPredicateVectors, NumDataVectors;
4091   AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
4092       Offset, Bytes, NumPredicateVectors, NumDataVectors);
4093 
4094   // First emit non-scalable frame offsets, or a simple 'mov'.
4095   if (Bytes || (!Offset && SrcReg != DestReg)) {
4096     assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
4097            "SP increment/decrement not 8-byte aligned");
4098     unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
4099     if (Bytes < 0) {
4100       Bytes = -Bytes;
4101       Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
4102     }
4103     emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
4104                        NeedsWinCFI, HasWinCFI);
4105     SrcReg = DestReg;
4106   }
4107 
4108   assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
4109          "SetNZCV not supported with SVE vectors");
4110   assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
4111          "WinCFI not supported with SVE vectors");
4112 
4113   if (NumDataVectors) {
4114     emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
4115                        AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr);
4116     SrcReg = DestReg;
4117   }
4118 
4119   if (NumPredicateVectors) {
4120     assert(DestReg != AArch64::SP && "Unaligned access to SP");
4121     emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
4122                        AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr);
4123   }
4124 }
4125 
4126 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
4127     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
4128     MachineBasicBlock::iterator InsertPt, int FrameIndex,
4129     LiveIntervals *LIS, VirtRegMap *VRM) const {
4130   // This is a bit of a hack. Consider this instruction:
4131   //
4132   //   %0 = COPY %sp; GPR64all:%0
4133   //
4134   // We explicitly chose GPR64all for the virtual register so such a copy might
4135   // be eliminated by RegisterCoalescer. However, that may not be possible, and
4136   // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
4137   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
4138   //
4139   // To prevent that, we are going to constrain the %0 register class here.
4140   //
4141   // <rdar://problem/11522048>
4142   //
4143   if (MI.isFullCopy()) {
4144     Register DstReg = MI.getOperand(0).getReg();
4145     Register SrcReg = MI.getOperand(1).getReg();
4146     if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) {
4147       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
4148       return nullptr;
4149     }
4150     if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) {
4151       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
4152       return nullptr;
4153     }
4154   }
4155 
4156   // Handle the case where a copy is being spilled or filled but the source
4157   // and destination register class don't match.  For example:
4158   //
4159   //   %0 = COPY %xzr; GPR64common:%0
4160   //
4161   // In this case we can still safely fold away the COPY and generate the
4162   // following spill code:
4163   //
4164   //   STRXui %xzr, %stack.0
4165   //
4166   // This also eliminates spilled cross register class COPYs (e.g. between x and
4167   // d regs) of the same size.  For example:
4168   //
4169   //   %0 = COPY %1; GPR64:%0, FPR64:%1
4170   //
4171   // will be filled as
4172   //
4173   //   LDRDui %0, fi<#0>
4174   //
4175   // instead of
4176   //
4177   //   LDRXui %Temp, fi<#0>
4178   //   %0 = FMOV %Temp
4179   //
4180   if (MI.isCopy() && Ops.size() == 1 &&
4181       // Make sure we're only folding the explicit COPY defs/uses.
4182       (Ops[0] == 0 || Ops[0] == 1)) {
4183     bool IsSpill = Ops[0] == 0;
4184     bool IsFill = !IsSpill;
4185     const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
4186     const MachineRegisterInfo &MRI = MF.getRegInfo();
4187     MachineBasicBlock &MBB = *MI.getParent();
4188     const MachineOperand &DstMO = MI.getOperand(0);
4189     const MachineOperand &SrcMO = MI.getOperand(1);
4190     Register DstReg = DstMO.getReg();
4191     Register SrcReg = SrcMO.getReg();
4192     // This is slightly expensive to compute for physical regs since
4193     // getMinimalPhysRegClass is slow.
4194     auto getRegClass = [&](unsigned Reg) {
4195       return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
4196                                               : TRI.getMinimalPhysRegClass(Reg);
4197     };
4198 
4199     if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
4200       assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
4201                  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
4202              "Mismatched register size in non subreg COPY");
4203       if (IsSpill)
4204         storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
4205                             getRegClass(SrcReg), &TRI);
4206       else
4207         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
4208                              getRegClass(DstReg), &TRI);
4209       return &*--InsertPt;
4210     }
4211 
4212     // Handle cases like spilling def of:
4213     //
4214     //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
4215     //
4216     // where the physical register source can be widened and stored to the full
4217     // virtual reg destination stack slot, in this case producing:
4218     //
4219     //   STRXui %xzr, %stack.0
4220     //
4221     if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) {
4222       assert(SrcMO.getSubReg() == 0 &&
4223              "Unexpected subreg on physical register");
4224       const TargetRegisterClass *SpillRC;
4225       unsigned SpillSubreg;
4226       switch (DstMO.getSubReg()) {
4227       default:
4228         SpillRC = nullptr;
4229         break;
4230       case AArch64::sub_32:
4231       case AArch64::ssub:
4232         if (AArch64::GPR32RegClass.contains(SrcReg)) {
4233           SpillRC = &AArch64::GPR64RegClass;
4234           SpillSubreg = AArch64::sub_32;
4235         } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
4236           SpillRC = &AArch64::FPR64RegClass;
4237           SpillSubreg = AArch64::ssub;
4238         } else
4239           SpillRC = nullptr;
4240         break;
4241       case AArch64::dsub:
4242         if (AArch64::FPR64RegClass.contains(SrcReg)) {
4243           SpillRC = &AArch64::FPR128RegClass;
4244           SpillSubreg = AArch64::dsub;
4245         } else
4246           SpillRC = nullptr;
4247         break;
4248       }
4249 
4250       if (SpillRC)
4251         if (unsigned WidenedSrcReg =
4252                 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
4253           storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
4254                               FrameIndex, SpillRC, &TRI);
4255           return &*--InsertPt;
4256         }
4257     }
4258 
4259     // Handle cases like filling use of:
4260     //
4261     //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
4262     //
4263     // where we can load the full virtual reg source stack slot, into the subreg
4264     // destination, in this case producing:
4265     //
4266     //   LDRWui %0:sub_32<def,read-undef>, %stack.0
4267     //
4268     if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
4269       const TargetRegisterClass *FillRC;
4270       switch (DstMO.getSubReg()) {
4271       default:
4272         FillRC = nullptr;
4273         break;
4274       case AArch64::sub_32:
4275         FillRC = &AArch64::GPR32RegClass;
4276         break;
4277       case AArch64::ssub:
4278         FillRC = &AArch64::FPR32RegClass;
4279         break;
4280       case AArch64::dsub:
4281         FillRC = &AArch64::FPR64RegClass;
4282         break;
4283       }
4284 
4285       if (FillRC) {
4286         assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
4287                    TRI.getRegSizeInBits(*FillRC) &&
4288                "Mismatched regclass size on folded subreg COPY");
4289         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
4290         MachineInstr &LoadMI = *--InsertPt;
4291         MachineOperand &LoadDst = LoadMI.getOperand(0);
4292         assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
4293         LoadDst.setSubReg(DstMO.getSubReg());
4294         LoadDst.setIsUndef();
4295         return &LoadMI;
4296       }
4297     }
4298   }
4299 
4300   // Cannot fold.
4301   return nullptr;
4302 }
4303 
4304 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
4305                                     StackOffset &SOffset,
4306                                     bool *OutUseUnscaledOp,
4307                                     unsigned *OutUnscaledOp,
4308                                     int64_t *EmittableOffset) {
4309   // Set output values in case of early exit.
4310   if (EmittableOffset)
4311     *EmittableOffset = 0;
4312   if (OutUseUnscaledOp)
4313     *OutUseUnscaledOp = false;
4314   if (OutUnscaledOp)
4315     *OutUnscaledOp = 0;
4316 
4317   // Exit early for structured vector spills/fills as they can't take an
4318   // immediate offset.
4319   switch (MI.getOpcode()) {
4320   default:
4321     break;
4322   case AArch64::LD1Twov2d:
4323   case AArch64::LD1Threev2d:
4324   case AArch64::LD1Fourv2d:
4325   case AArch64::LD1Twov1d:
4326   case AArch64::LD1Threev1d:
4327   case AArch64::LD1Fourv1d:
4328   case AArch64::ST1Twov2d:
4329   case AArch64::ST1Threev2d:
4330   case AArch64::ST1Fourv2d:
4331   case AArch64::ST1Twov1d:
4332   case AArch64::ST1Threev1d:
4333   case AArch64::ST1Fourv1d:
4334   case AArch64::ST1i8:
4335   case AArch64::ST1i16:
4336   case AArch64::ST1i32:
4337   case AArch64::ST1i64:
4338   case AArch64::IRG:
4339   case AArch64::IRGstack:
4340   case AArch64::STGloop:
4341   case AArch64::STZGloop:
4342     return AArch64FrameOffsetCannotUpdate;
4343   }
4344 
4345   // Get the min/max offset and the scale.
4346   TypeSize ScaleValue(0U, false);
4347   unsigned Width;
4348   int64_t MinOff, MaxOff;
4349   if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
4350                                       MaxOff))
4351     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
4352 
4353   // Construct the complete offset.
4354   bool IsMulVL = ScaleValue.isScalable();
4355   unsigned Scale = ScaleValue.getKnownMinSize();
4356   int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
4357 
4358   const MachineOperand &ImmOpnd =
4359       MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
4360   Offset += ImmOpnd.getImm() * Scale;
4361 
4362   // If the offset doesn't match the scale, we rewrite the instruction to
4363   // use the unscaled instruction instead. Likewise, if we have a negative
4364   // offset and there is an unscaled op to use.
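  // For example, an LDRXui whose byte offset is 4 is not a multiple of its
  // scale (8), so it is rewritten to the unscaled LDURXi form.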
4365   Optional<unsigned> UnscaledOp =
4366       AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
4367   bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
4368   if (useUnscaledOp &&
4369       !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
4370                                       MaxOff))
4371     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
4372 
4373   Scale = ScaleValue.getKnownMinSize();
4374   assert(IsMulVL == ScaleValue.isScalable() &&
4375          "Unscaled opcode has different value for scalable");
4376 
4377   int64_t Remainder = Offset % Scale;
4378   assert(!(Remainder && useUnscaledOp) &&
4379          "Cannot have remainder when using unscaled op");
4380 
4381   assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
4382   int64_t NewOffset = Offset / Scale;
4383   if (MinOff <= NewOffset && NewOffset <= MaxOff)
4384     Offset = Remainder;
4385   else {
4386     NewOffset = NewOffset < 0 ? MinOff : MaxOff;
4387     Offset = Offset - NewOffset * Scale + Remainder;
4388   }
4389 
4390   if (EmittableOffset)
4391     *EmittableOffset = NewOffset;
4392   if (OutUseUnscaledOp)
4393     *OutUseUnscaledOp = useUnscaledOp;
4394   if (OutUnscaledOp && UnscaledOp)
4395     *OutUnscaledOp = *UnscaledOp;
4396 
4397   if (IsMulVL)
4398     SOffset = StackOffset::get(SOffset.getFixed(), Offset);
4399   else
4400     SOffset = StackOffset::get(Offset, SOffset.getScalable());
4401   return AArch64FrameOffsetCanUpdate |
4402          (SOffset ? 0 : AArch64FrameOffsetIsLegal);
4403 }
4404 
4405 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
4406                                     unsigned FrameReg, StackOffset &Offset,
4407                                     const AArch64InstrInfo *TII) {
4408   unsigned Opcode = MI.getOpcode();
4409   unsigned ImmIdx = FrameRegIdx + 1;
4410 
4411   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
4412     Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
4413     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
4414                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
4415                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
4416     MI.eraseFromParent();
4417     Offset = StackOffset();
4418     return true;
4419   }
4420 
4421   int64_t NewOffset;
4422   unsigned UnscaledOp;
4423   bool UseUnscaledOp;
4424   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
4425                                          &UnscaledOp, &NewOffset);
4426   if (Status & AArch64FrameOffsetCanUpdate) {
4427     if (Status & AArch64FrameOffsetIsLegal)
4428       // Replace the FrameIndex with FrameReg.
4429       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
4430     if (UseUnscaledOp)
4431       MI.setDesc(TII->get(UnscaledOp));
4432 
4433     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
4434     return !Offset;
4435   }
4436 
4437   return false;
4438 }
4439 
4440 MCInst AArch64InstrInfo::getNop() const {
4441   return MCInstBuilder(AArch64::HINT).addImm(0);
4442 }
4443 
4444 // AArch64 supports MachineCombiner.
4445 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
4446 
4447 // True when Opc sets the condition flags.
4448 static bool isCombineInstrSettingFlag(unsigned Opc) {
4449   switch (Opc) {
4450   case AArch64::ADDSWrr:
4451   case AArch64::ADDSWri:
4452   case AArch64::ADDSXrr:
4453   case AArch64::ADDSXri:
4454   case AArch64::SUBSWrr:
4455   case AArch64::SUBSXrr:
4456   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
4457   case AArch64::SUBSWri:
4458   case AArch64::SUBSXri:
4459     return true;
4460   default:
4461     break;
4462   }
4463   return false;
4464 }
4465 
4466 // 32b Opcodes that can be combined with a MUL
4467 static bool isCombineInstrCandidate32(unsigned Opc) {
4468   switch (Opc) {
4469   case AArch64::ADDWrr:
4470   case AArch64::ADDWri:
4471   case AArch64::SUBWrr:
4472   case AArch64::ADDSWrr:
4473   case AArch64::ADDSWri:
4474   case AArch64::SUBSWrr:
4475   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
4476   case AArch64::SUBWri:
4477   case AArch64::SUBSWri:
4478     return true;
4479   default:
4480     break;
4481   }
4482   return false;
4483 }
4484 
4485 // 64b Opcodes that can be combined with a MUL
4486 static bool isCombineInstrCandidate64(unsigned Opc) {
4487   switch (Opc) {
4488   case AArch64::ADDXrr:
4489   case AArch64::ADDXri:
4490   case AArch64::SUBXrr:
4491   case AArch64::ADDSXrr:
4492   case AArch64::ADDSXri:
4493   case AArch64::SUBSXrr:
4494   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
4495   case AArch64::SUBXri:
4496   case AArch64::SUBSXri:
4497   case AArch64::ADDv8i8:
4498   case AArch64::ADDv16i8:
4499   case AArch64::ADDv4i16:
4500   case AArch64::ADDv8i16:
4501   case AArch64::ADDv2i32:
4502   case AArch64::ADDv4i32:
4503   case AArch64::SUBv8i8:
4504   case AArch64::SUBv16i8:
4505   case AArch64::SUBv4i16:
4506   case AArch64::SUBv8i16:
4507   case AArch64::SUBv2i32:
4508   case AArch64::SUBv4i32:
4509     return true;
4510   default:
4511     break;
4512   }
4513   return false;
4514 }
4515 
4516 // FP Opcodes that can be combined with a FMUL.
4517 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
4518   switch (Inst.getOpcode()) {
4519   default:
4520     break;
4521   case AArch64::FADDHrr:
4522   case AArch64::FADDSrr:
4523   case AArch64::FADDDrr:
4524   case AArch64::FADDv4f16:
4525   case AArch64::FADDv8f16:
4526   case AArch64::FADDv2f32:
4527   case AArch64::FADDv2f64:
4528   case AArch64::FADDv4f32:
4529   case AArch64::FSUBHrr:
4530   case AArch64::FSUBSrr:
4531   case AArch64::FSUBDrr:
4532   case AArch64::FSUBv4f16:
4533   case AArch64::FSUBv8f16:
4534   case AArch64::FSUBv2f32:
4535   case AArch64::FSUBv2f64:
4536   case AArch64::FSUBv4f32:
4537     TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
4538     // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
4539     // the target options or if FADD/FSUB has the contract fast-math flag.
4540     return Options.UnsafeFPMath ||
4541            Options.AllowFPOpFusion == FPOpFusion::Fast ||
4542            Inst.getFlag(MachineInstr::FmContract);
4544   }
4545   return false;
4546 }
4547 
4548 // Opcodes that can be combined with a MUL
4549 static bool isCombineInstrCandidate(unsigned Opc) {
4550   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
4551 }
4552 
4553 //
4554 // Utility routine that checks if \param MO is defined by an
4555 // \param CombineOpc instruction in the basic block \param MBB
4556 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
4557                        unsigned CombineOpc, unsigned ZeroReg = 0,
4558                        bool CheckZeroReg = false) {
4559   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4560   MachineInstr *MI = nullptr;
4561 
4562   if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
4563     MI = MRI.getUniqueVRegDef(MO.getReg());
4564   // And it needs to be in the trace (otherwise, it won't have a depth).
4565   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
4566     return false;
4567   // Must only be used by the user we combine with.
4568   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
4569     return false;
4570 
4571   if (CheckZeroReg) {
4572     assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
4573            MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
4574            MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
4575     // The third input reg must be zero.
4576     if (MI->getOperand(3).getReg() != ZeroReg)
4577       return false;
4578   }
4579 
4580   return true;
4581 }
4582 
4583 //
4584 // Is \param MO defined by an integer multiply and can be combined?
4585 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
4586                               unsigned MulOpc, unsigned ZeroReg) {
4587   return canCombine(MBB, MO, MulOpc, ZeroReg, true);
4588 }
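//
// For instance (illustrative MIR; virtual register numbers are arbitrary):
//   %3:gpr32 = MADDWrrr %1, %2, $wzr    ; a plain MUL is a MADD with WZR
//   %4:gpr32 = ADDWrr %0, %3            ; the only non-debug use of %3
// canCombineWithMUL(MBB, <the %3 operand of the ADD>, AArch64::MADDWrrr,
//                   AArch64::WZR) returns true here.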
4589 
4590 //
4591 // Is \param MO defined by a floating-point multiply and can be combined?
4592 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
4593                                unsigned MulOpc) {
4594   return canCombine(MBB, MO, MulOpc);
4595 }
4596 
4597 // TODO: There are many more machine instruction opcodes to match:
4598 //       1. Other data types (integer, vectors)
4599 //       2. Other math / logic operations (xor, or)
4600 //       3. Other forms of the same operation (intrinsics and other variants)
4601 bool AArch64InstrInfo::isAssociativeAndCommutative(
4602     const MachineInstr &Inst) const {
4603   switch (Inst.getOpcode()) {
4604   case AArch64::FADDDrr:
4605   case AArch64::FADDSrr:
4606   case AArch64::FADDv2f32:
4607   case AArch64::FADDv2f64:
4608   case AArch64::FADDv4f32:
4609   case AArch64::FMULDrr:
4610   case AArch64::FMULSrr:
4611   case AArch64::FMULX32:
4612   case AArch64::FMULX64:
4613   case AArch64::FMULXv2f32:
4614   case AArch64::FMULXv2f64:
4615   case AArch64::FMULXv4f32:
4616   case AArch64::FMULv2f32:
4617   case AArch64::FMULv2f64:
4618   case AArch64::FMULv4f32:
4619     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
4620   default:
4621     return false;
4622   }
4623 }
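// Reassociation example (illustrative): under unsafe-fp-math the generic
// machine combiner may rewrite the serial chain
//   ((a fadd b) fadd c) fadd d
// into the shallower
//   (a fadd b) fadd (c fadd d)
// shortening the critical path from three dependent FADDs to two.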
4624 
4625 /// Find instructions that can be turned into madd.
4626 static bool getMaddPatterns(MachineInstr &Root,
4627                             SmallVectorImpl<MachineCombinerPattern> &Patterns) {
4628   unsigned Opc = Root.getOpcode();
4629   MachineBasicBlock &MBB = *Root.getParent();
4630   bool Found = false;
4631 
4632   if (!isCombineInstrCandidate(Opc))
4633     return false;
4634   if (isCombineInstrSettingFlag(Opc)) {
4635     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
4636     // When NZCV is live, bail out.
4637     if (Cmp_NZCV == -1)
4638       return false;
4639     unsigned NewOpc = convertToNonFlagSettingOpc(Root);
4640     // When the opcode can't change, bail out.
4641     // CHECKME: do we miss any cases for opcode conversion?
4642     if (NewOpc == Opc)
4643       return false;
4644     Opc = NewOpc;
4645   }
4646 
4647   auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
4648                       MachineCombinerPattern Pattern) {
4649     if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
4650       Patterns.push_back(Pattern);
4651       Found = true;
4652     }
4653   };
4654 
4655   auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) {
4656     if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
4657       Patterns.push_back(Pattern);
4658       Found = true;
4659     }
4660   };
4661 
4662   typedef MachineCombinerPattern MCP;
4663 
4664   switch (Opc) {
4665   default:
4666     break;
4667   case AArch64::ADDWrr:
4668     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
4669            "ADDWrr does not have register operands");
4670     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
4671     setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
4672     break;
4673   case AArch64::ADDXrr:
4674     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
4675     setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
4676     break;
4677   case AArch64::SUBWrr:
4678     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
4679     setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
4680     break;
4681   case AArch64::SUBXrr:
4682     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
4683     setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
4684     break;
4685   case AArch64::ADDWri:
4686     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
4687     break;
4688   case AArch64::ADDXri:
4689     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
4690     break;
4691   case AArch64::SUBWri:
4692     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
4693     break;
4694   case AArch64::SUBXri:
4695     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
4696     break;
4697   case AArch64::ADDv8i8:
4698     setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
4699     setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
4700     break;
4701   case AArch64::ADDv16i8:
4702     setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
4703     setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
4704     break;
4705   case AArch64::ADDv4i16:
4706     setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
4707     setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
4708     setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
4709     setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
4710     break;
4711   case AArch64::ADDv8i16:
4712     setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
4713     setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
4714     setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
4715     setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
4716     break;
4717   case AArch64::ADDv2i32:
4718     setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
4719     setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
4720     setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
4721     setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
4722     break;
4723   case AArch64::ADDv4i32:
4724     setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
4725     setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
4726     setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
4727     setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
4728     break;
4729   case AArch64::SUBv8i8:
4730     setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
4731     setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
4732     break;
4733   case AArch64::SUBv16i8:
4734     setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
4735     setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
4736     break;
4737   case AArch64::SUBv4i16:
4738     setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
4739     setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
4740     setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
4741     setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
4742     break;
4743   case AArch64::SUBv8i16:
4744     setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
4745     setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
4746     setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
4747     setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
4748     break;
4749   case AArch64::SUBv2i32:
4750     setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
4751     setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
4752     setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
4753     setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
4754     break;
4755   case AArch64::SUBv4i32:
4756     setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
4757     setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
4758     setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
4759     setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
4760     break;
4761   }
4762   return Found;
4763 }
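// For example (illustrative), a 64-bit multiply feeding the second operand
// of a subtract:
//   %3:gpr64 = MADDXrrr %1, %2, $xzr
//   %4:gpr64 = SUBXrr %0, %3
// is recorded as MCP::MULSUBX_OP2 and later rewritten by
// genAlternativeCodeSequence() into a single MSUBXrrr (%4 = %0 - %1 * %2).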
4764 /// Floating-Point Support
4765 
4766 /// Find instructions that can be turned into a fused multiply-add (fmadd).
4767 static bool getFMAPatterns(MachineInstr &Root,
4768                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
4769 
4770   if (!isCombineInstrCandidateFP(Root))
4771     return false;
4772 
4773   MachineBasicBlock &MBB = *Root.getParent();
4774   bool Found = false;
4775 
4776   auto Match = [&](int Opcode, int Operand,
4777                    MachineCombinerPattern Pattern) -> bool {
4778     if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
4779       Patterns.push_back(Pattern);
4780       return true;
4781     }
4782     return false;
4783   };
4784 
4785   typedef MachineCombinerPattern MCP;
4786 
4787   switch (Root.getOpcode()) {
4788   default:
4789     assert(false && "Unsupported FP instruction in combiner\n");
4790     break;
4791   case AArch64::FADDHrr:
4792     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
4793            "FADDHrr does not have register operands");
4794 
4795     Found  = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
4796     Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
4797     break;
4798   case AArch64::FADDSrr:
4799     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
4800            "FADDSrr does not have register operands");
4801 
4802     Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
4803              Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
4804 
4805     Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
4806              Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
4807     break;
4808   case AArch64::FADDDrr:
4809     Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
4810              Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
4811 
4812     Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
4813              Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
4814     break;
4815   case AArch64::FADDv4f16:
4816     Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
4817              Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
4818 
4819     Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
4820              Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
4821     break;
4822   case AArch64::FADDv8f16:
4823     Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
4824              Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
4825 
4826     Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
4827              Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
4828     break;
4829   case AArch64::FADDv2f32:
4830     Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
4831              Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
4832 
4833     Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
4834              Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
4835     break;
4836   case AArch64::FADDv2f64:
4837     Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
4838              Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
4839 
4840     Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
4841              Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
4842     break;
4843   case AArch64::FADDv4f32:
4844     Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
4845              Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
4846 
4847     Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
4848              Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
4849     break;
4850   case AArch64::FSUBHrr:
4851     Found  = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
4852     Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
4853     Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
4854     break;
4855   case AArch64::FSUBSrr:
4856     Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
4857 
4858     Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
4859              Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
4860 
4861     Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
4862     break;
4863   case AArch64::FSUBDrr:
4864     Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
4865 
4866     Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
4867              Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
4868 
4869     Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
4870     break;
4871   case AArch64::FSUBv4f16:
4872     Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
4873              Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
4874 
4875     Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
4876              Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
4877     break;
4878   case AArch64::FSUBv8f16:
4879     Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
4880              Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
4881 
4882     Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
4883              Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
4884     break;
4885   case AArch64::FSUBv2f32:
4886     Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
4887              Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
4888 
4889     Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
4890              Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
4891     break;
4892   case AArch64::FSUBv2f64:
4893     Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
4894              Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
4895 
4896     Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
4897              Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
4898     break;
4899   case AArch64::FSUBv4f32:
4900     Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
4901              Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
4902 
4903     Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
4904              Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
4905     break;
4906   }
4907   return Found;
4908 }
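// For instance (illustrative), with the contract fast-math flag present:
//   %3:fpr32 = contract FMULSrr %1, %2
//   %4:fpr32 = contract FADDSrr %0, %3
// is recorded as MCP::FMULADDS_OP2 and later fused into
//   %4:fpr32 = FMADDSrr %1, %2, %0      ; %4 = %0 + %1 * %2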
4909 
4910 static bool getFMULPatterns(MachineInstr &Root,
4911                             SmallVectorImpl<MachineCombinerPattern> &Patterns) {
4912   MachineBasicBlock &MBB = *Root.getParent();
4913   bool Found = false;
4914 
4915   auto Match = [&](unsigned Opcode, int Operand,
4916                    MachineCombinerPattern Pattern) -> bool {
4917     MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4918     MachineOperand &MO = Root.getOperand(Operand);
4919     MachineInstr *MI = nullptr;
4920     if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
4921       MI = MRI.getUniqueVRegDef(MO.getReg());
4922     if (MI && MI->getOpcode() == Opcode) {
4923       Patterns.push_back(Pattern);
4924       return true;
4925     }
4926     return false;
4927   };
4928 
4929   typedef MachineCombinerPattern MCP;
4930 
4931   switch (Root.getOpcode()) {
4932   default:
4933     return false;
4934   case AArch64::FMULv2f32:
4935     Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
4936     Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
4937     break;
4938   case AArch64::FMULv2f64:
4939     Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
4940     Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
4941     break;
4942   case AArch64::FMULv4f16:
4943     Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
4944     Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
4945     break;
4946   case AArch64::FMULv4f32:
4947     Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
4948     Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
4949     break;
4950   case AArch64::FMULv8f16:
4951     Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
4952     Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
4953     break;
4954   }
4955 
4956   return Found;
4957 }
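// For example (illustrative): a multiply by a broadcast lane
//   %2:fpr128 = DUPv4i32lane %1, 1
//   %3:fpr128 = FMULv4f32 %0, %2
// is recorded as MCP::FMULv4i32_indexed_OP2 and genIndexedMultiply() later
// folds it into
//   %3:fpr128 = FMULv4i32_indexed %0, %1, 1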
4958 
4959 /// Return true when a code sequence can improve throughput. It
4960 /// should be called only for instructions in loops.
4961 /// \param Pattern - combiner pattern
4962 bool AArch64InstrInfo::isThroughputPattern(
4963     MachineCombinerPattern Pattern) const {
4964   switch (Pattern) {
4965   default:
4966     break;
4967   case MachineCombinerPattern::FMULADDH_OP1:
4968   case MachineCombinerPattern::FMULADDH_OP2:
4969   case MachineCombinerPattern::FMULSUBH_OP1:
4970   case MachineCombinerPattern::FMULSUBH_OP2:
4971   case MachineCombinerPattern::FMULADDS_OP1:
4972   case MachineCombinerPattern::FMULADDS_OP2:
4973   case MachineCombinerPattern::FMULSUBS_OP1:
4974   case MachineCombinerPattern::FMULSUBS_OP2:
4975   case MachineCombinerPattern::FMULADDD_OP1:
4976   case MachineCombinerPattern::FMULADDD_OP2:
4977   case MachineCombinerPattern::FMULSUBD_OP1:
4978   case MachineCombinerPattern::FMULSUBD_OP2:
4979   case MachineCombinerPattern::FNMULSUBH_OP1:
4980   case MachineCombinerPattern::FNMULSUBS_OP1:
4981   case MachineCombinerPattern::FNMULSUBD_OP1:
4982   case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
4983   case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
4984   case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
4985   case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
4986   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4987   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4988   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4989   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4990   case MachineCombinerPattern::FMLAv4f16_OP2:
4991   case MachineCombinerPattern::FMLAv4f16_OP1:
4992   case MachineCombinerPattern::FMLAv8f16_OP1:
4993   case MachineCombinerPattern::FMLAv8f16_OP2:
4994   case MachineCombinerPattern::FMLAv2f32_OP2:
4995   case MachineCombinerPattern::FMLAv2f32_OP1:
4996   case MachineCombinerPattern::FMLAv2f64_OP1:
4997   case MachineCombinerPattern::FMLAv2f64_OP2:
4998   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4999   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
5000   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
5001   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
5002   case MachineCombinerPattern::FMLAv4f32_OP1:
5003   case MachineCombinerPattern::FMLAv4f32_OP2:
5004   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
5005   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
5006   case MachineCombinerPattern::FMLSv4i16_indexed_OP1:
5007   case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
5008   case MachineCombinerPattern::FMLSv8i16_indexed_OP1:
5009   case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
5010   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
5011   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
5012   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
5013   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
5014   case MachineCombinerPattern::FMLSv4f16_OP1:
5015   case MachineCombinerPattern::FMLSv4f16_OP2:
5016   case MachineCombinerPattern::FMLSv8f16_OP1:
5017   case MachineCombinerPattern::FMLSv8f16_OP2:
5018   case MachineCombinerPattern::FMLSv2f32_OP2:
5019   case MachineCombinerPattern::FMLSv2f64_OP2:
5020   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
5021   case MachineCombinerPattern::FMLSv4f32_OP2:
5022   case MachineCombinerPattern::FMULv2i32_indexed_OP1:
5023   case MachineCombinerPattern::FMULv2i32_indexed_OP2:
5024   case MachineCombinerPattern::FMULv2i64_indexed_OP1:
5025   case MachineCombinerPattern::FMULv2i64_indexed_OP2:
5026   case MachineCombinerPattern::FMULv4i16_indexed_OP1:
5027   case MachineCombinerPattern::FMULv4i16_indexed_OP2:
5028   case MachineCombinerPattern::FMULv4i32_indexed_OP1:
5029   case MachineCombinerPattern::FMULv4i32_indexed_OP2:
5030   case MachineCombinerPattern::FMULv8i16_indexed_OP1:
5031   case MachineCombinerPattern::FMULv8i16_indexed_OP2:
5032   case MachineCombinerPattern::MULADDv8i8_OP1:
5033   case MachineCombinerPattern::MULADDv8i8_OP2:
5034   case MachineCombinerPattern::MULADDv16i8_OP1:
5035   case MachineCombinerPattern::MULADDv16i8_OP2:
5036   case MachineCombinerPattern::MULADDv4i16_OP1:
5037   case MachineCombinerPattern::MULADDv4i16_OP2:
5038   case MachineCombinerPattern::MULADDv8i16_OP1:
5039   case MachineCombinerPattern::MULADDv8i16_OP2:
5040   case MachineCombinerPattern::MULADDv2i32_OP1:
5041   case MachineCombinerPattern::MULADDv2i32_OP2:
5042   case MachineCombinerPattern::MULADDv4i32_OP1:
5043   case MachineCombinerPattern::MULADDv4i32_OP2:
5044   case MachineCombinerPattern::MULSUBv8i8_OP1:
5045   case MachineCombinerPattern::MULSUBv8i8_OP2:
5046   case MachineCombinerPattern::MULSUBv16i8_OP1:
5047   case MachineCombinerPattern::MULSUBv16i8_OP2:
5048   case MachineCombinerPattern::MULSUBv4i16_OP1:
5049   case MachineCombinerPattern::MULSUBv4i16_OP2:
5050   case MachineCombinerPattern::MULSUBv8i16_OP1:
5051   case MachineCombinerPattern::MULSUBv8i16_OP2:
5052   case MachineCombinerPattern::MULSUBv2i32_OP1:
5053   case MachineCombinerPattern::MULSUBv2i32_OP2:
5054   case MachineCombinerPattern::MULSUBv4i32_OP1:
5055   case MachineCombinerPattern::MULSUBv4i32_OP2:
5056   case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
5057   case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
5058   case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
5059   case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
5060   case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
5061   case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
5062   case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
5063   case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
5064   case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
5065   case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
5066   case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
5067   case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
5068   case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
5069   case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
5070   case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
5071   case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
5072     return true;
5073   } // end switch (Pattern)
5074   return false;
5075 }
5076 /// Return true when there is potentially a faster code sequence for an
5077 /// instruction chain ending in \p Root. All potential patterns are listed in
5078 /// the \p Pattern vector. Pattern should be sorted in priority order since the
5079 /// pattern evaluator stops checking as soon as it finds a faster sequence.
5081 bool AArch64InstrInfo::getMachineCombinerPatterns(
5082     MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
5083     bool DoRegPressureReduce) const {
5084   // Integer patterns
5085   if (getMaddPatterns(Root, Patterns))
5086     return true;
5087   // Floating point patterns
5088   if (getFMULPatterns(Root, Patterns))
5089     return true;
5090   if (getFMAPatterns(Root, Patterns))
5091     return true;
5092 
5093   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
5094                                                      DoRegPressureReduce);
5095 }
5096 
5097 enum class FMAInstKind { Default, Indexed, Accumulator };
5098 /// genFusedMultiply - Generate fused multiply instructions.
5099 /// This function supports both integer and floating point instructions.
5100 /// A typical example:
5101 ///  F|MUL I=A,B,0
5102 ///  F|ADD R,I,C
5103 ///  ==> F|MADD R,A,B,C
5104 /// \param MF Containing MachineFunction
5105 /// \param MRI Register information
5106 /// \param TII Target information
5107 /// \param Root is the F|ADD instruction
5108 /// \param [out] InsInstrs is a vector of machine instructions and will
5109 /// contain the generated madd instruction
5110 /// \param IdxMulOpd is index of operand in Root that is the result of
5111 /// the F|MUL. In the example above IdxMulOpd is 1.
5112 /// \param MaddOpc the opcode of the f|madd instruction
5113 /// \param RC Register class of operands
5114 /// \param kind Kind of fma instruction (addressing mode) to be generated
5115 /// \param ReplacedAddend is the result register from the instruction
5116 /// replacing the non-combined operand, if any.
5117 static MachineInstr *
5118 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
5119                  const TargetInstrInfo *TII, MachineInstr &Root,
5120                  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
5121                  unsigned MaddOpc, const TargetRegisterClass *RC,
5122                  FMAInstKind kind = FMAInstKind::Default,
5123                  const Register *ReplacedAddend = nullptr) {
5124   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
5125 
5126   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
5127   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
5128   Register ResultReg = Root.getOperand(0).getReg();
5129   Register SrcReg0 = MUL->getOperand(1).getReg();
5130   bool Src0IsKill = MUL->getOperand(1).isKill();
5131   Register SrcReg1 = MUL->getOperand(2).getReg();
5132   bool Src1IsKill = MUL->getOperand(2).isKill();
5133 
5134   unsigned SrcReg2;
5135   bool Src2IsKill;
5136   if (ReplacedAddend) {
5137     // If we just generated a new addend, we must be its only use.
5138     SrcReg2 = *ReplacedAddend;
5139     Src2IsKill = true;
5140   } else {
5141     SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
5142     Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
5143   }
5144 
5145   if (Register::isVirtualRegister(ResultReg))
5146     MRI.constrainRegClass(ResultReg, RC);
5147   if (Register::isVirtualRegister(SrcReg0))
5148     MRI.constrainRegClass(SrcReg0, RC);
5149   if (Register::isVirtualRegister(SrcReg1))
5150     MRI.constrainRegClass(SrcReg1, RC);
5151   if (Register::isVirtualRegister(SrcReg2))
5152     MRI.constrainRegClass(SrcReg2, RC);
5153 
5154   MachineInstrBuilder MIB;
5155   if (kind == FMAInstKind::Default)
5156     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
5157               .addReg(SrcReg0, getKillRegState(Src0IsKill))
5158               .addReg(SrcReg1, getKillRegState(Src1IsKill))
5159               .addReg(SrcReg2, getKillRegState(Src2IsKill));
5160   else if (kind == FMAInstKind::Indexed)
5161     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
5162               .addReg(SrcReg2, getKillRegState(Src2IsKill))
5163               .addReg(SrcReg0, getKillRegState(Src0IsKill))
5164               .addReg(SrcReg1, getKillRegState(Src1IsKill))
5165               .addImm(MUL->getOperand(3).getImm());
5166   else if (kind == FMAInstKind::Accumulator)
5167     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
5168               .addReg(SrcReg2, getKillRegState(Src2IsKill))
5169               .addReg(SrcReg0, getKillRegState(Src0IsKill))
5170               .addReg(SrcReg1, getKillRegState(Src1IsKill));
5171   else
5172     assert(false && "Invalid FMA instruction kind \n");
5173   // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
5174   InsInstrs.push_back(MIB);
5175   return MUL;
5176 }
5177 
5178 /// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
5179 static MachineInstr *
5180 genIndexedMultiply(MachineInstr &Root,
5181                    SmallVectorImpl<MachineInstr *> &InsInstrs,
5182                    unsigned IdxDupOp, unsigned MulOpc,
5183                    const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
5184   assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
5185          "Invalid index of FMUL operand");
5186 
5187   MachineFunction &MF = *Root.getMF();
5188   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
5189 
5190   MachineInstr *Dup =
5191       MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
5192 
5193   Register DupSrcReg = Dup->getOperand(1).getReg();
5194   MRI.clearKillFlags(DupSrcReg);
5195   MRI.constrainRegClass(DupSrcReg, RC);
5196 
5197   unsigned DupSrcLane = Dup->getOperand(2).getImm();
5198 
5199   unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
5200   MachineOperand &MulOp = Root.getOperand(IdxMulOp);
5201 
5202   Register ResultReg = Root.getOperand(0).getReg();
5203 
5204   MachineInstrBuilder MIB;
5205   MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MulOpc), ResultReg)
5206             .add(MulOp)
5207             .addReg(DupSrcReg)
5208             .addImm(DupSrcLane);
5209 
5210   InsInstrs.push_back(MIB);
5211   return &Root;
5212 }
5213 
5214 /// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
5215 /// instructions.
5216 ///
5217 /// \see genFusedMultiply
5218 static MachineInstr *genFusedMultiplyAcc(
5219     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
5220     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
5221     unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
5222   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
5223                           FMAInstKind::Accumulator);
5224 }
5225 
5226 /// genNeg - Helper to generate an intermediate negation of the second operand
5227 /// of Root
5228 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
5229                        const TargetInstrInfo *TII, MachineInstr &Root,
5230                        SmallVectorImpl<MachineInstr *> &InsInstrs,
5231                        DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
5232                        unsigned MnegOpc, const TargetRegisterClass *RC) {
5233   Register NewVR = MRI.createVirtualRegister(RC);
5234   MachineInstrBuilder MIB =
5235       BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR)
5236           .add(Root.getOperand(2));
5237   InsInstrs.push_back(MIB);
5238 
5239   assert(InstrIdxForVirtReg.empty());
5240   InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5241 
5242   return NewVR;
5243 }
5244 
5245 /// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
5246 /// instructions with an additional negation of the accumulator
5247 static MachineInstr *genFusedMultiplyAccNeg(
5248     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
5249     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
5250     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
5251     unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
5252   assert(IdxMulOpd == 1);
5253 
5254   Register NewVR =
5255       genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
5256   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
5257                           FMAInstKind::Accumulator, &NewVR);
5258 }
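// Illustrative use (the MULSUBv8i8_OP1 rewrite below): for
//   %3:fpr64 = MULv8i8 %1, %2
//   %4:fpr64 = SUBv8i8 %3, %0           ; %4 = %1*%2 - %0
// the accumulator is negated first and then folded into an MLA:
//   %5:fpr64 = NEGv8i8 %0
//   %4:fpr64 = MLAv8i8 %5, %1, %2       ; %4 = %5 + %1*%2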
5259 
5260 /// genFusedMultiplyIdx - Helper to generate indexed (by-element) fused
5261 /// multiply accumulate instructions.
5262 ///
5263 /// \see genFusedMultiply
5264 static MachineInstr *genFusedMultiplyIdx(
5265     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
5266     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
5267     unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
5268   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
5269                           FMAInstKind::Indexed);
5270 }
5271 
5272 /// genFusedMultiplyIdxNeg - Helper to generate indexed fused multiply
5273 /// accumulate instructions with an additional negation of the accumulator
5274 static MachineInstr *genFusedMultiplyIdxNeg(
5275     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
5276     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
5277     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
5278     unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
5279   assert(IdxMulOpd == 1);
5280 
5281   Register NewVR =
5282       genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
5283 
5284   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
5285                           FMAInstKind::Indexed, &NewVR);
5286 }
5287 
5288 /// genMaddR - Generate madd instruction and combine mul and add using
5289 /// an extra virtual register
5290 /// Example - an ADD intermediate needs to be stored in a register:
5291 ///   MUL I=A,B,0
5292 ///   ADD R,I,Imm
5293 ///   ==> ORR  V, ZR, Imm
5294 ///   ==> MADD R,A,B,V
5295 /// \param MF Containing MachineFunction
5296 /// \param MRI Register information
5297 /// \param TII Target information
5298 /// \param Root is the ADD instruction
5299 /// \param [out] InsInstrs is a vector of machine instructions and will
5300 /// contain the generated madd instruction
5301 /// \param IdxMulOpd is index of operand in Root that is the result of
5302 /// the MUL. In the example above IdxMulOpd is 1.
5303 /// \param MaddOpc the opcode of the madd instruction
5304 /// \param VR is a virtual register that holds the value of an ADD operand
5305 /// (V in the example above).
5306 /// \param RC Register class of operands
5307 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
5308                               const TargetInstrInfo *TII, MachineInstr &Root,
5309                               SmallVectorImpl<MachineInstr *> &InsInstrs,
5310                               unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
5311                               const TargetRegisterClass *RC) {
5312   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
5313 
5314   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
5315   Register ResultReg = Root.getOperand(0).getReg();
5316   Register SrcReg0 = MUL->getOperand(1).getReg();
5317   bool Src0IsKill = MUL->getOperand(1).isKill();
5318   Register SrcReg1 = MUL->getOperand(2).getReg();
5319   bool Src1IsKill = MUL->getOperand(2).isKill();
5320 
5321   if (Register::isVirtualRegister(ResultReg))
5322     MRI.constrainRegClass(ResultReg, RC);
5323   if (Register::isVirtualRegister(SrcReg0))
5324     MRI.constrainRegClass(SrcReg0, RC);
5325   if (Register::isVirtualRegister(SrcReg1))
5326     MRI.constrainRegClass(SrcReg1, RC);
5327   if (Register::isVirtualRegister(VR))
5328     MRI.constrainRegClass(VR, RC);
5329 
5330   MachineInstrBuilder MIB =
5331       BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
5332           .addReg(SrcReg0, getKillRegState(Src0IsKill))
5333           .addReg(SrcReg1, getKillRegState(Src1IsKill))
5334           .addReg(VR);
5335   // Insert the MADD
5336   InsInstrs.push_back(MIB);
5337   return MUL;
5338 }
5339 
5340 /// When getMachineCombinerPatterns() finds potential patterns,
5341 /// this function generates the instructions that could replace the
5342 /// original code sequence
5343 void AArch64InstrInfo::genAlternativeCodeSequence(
5344     MachineInstr &Root, MachineCombinerPattern Pattern,
5345     SmallVectorImpl<MachineInstr *> &InsInstrs,
5346     SmallVectorImpl<MachineInstr *> &DelInstrs,
5347     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
5348   MachineBasicBlock &MBB = *Root.getParent();
5349   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
5350   MachineFunction &MF = *MBB.getParent();
5351   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
5352 
5353   MachineInstr *MUL = nullptr;
5354   const TargetRegisterClass *RC;
5355   unsigned Opc;
5356   switch (Pattern) {
5357   default:
5358     // Reassociate instructions.
5359     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
5360                                                 DelInstrs, InstrIdxForVirtReg);
5361     return;
5362   case MachineCombinerPattern::MULADDW_OP1:
5363   case MachineCombinerPattern::MULADDX_OP1:
5364     // MUL I=A,B,0
5365     // ADD R,I,C
5366     // ==> MADD R,A,B,C
5367     // --- Create(MADD);
5368     if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
5369       Opc = AArch64::MADDWrrr;
5370       RC = &AArch64::GPR32RegClass;
5371     } else {
5372       Opc = AArch64::MADDXrrr;
5373       RC = &AArch64::GPR64RegClass;
5374     }
5375     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5376     break;
5377   case MachineCombinerPattern::MULADDW_OP2:
5378   case MachineCombinerPattern::MULADDX_OP2:
5379     // MUL I=A,B,0
5380     // ADD R,C,I
5381     // ==> MADD R,A,B,C
5382     // --- Create(MADD);
5383     if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
5384       Opc = AArch64::MADDWrrr;
5385       RC = &AArch64::GPR32RegClass;
5386     } else {
5387       Opc = AArch64::MADDXrrr;
5388       RC = &AArch64::GPR64RegClass;
5389     }
5390     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5391     break;
5392   case MachineCombinerPattern::MULADDWI_OP1:
5393   case MachineCombinerPattern::MULADDXI_OP1: {
5394     // MUL I=A,B,0
5395     // ADD R,I,Imm
5396     // ==> ORR  V, ZR, Imm
5397     // ==> MADD R,A,B,V
5398     // --- Create(MADD);
5399     const TargetRegisterClass *OrrRC;
5400     unsigned BitSize, OrrOpc, ZeroReg;
5401     if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
5402       OrrOpc = AArch64::ORRWri;
5403       OrrRC = &AArch64::GPR32spRegClass;
5404       BitSize = 32;
5405       ZeroReg = AArch64::WZR;
5406       Opc = AArch64::MADDWrrr;
5407       RC = &AArch64::GPR32RegClass;
5408     } else {
5409       OrrOpc = AArch64::ORRXri;
5410       OrrRC = &AArch64::GPR64spRegClass;
5411       BitSize = 64;
5412       ZeroReg = AArch64::XZR;
5413       Opc = AArch64::MADDXrrr;
5414       RC = &AArch64::GPR64RegClass;
5415     }
5416     Register NewVR = MRI.createVirtualRegister(OrrRC);
5417     uint64_t Imm = Root.getOperand(2).getImm();
5418 
5419     if (Root.getOperand(3).isImm()) {
5420       unsigned Val = Root.getOperand(3).getImm();
5421       Imm = Imm << Val;
5422     }
5423     uint64_t UImm = SignExtend64(Imm, BitSize);
5424     uint64_t Encoding;
5425     if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
5426       return;
5427     MachineInstrBuilder MIB1 =
5428         BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
5429             .addReg(ZeroReg)
5430             .addImm(Encoding);
5431     InsInstrs.push_back(MIB1);
5432     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5433     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
5434     break;
5435   }
5436   case MachineCombinerPattern::MULSUBW_OP1:
5437   case MachineCombinerPattern::MULSUBX_OP1: {
5438     // MUL I=A,B,0
5439     // SUB R,I, C
5440     // ==> SUB  V, 0, C
5441     // ==> MADD R,A,B,V // = -C + A*B
5442     // --- Create(MADD);
5443     const TargetRegisterClass *SubRC;
5444     unsigned SubOpc, ZeroReg;
5445     if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
5446       SubOpc = AArch64::SUBWrr;
5447       SubRC = &AArch64::GPR32spRegClass;
5448       ZeroReg = AArch64::WZR;
5449       Opc = AArch64::MADDWrrr;
5450       RC = &AArch64::GPR32RegClass;
5451     } else {
5452       SubOpc = AArch64::SUBXrr;
5453       SubRC = &AArch64::GPR64spRegClass;
5454       ZeroReg = AArch64::XZR;
5455       Opc = AArch64::MADDXrrr;
5456       RC = &AArch64::GPR64RegClass;
5457     }
5458     Register NewVR = MRI.createVirtualRegister(SubRC);
5459     // SUB NewVR, 0, C
5460     MachineInstrBuilder MIB1 =
5461         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
5462             .addReg(ZeroReg)
5463             .add(Root.getOperand(2));
5464     InsInstrs.push_back(MIB1);
5465     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5466     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
5467     break;
5468   }
5469   case MachineCombinerPattern::MULSUBW_OP2:
5470   case MachineCombinerPattern::MULSUBX_OP2:
5471     // MUL I=A,B,0
5472     // SUB R,C,I
5473     // ==> MSUB R,A,B,C (computes C - A*B)
5474     // --- Create(MSUB);
5475     if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
5476       Opc = AArch64::MSUBWrrr;
5477       RC = &AArch64::GPR32RegClass;
5478     } else {
5479       Opc = AArch64::MSUBXrrr;
5480       RC = &AArch64::GPR64RegClass;
5481     }
5482     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5483     break;
5484   case MachineCombinerPattern::MULSUBWI_OP1:
5485   case MachineCombinerPattern::MULSUBXI_OP1: {
5486     // MUL I=A,B,0
5487     // SUB R,I, Imm
5488     // ==> ORR  V, ZR, -Imm
5489     // ==> MADD R,A,B,V // = -Imm + A*B
5490     // --- Create(MADD);
5491     const TargetRegisterClass *OrrRC;
5492     unsigned BitSize, OrrOpc, ZeroReg;
5493     if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
5494       OrrOpc = AArch64::ORRWri;
5495       OrrRC = &AArch64::GPR32spRegClass;
5496       BitSize = 32;
5497       ZeroReg = AArch64::WZR;
5498       Opc = AArch64::MADDWrrr;
5499       RC = &AArch64::GPR32RegClass;
5500     } else {
5501       OrrOpc = AArch64::ORRXri;
5502       OrrRC = &AArch64::GPR64spRegClass;
5503       BitSize = 64;
5504       ZeroReg = AArch64::XZR;
5505       Opc = AArch64::MADDXrrr;
5506       RC = &AArch64::GPR64RegClass;
5507     }
5508     Register NewVR = MRI.createVirtualRegister(OrrRC);
5509     uint64_t Imm = Root.getOperand(2).getImm();
5510     if (Root.getOperand(3).isImm()) {
5511       unsigned Val = Root.getOperand(3).getImm();
5512       Imm = Imm << Val;
5513     }
5514     uint64_t UImm = SignExtend64(-Imm, BitSize);
5515     uint64_t Encoding;
5516     if (!AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding))
5517       return;
5518     MachineInstrBuilder MIB1 =
5519         BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
5520             .addReg(ZeroReg)
5521             .addImm(Encoding);
5522     InsInstrs.push_back(MIB1);
5523     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
5524     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
5525     break;
5526   }
5527 
5528   case MachineCombinerPattern::MULADDv8i8_OP1:
5529     Opc = AArch64::MLAv8i8;
5530     RC = &AArch64::FPR64RegClass;
5531     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5532     break;
5533   case MachineCombinerPattern::MULADDv8i8_OP2:
5534     Opc = AArch64::MLAv8i8;
5535     RC = &AArch64::FPR64RegClass;
5536     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5537     break;
5538   case MachineCombinerPattern::MULADDv16i8_OP1:
5539     Opc = AArch64::MLAv16i8;
5540     RC = &AArch64::FPR128RegClass;
5541     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5542     break;
5543   case MachineCombinerPattern::MULADDv16i8_OP2:
5544     Opc = AArch64::MLAv16i8;
5545     RC = &AArch64::FPR128RegClass;
5546     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5547     break;
5548   case MachineCombinerPattern::MULADDv4i16_OP1:
5549     Opc = AArch64::MLAv4i16;
5550     RC = &AArch64::FPR64RegClass;
5551     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5552     break;
5553   case MachineCombinerPattern::MULADDv4i16_OP2:
5554     Opc = AArch64::MLAv4i16;
5555     RC = &AArch64::FPR64RegClass;
5556     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5557     break;
5558   case MachineCombinerPattern::MULADDv8i16_OP1:
5559     Opc = AArch64::MLAv8i16;
5560     RC = &AArch64::FPR128RegClass;
5561     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5562     break;
5563   case MachineCombinerPattern::MULADDv8i16_OP2:
5564     Opc = AArch64::MLAv8i16;
5565     RC = &AArch64::FPR128RegClass;
5566     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5567     break;
5568   case MachineCombinerPattern::MULADDv2i32_OP1:
5569     Opc = AArch64::MLAv2i32;
5570     RC = &AArch64::FPR64RegClass;
5571     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5572     break;
5573   case MachineCombinerPattern::MULADDv2i32_OP2:
5574     Opc = AArch64::MLAv2i32;
5575     RC = &AArch64::FPR64RegClass;
5576     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5577     break;
5578   case MachineCombinerPattern::MULADDv4i32_OP1:
5579     Opc = AArch64::MLAv4i32;
5580     RC = &AArch64::FPR128RegClass;
5581     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5582     break;
5583   case MachineCombinerPattern::MULADDv4i32_OP2:
5584     Opc = AArch64::MLAv4i32;
5585     RC = &AArch64::FPR128RegClass;
5586     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5587     break;
5588 
5589   case MachineCombinerPattern::MULSUBv8i8_OP1:
5590     Opc = AArch64::MLAv8i8;
5591     RC = &AArch64::FPR64RegClass;
5592     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
5593                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
5594                                  RC);
5595     break;
5596   case MachineCombinerPattern::MULSUBv8i8_OP2:
5597     Opc = AArch64::MLSv8i8;
5598     RC = &AArch64::FPR64RegClass;
5599     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5600     break;
5601   case MachineCombinerPattern::MULSUBv16i8_OP1:
5602     Opc = AArch64::MLAv16i8;
5603     RC = &AArch64::FPR128RegClass;
5604     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
5605                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
5606                                  RC);
5607     break;
5608   case MachineCombinerPattern::MULSUBv16i8_OP2:
5609     Opc = AArch64::MLSv16i8;
5610     RC = &AArch64::FPR128RegClass;
5611     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5612     break;
5613   case MachineCombinerPattern::MULSUBv4i16_OP1:
5614     Opc = AArch64::MLAv4i16;
5615     RC = &AArch64::FPR64RegClass;
5616     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
5617                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
5618                                  RC);
5619     break;
5620   case MachineCombinerPattern::MULSUBv4i16_OP2:
5621     Opc = AArch64::MLSv4i16;
5622     RC = &AArch64::FPR64RegClass;
5623     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5624     break;
5625   case MachineCombinerPattern::MULSUBv8i16_OP1:
5626     Opc = AArch64::MLAv8i16;
5627     RC = &AArch64::FPR128RegClass;
5628     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
5629                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
5630                                  RC);
5631     break;
5632   case MachineCombinerPattern::MULSUBv8i16_OP2:
5633     Opc = AArch64::MLSv8i16;
5634     RC = &AArch64::FPR128RegClass;
5635     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5636     break;
5637   case MachineCombinerPattern::MULSUBv2i32_OP1:
5638     Opc = AArch64::MLAv2i32;
5639     RC = &AArch64::FPR64RegClass;
5640     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
5641                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
5642                                  RC);
5643     break;
5644   case MachineCombinerPattern::MULSUBv2i32_OP2:
5645     Opc = AArch64::MLSv2i32;
5646     RC = &AArch64::FPR64RegClass;
5647     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5648     break;
5649   case MachineCombinerPattern::MULSUBv4i32_OP1:
5650     Opc = AArch64::MLAv4i32;
5651     RC = &AArch64::FPR128RegClass;
5652     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
5653                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
5654                                  RC);
5655     break;
5656   case MachineCombinerPattern::MULSUBv4i32_OP2:
5657     Opc = AArch64::MLSv4i32;
5658     RC = &AArch64::FPR128RegClass;
5659     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5660     break;
5661 
5662   case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
5663     Opc = AArch64::MLAv4i16_indexed;
5664     RC = &AArch64::FPR64RegClass;
5665     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5666     break;
5667   case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
5668     Opc = AArch64::MLAv4i16_indexed;
5669     RC = &AArch64::FPR64RegClass;
5670     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5671     break;
5672   case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
5673     Opc = AArch64::MLAv8i16_indexed;
5674     RC = &AArch64::FPR128RegClass;
5675     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5676     break;
5677   case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
5678     Opc = AArch64::MLAv8i16_indexed;
5679     RC = &AArch64::FPR128RegClass;
5680     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5681     break;
5682   case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
5683     Opc = AArch64::MLAv2i32_indexed;
5684     RC = &AArch64::FPR64RegClass;
5685     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5686     break;
5687   case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
5688     Opc = AArch64::MLAv2i32_indexed;
5689     RC = &AArch64::FPR64RegClass;
5690     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5691     break;
5692   case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
5693     Opc = AArch64::MLAv4i32_indexed;
5694     RC = &AArch64::FPR128RegClass;
5695     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5696     break;
5697   case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
5698     Opc = AArch64::MLAv4i32_indexed;
5699     RC = &AArch64::FPR128RegClass;
5700     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5701     break;
5702 
5703   case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
5704     Opc = AArch64::MLAv4i16_indexed;
5705     RC = &AArch64::FPR64RegClass;
5706     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
5707                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
5708                                  RC);
5709     break;
5710   case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
5711     Opc = AArch64::MLSv4i16_indexed;
5712     RC = &AArch64::FPR64RegClass;
5713     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5714     break;
5715   case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
5716     Opc = AArch64::MLAv8i16_indexed;
5717     RC = &AArch64::FPR128RegClass;
5718     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
5719                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
5720                                  RC);
5721     break;
5722   case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
5723     Opc = AArch64::MLSv8i16_indexed;
5724     RC = &AArch64::FPR128RegClass;
5725     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5726     break;
5727   case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
5728     Opc = AArch64::MLAv2i32_indexed;
5729     RC = &AArch64::FPR64RegClass;
5730     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
5731                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
5732                                  RC);
5733     break;
5734   case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
5735     Opc = AArch64::MLSv2i32_indexed;
5736     RC = &AArch64::FPR64RegClass;
5737     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5738     break;
5739   case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
5740     Opc = AArch64::MLAv4i32_indexed;
5741     RC = &AArch64::FPR128RegClass;
5742     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
5743                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
5744                                  RC);
5745     break;
5746   case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
5747     Opc = AArch64::MLSv4i32_indexed;
5748     RC = &AArch64::FPR128RegClass;
5749     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5750     break;
5751 
5752   // Floating Point Support
5753   case MachineCombinerPattern::FMULADDH_OP1:
5754     Opc = AArch64::FMADDHrrr;
5755     RC = &AArch64::FPR16RegClass;
5756     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5757     break;
5758   case MachineCombinerPattern::FMULADDS_OP1:
5759     Opc = AArch64::FMADDSrrr;
5760     RC = &AArch64::FPR32RegClass;
5761     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5762     break;
5763   case MachineCombinerPattern::FMULADDD_OP1:
5764     Opc = AArch64::FMADDDrrr;
5765     RC = &AArch64::FPR64RegClass;
5766     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5767     break;
5768 
5769   case MachineCombinerPattern::FMULADDH_OP2:
5770     Opc = AArch64::FMADDHrrr;
5771     RC = &AArch64::FPR16RegClass;
5772     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5773     break;
5774   case MachineCombinerPattern::FMULADDS_OP2:
5775     Opc = AArch64::FMADDSrrr;
5776     RC = &AArch64::FPR32RegClass;
5777     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5778     break;
5779   case MachineCombinerPattern::FMULADDD_OP2:
5780     Opc = AArch64::FMADDDrrr;
5781     RC = &AArch64::FPR64RegClass;
5782     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5783     break;
5784 
5785   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
5786     Opc = AArch64::FMLAv1i32_indexed;
5787     RC = &AArch64::FPR32RegClass;
5788     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5789                            FMAInstKind::Indexed);
5790     break;
5791   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
5792     Opc = AArch64::FMLAv1i32_indexed;
5793     RC = &AArch64::FPR32RegClass;
5794     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5795                            FMAInstKind::Indexed);
5796     break;
5797 
5798   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
5799     Opc = AArch64::FMLAv1i64_indexed;
5800     RC = &AArch64::FPR64RegClass;
5801     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5802                            FMAInstKind::Indexed);
5803     break;
5804   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
5805     Opc = AArch64::FMLAv1i64_indexed;
5806     RC = &AArch64::FPR64RegClass;
5807     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5808                            FMAInstKind::Indexed);
5809     break;
5810 
5811   case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
5812     RC = &AArch64::FPR64RegClass;
5813     Opc = AArch64::FMLAv4i16_indexed;
5814     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5815                            FMAInstKind::Indexed);
5816     break;
5817   case MachineCombinerPattern::FMLAv4f16_OP1:
5818     RC = &AArch64::FPR64RegClass;
5819     Opc = AArch64::FMLAv4f16;
5820     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5821                            FMAInstKind::Accumulator);
5822     break;
5823   case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
5824     RC = &AArch64::FPR64RegClass;
5825     Opc = AArch64::FMLAv4i16_indexed;
5826     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5827                            FMAInstKind::Indexed);
5828     break;
5829   case MachineCombinerPattern::FMLAv4f16_OP2:
5830     RC = &AArch64::FPR64RegClass;
5831     Opc = AArch64::FMLAv4f16;
5832     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5833                            FMAInstKind::Accumulator);
5834     break;
5835 
5836   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
5837   case MachineCombinerPattern::FMLAv2f32_OP1:
5838     RC = &AArch64::FPR64RegClass;
5839     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
5840       Opc = AArch64::FMLAv2i32_indexed;
5841       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5842                              FMAInstKind::Indexed);
5843     } else {
5844       Opc = AArch64::FMLAv2f32;
5845       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5846                              FMAInstKind::Accumulator);
5847     }
5848     break;
5849   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
5850   case MachineCombinerPattern::FMLAv2f32_OP2:
5851     RC = &AArch64::FPR64RegClass;
5852     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
5853       Opc = AArch64::FMLAv2i32_indexed;
5854       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5855                              FMAInstKind::Indexed);
5856     } else {
5857       Opc = AArch64::FMLAv2f32;
5858       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5859                              FMAInstKind::Accumulator);
5860     }
5861     break;
5862 
5863   case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
5864     RC = &AArch64::FPR128RegClass;
5865     Opc = AArch64::FMLAv8i16_indexed;
5866     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5867                            FMAInstKind::Indexed);
5868     break;
5869   case MachineCombinerPattern::FMLAv8f16_OP1:
5870     RC = &AArch64::FPR128RegClass;
5871     Opc = AArch64::FMLAv8f16;
5872     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5873                            FMAInstKind::Accumulator);
5874     break;
5875   case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
5876     RC = &AArch64::FPR128RegClass;
5877     Opc = AArch64::FMLAv8i16_indexed;
5878     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5879                            FMAInstKind::Indexed);
5880     break;
5881   case MachineCombinerPattern::FMLAv8f16_OP2:
5882     RC = &AArch64::FPR128RegClass;
5883     Opc = AArch64::FMLAv8f16;
5884     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5885                            FMAInstKind::Accumulator);
5886     break;
5887 
5888   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
5889   case MachineCombinerPattern::FMLAv2f64_OP1:
5890     RC = &AArch64::FPR128RegClass;
5891     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
5892       Opc = AArch64::FMLAv2i64_indexed;
5893       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5894                              FMAInstKind::Indexed);
5895     } else {
5896       Opc = AArch64::FMLAv2f64;
5897       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5898                              FMAInstKind::Accumulator);
5899     }
5900     break;
5901   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
5902   case MachineCombinerPattern::FMLAv2f64_OP2:
5903     RC = &AArch64::FPR128RegClass;
5904     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
5905       Opc = AArch64::FMLAv2i64_indexed;
5906       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5907                              FMAInstKind::Indexed);
5908     } else {
5909       Opc = AArch64::FMLAv2f64;
5910       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5911                              FMAInstKind::Accumulator);
5912     }
5913     break;
5914 
5915   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
5916   case MachineCombinerPattern::FMLAv4f32_OP1:
5917     RC = &AArch64::FPR128RegClass;
5918     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
5919       Opc = AArch64::FMLAv4i32_indexed;
5920       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5921                              FMAInstKind::Indexed);
5922     } else {
5923       Opc = AArch64::FMLAv4f32;
5924       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
5925                              FMAInstKind::Accumulator);
5926     }
5927     break;
5928 
5929   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
5930   case MachineCombinerPattern::FMLAv4f32_OP2:
5931     RC = &AArch64::FPR128RegClass;
5932     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
5933       Opc = AArch64::FMLAv4i32_indexed;
5934       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5935                              FMAInstKind::Indexed);
5936     } else {
5937       Opc = AArch64::FMLAv4f32;
5938       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5939                              FMAInstKind::Accumulator);
5940     }
5941     break;
5942 
5943   case MachineCombinerPattern::FMULSUBH_OP1:
5944     Opc = AArch64::FNMSUBHrrr;
5945     RC = &AArch64::FPR16RegClass;
5946     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5947     break;
5948   case MachineCombinerPattern::FMULSUBS_OP1:
5949     Opc = AArch64::FNMSUBSrrr;
5950     RC = &AArch64::FPR32RegClass;
5951     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5952     break;
5953   case MachineCombinerPattern::FMULSUBD_OP1:
5954     Opc = AArch64::FNMSUBDrrr;
5955     RC = &AArch64::FPR64RegClass;
5956     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5957     break;
5958 
5959   case MachineCombinerPattern::FNMULSUBH_OP1:
5960     Opc = AArch64::FNMADDHrrr;
5961     RC = &AArch64::FPR16RegClass;
5962     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5963     break;
5964   case MachineCombinerPattern::FNMULSUBS_OP1:
5965     Opc = AArch64::FNMADDSrrr;
5966     RC = &AArch64::FPR32RegClass;
5967     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5968     break;
5969   case MachineCombinerPattern::FNMULSUBD_OP1:
5970     Opc = AArch64::FNMADDDrrr;
5971     RC = &AArch64::FPR64RegClass;
5972     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
5973     break;
5974 
5975   case MachineCombinerPattern::FMULSUBH_OP2:
5976     Opc = AArch64::FMSUBHrrr;
5977     RC = &AArch64::FPR16RegClass;
5978     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5979     break;
5980   case MachineCombinerPattern::FMULSUBS_OP2:
5981     Opc = AArch64::FMSUBSrrr;
5982     RC = &AArch64::FPR32RegClass;
5983     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5984     break;
5985   case MachineCombinerPattern::FMULSUBD_OP2:
5986     Opc = AArch64::FMSUBDrrr;
5987     RC = &AArch64::FPR64RegClass;
5988     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
5989     break;
5990 
5991   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
5992     Opc = AArch64::FMLSv1i32_indexed;
5993     RC = &AArch64::FPR32RegClass;
5994     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
5995                            FMAInstKind::Indexed);
5996     break;
5997 
5998   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
5999     Opc = AArch64::FMLSv1i64_indexed;
6000     RC = &AArch64::FPR64RegClass;
6001     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
6002                            FMAInstKind::Indexed);
6003     break;
6004 
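  // Note: the FMLS..._OP1 patterns below compute (A * B) - Acc, with the
  // multiply feeding the first operand of the subtract. FMLS only computes
  // Acc - (A * B), so these patterns negate the accumulator with FNEG and then
  // use FMLA; the ..._OP2 patterns map directly onto FMLS.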
6005   case MachineCombinerPattern::FMLSv4f16_OP1:
6006   case MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
6007     RC = &AArch64::FPR64RegClass;
6008     Register NewVR = MRI.createVirtualRegister(RC);
6009     MachineInstrBuilder MIB1 =
6010         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR)
6011             .add(Root.getOperand(2));
6012     InsInstrs.push_back(MIB1);
6013     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6014     if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) {
6015       Opc = AArch64::FMLAv4f16;
6016       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
6017                              FMAInstKind::Accumulator, &NewVR);
6018     } else {
6019       Opc = AArch64::FMLAv4i16_indexed;
6020       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
6021                              FMAInstKind::Indexed, &NewVR);
6022     }
6023     break;
6024   }
6025   case MachineCombinerPattern::FMLSv4f16_OP2:
6026     RC = &AArch64::FPR64RegClass;
6027     Opc = AArch64::FMLSv4f16;
6028     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
6029                            FMAInstKind::Accumulator);
6030     break;
6031   case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
6032     RC = &AArch64::FPR64RegClass;
6033     Opc = AArch64::FMLSv4i16_indexed;
6034     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
6035                            FMAInstKind::Indexed);
6036     break;
6037 
6038   case MachineCombinerPattern::FMLSv2f32_OP2:
6039   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
6040     RC = &AArch64::FPR64RegClass;
6041     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
6042       Opc = AArch64::FMLSv2i32_indexed;
6043       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
6044                              FMAInstKind::Indexed);
6045     } else {
6046       Opc = AArch64::FMLSv2f32;
6047       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
6048                              FMAInstKind::Accumulator);
6049     }
6050     break;
6051 
6052   case MachineCombinerPattern::FMLSv8f16_OP1:
6053   case MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
6054     RC = &AArch64::FPR128RegClass;
6055     Register NewVR = MRI.createVirtualRegister(RC);
6056     MachineInstrBuilder MIB1 =
6057         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR)
6058             .add(Root.getOperand(2));
6059     InsInstrs.push_back(MIB1);
6060     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6061     if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) {
6062       Opc = AArch64::FMLAv8f16;
6063       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
6064                              FMAInstKind::Accumulator, &NewVR);
6065     } else {
6066       Opc = AArch64::FMLAv8i16_indexed;
6067       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
6068                              FMAInstKind::Indexed, &NewVR);
6069     }
6070     break;
6071   }
6072   case MachineCombinerPattern::FMLSv8f16_OP2:
6073     RC = &AArch64::FPR128RegClass;
6074     Opc = AArch64::FMLSv8f16;
6075     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
6076                            FMAInstKind::Accumulator);
6077     break;
6078   case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
6079     RC = &AArch64::FPR128RegClass;
6080     Opc = AArch64::FMLSv8i16_indexed;
6081     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
6082                            FMAInstKind::Indexed);
6083     break;
6084 
6085   case MachineCombinerPattern::FMLSv2f64_OP2:
6086   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
6087     RC = &AArch64::FPR128RegClass;
6088     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
6089       Opc = AArch64::FMLSv2i64_indexed;
6090       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
6091                              FMAInstKind::Indexed);
6092     } else {
6093       Opc = AArch64::FMLSv2f64;
6094       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
6095                              FMAInstKind::Accumulator);
6096     }
6097     break;
6098 
6099   case MachineCombinerPattern::FMLSv4f32_OP2:
6100   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
6101     RC = &AArch64::FPR128RegClass;
6102     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
6103       Opc = AArch64::FMLSv4i32_indexed;
6104       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
6105                              FMAInstKind::Indexed);
6106     } else {
6107       Opc = AArch64::FMLSv4f32;
6108       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
6109                              FMAInstKind::Accumulator);
6110     }
6111     break;
6112   case MachineCombinerPattern::FMLSv2f32_OP1:
6113   case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
6114     RC = &AArch64::FPR64RegClass;
6115     Register NewVR = MRI.createVirtualRegister(RC);
6116     MachineInstrBuilder MIB1 =
6117         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
6118             .add(Root.getOperand(2));
6119     InsInstrs.push_back(MIB1);
6120     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6121     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
6122       Opc = AArch64::FMLAv2i32_indexed;
6123       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
6124                              FMAInstKind::Indexed, &NewVR);
6125     } else {
6126       Opc = AArch64::FMLAv2f32;
6127       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
6128                              FMAInstKind::Accumulator, &NewVR);
6129     }
6130     break;
6131   }
6132   case MachineCombinerPattern::FMLSv4f32_OP1:
6133   case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
6134     RC = &AArch64::FPR128RegClass;
6135     Register NewVR = MRI.createVirtualRegister(RC);
6136     MachineInstrBuilder MIB1 =
6137         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
6138             .add(Root.getOperand(2));
6139     InsInstrs.push_back(MIB1);
6140     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6141     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
6142       Opc = AArch64::FMLAv4i32_indexed;
6143       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
6144                              FMAInstKind::Indexed, &NewVR);
6145     } else {
6146       Opc = AArch64::FMLAv4f32;
6147       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
6148                              FMAInstKind::Accumulator, &NewVR);
6149     }
6150     break;
6151   }
6152   case MachineCombinerPattern::FMLSv2f64_OP1:
6153   case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
6154     RC = &AArch64::FPR128RegClass;
6155     Register NewVR = MRI.createVirtualRegister(RC);
6156     MachineInstrBuilder MIB1 =
6157         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
6158             .add(Root.getOperand(2));
6159     InsInstrs.push_back(MIB1);
6160     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6161     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
6162       Opc = AArch64::FMLAv2i64_indexed;
6163       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
6164                              FMAInstKind::Indexed, &NewVR);
6165     } else {
6166       Opc = AArch64::FMLAv2f64;
6167       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
6168                              FMAInstKind::Accumulator, &NewVR);
6169     }
6170     break;
6171   }
6172   case MachineCombinerPattern::FMULv2i32_indexed_OP1:
6173   case MachineCombinerPattern::FMULv2i32_indexed_OP2: {
6174     unsigned IdxDupOp =
6175         (Pattern == MachineCombinerPattern::FMULv2i32_indexed_OP1) ? 1 : 2;
6176     genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
6177                        &AArch64::FPR128RegClass, MRI);
6178     break;
6179   }
6180   case MachineCombinerPattern::FMULv2i64_indexed_OP1:
6181   case MachineCombinerPattern::FMULv2i64_indexed_OP2: {
6182     unsigned IdxDupOp =
6183         (Pattern == MachineCombinerPattern::FMULv2i64_indexed_OP1) ? 1 : 2;
6184     genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
6185                        &AArch64::FPR128RegClass, MRI);
6186     break;
6187   }
6188   case MachineCombinerPattern::FMULv4i16_indexed_OP1:
6189   case MachineCombinerPattern::FMULv4i16_indexed_OP2: {
6190     unsigned IdxDupOp =
6191         (Pattern == MachineCombinerPattern::FMULv4i16_indexed_OP1) ? 1 : 2;
6192     genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
6193                        &AArch64::FPR128_loRegClass, MRI);
6194     break;
6195   }
6196   case MachineCombinerPattern::FMULv4i32_indexed_OP1:
6197   case MachineCombinerPattern::FMULv4i32_indexed_OP2: {
6198     unsigned IdxDupOp =
6199         (Pattern == MachineCombinerPattern::FMULv4i32_indexed_OP1) ? 1 : 2;
6200     genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
6201                        &AArch64::FPR128RegClass, MRI);
6202     break;
6203   }
6204   case MachineCombinerPattern::FMULv8i16_indexed_OP1:
6205   case MachineCombinerPattern::FMULv8i16_indexed_OP2: {
6206     unsigned IdxDupOp =
6207         (Pattern == MachineCombinerPattern::FMULv8i16_indexed_OP1) ? 1 : 2;
6208     genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
6209                        &AArch64::FPR128_loRegClass, MRI);
6210     break;
6211   }
6212   } // end switch (Pattern)
6213   // Record MUL and ADD/SUB for deletion
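  // (MUL stays null for the FMUL*_indexed patterns handled above, which only
  // replace Root itself and leave no separate multiply instruction to erase.)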
6214   if (MUL)
6215     DelInstrs.push_back(MUL);
6216   DelInstrs.push_back(&Root);
6217 }
6218 
6219 /// Replace a csinc-branch sequence by a simple conditional branch
6220 ///
6221 /// Examples:
6222 /// 1. \code
6223 ///   csinc  w9, wzr, wzr, <condition code>
6224 ///   tbnz   w9, #0, 0x44
6225 ///    \endcode
6226 /// to
6227 ///    \code
6228 ///   b.<inverted condition code>
6229 ///    \endcode
6230 ///
6231 /// 2. \code
6232 ///   csinc w9, wzr, wzr, <condition code>
6233 ///   tbz   w9, #0, 0x44
6234 ///    \endcode
6235 /// to
6236 ///    \code
6237 ///   b.<condition code>
6238 ///    \endcode
6239 ///
6240 /// Replace a compare-and-branch sequence by a TBZ/TBNZ instruction when the
6241 /// compare's constant operand is a power of 2.
6242 ///
6243 /// Examples:
6244 ///    \code
6245 ///   and  w8, w8, #0x400
6246 ///   cbnz w8, L1
6247 ///    \endcode
6248 /// to
6249 ///    \code
6250 ///   tbnz w8, #10, L1
6251 ///    \endcode
6252 ///
6253 /// \param  MI Conditional Branch
6254 /// \return True when the simple conditional branch is generated
6255 ///
6256 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
6257   bool IsNegativeBranch = false;
6258   bool IsTestAndBranch = false;
6259   unsigned TargetBBInMI = 0;
6260   switch (MI.getOpcode()) {
6261   default:
6262     llvm_unreachable("Unknown branch instruction?");
6263   case AArch64::Bcc:
6264     return false;
6265   case AArch64::CBZW:
6266   case AArch64::CBZX:
6267     TargetBBInMI = 1;
6268     break;
6269   case AArch64::CBNZW:
6270   case AArch64::CBNZX:
6271     TargetBBInMI = 1;
6272     IsNegativeBranch = true;
6273     break;
6274   case AArch64::TBZW:
6275   case AArch64::TBZX:
6276     TargetBBInMI = 2;
6277     IsTestAndBranch = true;
6278     break;
6279   case AArch64::TBNZW:
6280   case AArch64::TBNZX:
6281     TargetBBInMI = 2;
6282     IsNegativeBranch = true;
6283     IsTestAndBranch = true;
6284     break;
6285   }
6286   // So we increment a zero register and test for bits other
6287   // than bit 0? Conservatively bail out in case the verifier
6288   // missed this case.
6289   if (IsTestAndBranch && MI.getOperand(1).getImm())
6290     return false;
6291 
6292   // Find Definition.
6293   assert(MI.getParent() && "Incomplete machine instruction\n");
6294   MachineBasicBlock *MBB = MI.getParent();
6295   MachineFunction *MF = MBB->getParent();
6296   MachineRegisterInfo *MRI = &MF->getRegInfo();
6297   Register VReg = MI.getOperand(0).getReg();
6298   if (!Register::isVirtualRegister(VReg))
6299     return false;
6300 
6301   MachineInstr *DefMI = MRI->getVRegDef(VReg);
6302 
6303   // Look through COPY instructions to find definition.
6304   while (DefMI->isCopy()) {
6305     Register CopyVReg = DefMI->getOperand(1).getReg();
6306     if (!MRI->hasOneNonDBGUse(CopyVReg))
6307       return false;
6308     if (!MRI->hasOneDef(CopyVReg))
6309       return false;
6310     DefMI = MRI->getVRegDef(CopyVReg);
6311   }
6312 
6313   switch (DefMI->getOpcode()) {
6314   default:
6315     return false;
6316   // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
6317   case AArch64::ANDWri:
6318   case AArch64::ANDXri: {
6319     if (IsTestAndBranch)
6320       return false;
6321     if (DefMI->getParent() != MBB)
6322       return false;
6323     if (!MRI->hasOneNonDBGUse(VReg))
6324       return false;
6325 
6326     bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
6327     uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
6328         DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
6329     if (!isPowerOf2_64(Mask))
6330       return false;
6331 
6332     MachineOperand &MO = DefMI->getOperand(1);
6333     Register NewReg = MO.getReg();
6334     if (!Register::isVirtualRegister(NewReg))
6335       return false;
6336 
6337     assert(!MRI->def_empty(NewReg) && "Register must be defined.");
6338 
6339     MachineBasicBlock &RefToMBB = *MBB;
6340     MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
6341     DebugLoc DL = MI.getDebugLoc();
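    // For example, Mask == 0x400 gives Imm == 10, turning
    //   and  w8, w8, #0x400
    //   cbnz w8, L1
    // into
    //   tbnz w8, #10, L1
    // as in the example in the function comment above.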
6342     unsigned Imm = Log2_64(Mask);
6343     unsigned Opc = (Imm < 32)
6344                        ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
6345                        : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
6346     MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
6347                               .addReg(NewReg)
6348                               .addImm(Imm)
6349                               .addMBB(TBB);
6350     // Register lives on to the TBZ/TBNZ now.
6351     MO.setIsKill(false);
6352 
6353     // For immediates smaller than 32, we need to use the 32-bit
6354     // variant (W) in all cases, because the 64-bit variant cannot
6355     // encode them.
6356     // Therefore, if the input register is 64-bit, we need to take its
6357     // 32-bit sub-register.
6358     if (!Is32Bit && Imm < 32)
6359       NewMI->getOperand(0).setSubReg(AArch64::sub_32);
6360     MI.eraseFromParent();
6361     return true;
6362   }
6363   // Look for CSINC
6364   case AArch64::CSINCWr:
6365   case AArch64::CSINCXr: {
6366     if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
6367           DefMI->getOperand(2).getReg() == AArch64::WZR) &&
6368         !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
6369           DefMI->getOperand(2).getReg() == AArch64::XZR))
6370       return false;
6371 
6372     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
6373       return false;
6374 
6375     AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
6376     // Convert only when the condition code is not modified between
6377     // the CSINC and the branch. The CC may be used by other
6378     // instructions in between.
6379     if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
6380       return false;
6381     MachineBasicBlock &RefToMBB = *MBB;
6382     MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
6383     DebugLoc DL = MI.getDebugLoc();
6384     if (IsNegativeBranch)
6385       CC = AArch64CC::getInvertedCondCode(CC);
6386     BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
6387     MI.eraseFromParent();
6388     return true;
6389   }
6390   }
6391 }
6392 
6393 std::pair<unsigned, unsigned>
6394 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
6395   const unsigned Mask = AArch64II::MO_FRAGMENT;
6396   return std::make_pair(TF & Mask, TF & ~Mask);
6397 }
6398 
6399 ArrayRef<std::pair<unsigned, const char *>>
6400 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
6401   using namespace AArch64II;
6402 
6403   static const std::pair<unsigned, const char *> TargetFlags[] = {
6404       {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
6405       {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
6406       {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
6407       {MO_HI12, "aarch64-hi12"}};
6408   return makeArrayRef(TargetFlags);
6409 }
6410 
6411 ArrayRef<std::pair<unsigned, const char *>>
6412 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
6413   using namespace AArch64II;
6414 
6415   static const std::pair<unsigned, const char *> TargetFlags[] = {
6416       {MO_COFFSTUB, "aarch64-coffstub"},
6417       {MO_GOT, "aarch64-got"},
6418       {MO_NC, "aarch64-nc"},
6419       {MO_S, "aarch64-s"},
6420       {MO_TLS, "aarch64-tls"},
6421       {MO_DLLIMPORT, "aarch64-dllimport"},
6422       {MO_PREL, "aarch64-prel"},
6423       {MO_TAGGED, "aarch64-tagged"}};
6424   return makeArrayRef(TargetFlags);
6425 }
6426 
6427 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
6428 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
6429   static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
6430       {{MOSuppressPair, "aarch64-suppress-pair"},
6431        {MOStridedAccess, "aarch64-strided-access"}};
6432   return makeArrayRef(TargetFlags);
6433 }
6434 
6435 /// Constants defining how certain sequences should be outlined.
6436 /// This encompasses how an outlined function should be called, and what kind of
6437 /// frame should be emitted for that outlined function.
6438 ///
6439 /// \p MachineOutlinerDefault implies that the function should be called with
6440 /// a save and restore of LR to the stack.
6441 ///
6442 /// That is,
6443 ///
6444 /// I1     Save LR                    OUTLINED_FUNCTION:
6445 /// I2 --> BL OUTLINED_FUNCTION       I1
6446 /// I3     Restore LR                 I2
6447 ///                                   I3
6448 ///                                   RET
6449 ///
6450 /// * Call construction overhead: 3 (save + BL + restore)
6451 /// * Frame construction overhead: 1 (ret)
6452 /// * Requires stack fixups? Yes
6453 ///
6454 /// \p MachineOutlinerTailCall implies that the function is being created from
6455 /// a sequence of instructions ending in a return.
6456 ///
6457 /// That is,
6458 ///
6459 /// I1                             OUTLINED_FUNCTION:
6460 /// I2 --> B OUTLINED_FUNCTION     I1
6461 /// RET                            I2
6462 ///                                RET
6463 ///
6464 /// * Call construction overhead: 1 (B)
6465 /// * Frame construction overhead: 0 (Return included in sequence)
6466 /// * Requires stack fixups? No
6467 ///
6468 /// \p MachineOutlinerNoLRSave implies that the function should be called using
6469 /// a BL instruction, but doesn't require LR to be saved and restored. This
6470 /// happens when LR is known to be dead.
6471 ///
6472 /// That is,
6473 ///
6474 /// I1                                OUTLINED_FUNCTION:
6475 /// I2 --> BL OUTLINED_FUNCTION       I1
6476 /// I3                                I2
6477 ///                                   I3
6478 ///                                   RET
6479 ///
6480 /// * Call construction overhead: 1 (BL)
6481 /// * Frame construction overhead: 1 (RET)
6482 /// * Requires stack fixups? No
6483 ///
6484 /// \p MachineOutlinerThunk implies that the function is being created from
6485 /// a sequence of instructions ending in a call. The outlined function is
6486 /// called with a BL instruction, and the outlined function tail-calls the
6487 /// original call destination.
6488 ///
6489 /// That is,
6490 ///
6491 /// I1                                OUTLINED_FUNCTION:
6492 /// I2 --> BL OUTLINED_FUNCTION       I1
6493 /// BL f                              I2
6494 ///                                   B f
6495 /// * Call construction overhead: 1 (BL)
6496 /// * Frame construction overhead: 0
6497 /// * Requires stack fixups? No
6498 ///
6499 /// \p MachineOutlinerRegSave implies that the function should be called with a
6500 /// save and restore of LR to an available register. This allows us to avoid
6501 /// stack fixups. Note that this outlining variant is compatible with the
6502 /// NoLRSave case.
6503 ///
6504 /// That is,
6505 ///
6506 /// I1     Save LR                    OUTLINED_FUNCTION:
6507 /// I2 --> BL OUTLINED_FUNCTION       I1
6508 /// I3     Restore LR                 I2
6509 ///                                   I3
6510 ///                                   RET
6511 ///
6512 /// * Call construction overhead: 3 (save + BL + restore)
6513 /// * Frame construction overhead: 1 (ret)
6514 /// * Requires stack fixups? No
6515 enum MachineOutlinerClass {
6516   MachineOutlinerDefault,  /// Emit a save, restore, call, and return.
6517   MachineOutlinerTailCall, /// Only emit a branch.
6518   MachineOutlinerNoLRSave, /// Emit a call and return.
6519   MachineOutlinerThunk,    /// Emit a call and tail-call.
6520   MachineOutlinerRegSave   /// Same as default, but save to a register.
6521 };
6522 
6523 enum MachineOutlinerMBBFlags {
6524   LRUnavailableSomewhere = 0x2,
6525   HasCalls = 0x4,
6526   UnsafeRegsDead = 0x8
6527 };
6528 
6529 unsigned
6530 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
6531   assert(C.LRUWasSet && "LRU wasn't set?");
6532   MachineFunction *MF = C.getMF();
6533   const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
6534       MF->getSubtarget().getRegisterInfo());
6535 
6536   // Check if there is an available register across the sequence that we can
6537   // use.
6538   for (unsigned Reg : AArch64::GPR64RegClass) {
6539     if (!ARI->isReservedReg(*MF, Reg) &&
6540         Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
6541         Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
6542         Reg != AArch64::X17 && // Ditto for X17.
6543         C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
6544       return Reg;
6545   }
6546 
6547   // No suitable register. Return 0.
6548   return 0u;
6549 }
6550 
6551 static bool
6552 outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
6553                                          const outliner::Candidate &b) {
6554   const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
6555   const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
6556 
6557   return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
6558          MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
6559 }
6560 
6561 static bool
6562 outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
6563                                        const outliner::Candidate &b) {
6564   const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
6565   const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
6566 
6567   return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
6568 }
6569 
6570 static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
6571                                                 const outliner::Candidate &b) {
6572   const AArch64Subtarget &SubtargetA =
6573       a.getMF()->getSubtarget<AArch64Subtarget>();
6574   const AArch64Subtarget &SubtargetB =
6575       b.getMF()->getSubtarget<AArch64Subtarget>();
6576   return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
6577 }
6578 
6579 outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
6580     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
6581   outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
6582   unsigned SequenceSize =
6583       std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
6584                       [this](unsigned Sum, const MachineInstr &MI) {
6585                         return Sum + getInstSizeInBytes(MI);
6586                       });
6587   unsigned NumBytesToCreateFrame = 0;
6588 
6589   // We only allow outlining for functions having exactly matching return
6590   // address signing attributes, i.e., all share the same value for the
6591   // attribute "sign-return-address" and all share the same type of key they
6592   // are signed with.
6593   // Additionally we require all functions to simultaneously either support
6594   // v8.3a features or not. Otherwise an outlined function could get signed
6595   // using dedicated v8.3 instructions and a call from a function that doesn't
6596   // support v8.3 instructions would therefore be invalid.
6597   if (std::adjacent_find(
6598           RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
6599           [](const outliner::Candidate &a, const outliner::Candidate &b) {
6600             // Return true if a and b are non-equal w.r.t. return address
6601             // signing or support of v8.3a features
6602             if (outliningCandidatesSigningScopeConsensus(a, b) &&
6603                 outliningCandidatesSigningKeyConsensus(a, b) &&
6604                 outliningCandidatesV8_3OpsConsensus(a, b)) {
6605               return false;
6606             }
6607             return true;
6608           }) != RepeatedSequenceLocs.end()) {
6609     return outliner::OutlinedFunction();
6610   }
6611 
6612   // Since at this point all candidates agree on their return address signing,
6613   // picking just one is fine. If the candidate functions potentially sign their
6614   // return addresses, the outlined function should do the same. Note that in
6615   // the case of "sign-return-address"="non-leaf" this is an assumption: It is
6616   // not certainly true that the outlined function will have to sign its return
6617   // address but this decision is made later, when the decision to outline
6618   // has already been made.
6619   // The same holds for the number of additional instructions we need: On
6620   // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
6621   // necessary. However, at this point we don't know if the outlined function
6622   // will have a RET instruction so we assume the worst.
6623   const TargetRegisterInfo &TRI = getRegisterInfo();
6624   if (FirstCand.getMF()
6625           ->getInfo<AArch64FunctionInfo>()
6626           ->shouldSignReturnAddress(true)) {
6627     // One PAC and one AUT instruction.
6628     NumBytesToCreateFrame += 8;
6629 
6630     // We have to check if sp modifying instructions would get outlined.
6631     // If so, we only allow outlining if sp is unchanged overall, so matching
6632     // sub and add instructions are okay to outline, but all other sp
6633     // modifications are not.
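    // For example, a matched pair such as
    //   sub sp, sp, #16 ... add sp, sp, #16
    // nets to zero and is fine to outline, while an unmatched SP adjustment or
    // any other write to SP causes the candidate to be dropped below.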
6634     auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
6635       int SPValue = 0;
6636       MachineBasicBlock::iterator MBBI = C.front();
6637       for (;;) {
6638         if (MBBI->modifiesRegister(AArch64::SP, &TRI)) {
6639           switch (MBBI->getOpcode()) {
6640           case AArch64::ADDXri:
6641           case AArch64::ADDWri:
6642             assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
6643             assert(MBBI->getOperand(2).isImm() &&
6644                    "Expected operand to be immediate");
6645             assert(MBBI->getOperand(1).isReg() &&
6646                    "Expected operand to be a register");
6647             // Check if the add just increments sp. If so, we search for
6648             // matching sub instructions that decrement sp. If not, the
6649             // modification is illegal
6650             if (MBBI->getOperand(1).getReg() == AArch64::SP)
6651               SPValue += MBBI->getOperand(2).getImm();
6652             else
6653               return true;
6654             break;
6655           case AArch64::SUBXri:
6656           case AArch64::SUBWri:
6657             assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
6658             assert(MBBI->getOperand(2).isImm() &&
6659                    "Expected operand to be immediate");
6660             assert(MBBI->getOperand(1).isReg() &&
6661                    "Expected operand to be a register");
6662             // Check if the sub just decrements sp. If so, we search for
6663             // matching add instructions that increment sp. If not, the
6664             // modification is illegal
6665             if (MBBI->getOperand(1).getReg() == AArch64::SP)
6666               SPValue -= MBBI->getOperand(2).getImm();
6667             else
6668               return true;
6669             break;
6670           default:
6671             return true;
6672           }
6673         }
6674         if (MBBI == C.back())
6675           break;
6676         ++MBBI;
6677       }
6678       if (SPValue)
6679         return true;
6680       return false;
6681     };
6682     // Remove candidates with illegal stack modifying instructions
6683     llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
6684 
6685     // If the sequence doesn't have enough candidates left, then we're done.
6686     if (RepeatedSequenceLocs.size() < 2)
6687       return outliner::OutlinedFunction();
6688   }
6689 
6690   // Properties about candidate MBBs that hold for all of them.
6691   unsigned FlagsSetInAll = 0xF;
6692 
6693   // Compute liveness information for each candidate, and set FlagsSetInAll.
6694   std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
6695                 [&FlagsSetInAll](outliner::Candidate &C) {
6696                   FlagsSetInAll &= C.Flags;
6697                 });
6698 
6699   // According to the AArch64 Procedure Call Standard, the following are
6700   // undefined on entry/exit from a function call:
6701   //
6702   // * Registers x16, x17, (and thus w16, w17)
6703   // * Condition codes (and thus the NZCV register)
6704   //
6705   // Because of this, we can't outline any sequence of instructions where
6706   // one of these registers is live into/across it. Thus, we need to delete
6707   // those candidates.
6710   auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
6711     // If the unsafe registers in this block are all dead, then we don't need
6712     // to compute liveness here.
6713     if (C.Flags & UnsafeRegsDead)
6714       return false;
6715     C.initLRU(TRI);
6716     LiveRegUnits LRU = C.LRU;
6717     return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
6718             !LRU.available(AArch64::NZCV));
6719   };
6720 
6721   // Are there any candidates where those registers are live?
6722   if (!(FlagsSetInAll & UnsafeRegsDead)) {
6723     // Erase every candidate that violates the restrictions above. (It could be
6724     // true that we have viable candidates, so it's not worth bailing out in
6725     // the case that, say, 1 out of 20 candidates violate the restrictions.)
6726     llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
6727 
6728     // If the sequence doesn't have enough candidates left, then we're done.
6729     if (RepeatedSequenceLocs.size() < 2)
6730       return outliner::OutlinedFunction();
6731   }
6732 
6733   // At this point, we have only "safe" candidates to outline. Figure out
6734   // frame + call instruction information.
6735 
6736   unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
6737 
6738   // Helper lambda which sets call information for every candidate.
6739   auto SetCandidateCallInfo =
6740       [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
6741         for (outliner::Candidate &C : RepeatedSequenceLocs)
6742           C.setCallInfo(CallID, NumBytesForCall);
6743       };
6744 
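  // Start with the default frame: its only epilogue instruction is a 4-byte
  // RET (see the MachineOutlinerDefault description above); this is refined
  // for the tail-call and thunk cases below.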
6745   unsigned FrameID = MachineOutlinerDefault;
6746   NumBytesToCreateFrame += 4;
6747 
6748   bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
6749     return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
6750   });
6751 
6752   // We check to see if CFI Instructions are present, and if they are
6753   // we find the number of CFI Instructions in the candidates.
6754   unsigned CFICount = 0;
6755   MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
6756   for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
6757        Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
6758     if (MBBI->isCFIInstruction())
6759       CFICount++;
6760     MBBI++;
6761   }
6762 
6763   // We compare the number of found CFI instructions to the number of CFI
6764   // instructions in the parent function for each candidate. We must check this
6765   // since if we outline one of the CFI instructions in a function, we have to
6766   // outline them all for correctness. If we do not, the address offsets will be
6767   // incorrect between the two sections of the program.
6768   for (outliner::Candidate &C : RepeatedSequenceLocs) {
6769     std::vector<MCCFIInstruction> CFIInstructions =
6770         C.getMF()->getFrameInstructions();
6771 
6772     if (CFICount > 0 && CFICount != CFIInstructions.size())
6773       return outliner::OutlinedFunction();
6774   }
6775 
6776   // Returns true if an instruction is safe to fix up, false otherwise.
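  // For instance, an SP-relative load such as "ldr x0, [sp, #8]" can be
  // rewritten as "ldr x0, [sp, #24]" once LR has been saved in the outlined
  // frame, whereas "add x0, sp, #8" is not handled yet (see the FIXME below).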
6777   auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
6778     if (MI.isCall())
6779       return true;
6780 
6781     if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
6782         !MI.readsRegister(AArch64::SP, &TRI))
6783       return true;
6784 
6785     // Any modification of SP will break our code to save/restore LR.
6786     // FIXME: We could handle some instructions which add a constant
6787     // offset to SP, with a bit more work.
6788     if (MI.modifiesRegister(AArch64::SP, &TRI))
6789       return false;
6790 
6791     // At this point, we have a stack instruction that we might need to
6792     // fix up. We'll handle it if it's a load or store.
6793     if (MI.mayLoadOrStore()) {
6794       const MachineOperand *Base; // Filled with the base operand of MI.
6795       int64_t Offset;             // Filled with the offset of MI.
6796       bool OffsetIsScalable;
6797 
6798       // Does it allow us to offset the base operand and is the base the
6799       // register SP?
6800       if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
6801           !Base->isReg() || Base->getReg() != AArch64::SP)
6802         return false;
6803 
6804       // The fix-up code below assumes byte offsets.
6805       if (OffsetIsScalable)
6806         return false;
6807 
6808       // Find the minimum/maximum offset for this instruction and check
6809       // if fixing it up would be in range.
6810       int64_t MinOffset,
6811           MaxOffset;  // Unscaled offsets for the instruction.
6812       TypeSize Scale(0U, false); // The scale to multiply the offsets by.
6813       unsigned DummyWidth;
6814       getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
6815 
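      // Saving LR in the outlined frame takes one 16-byte aligned stack slot,
      // so SP-relative offsets inside the outlined body shift by 16 bytes.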
6816       Offset += 16; // Update the offset to what it would be if we outlined.
6817       if (Offset < MinOffset * (int64_t)Scale.getFixedSize() ||
6818           Offset > MaxOffset * (int64_t)Scale.getFixedSize())
6819         return false;
6820 
6821       // It's in range, so we can outline it.
6822       return true;
6823     }
6824 
6825     // FIXME: Add handling for instructions like "add x0, sp, #8".
6826 
6827     // We can't fix it up, so don't outline it.
6828     return false;
6829   };
6830 
6831   // True if it's possible to fix up each stack instruction in this sequence.
6832   // Important for frames/call variants that modify the stack.
6833   bool AllStackInstrsSafe = std::all_of(
6834       FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
6835 
6836   // If the last instruction in any candidate is a terminator, then we should
6837   // tail call all of the candidates.
6838   if (RepeatedSequenceLocs[0].back()->isTerminator()) {
6839     FrameID = MachineOutlinerTailCall;
6840     NumBytesToCreateFrame = 0;
6841     SetCandidateCallInfo(MachineOutlinerTailCall, 4);
6842   }
6843 
6844   else if (LastInstrOpcode == AArch64::BL ||
6845            ((LastInstrOpcode == AArch64::BLR ||
6846              LastInstrOpcode == AArch64::BLRNoIP) &&
6847             !HasBTI)) {
6848     // FIXME: Do we need to check if the code after this uses the value of LR?
6849     FrameID = MachineOutlinerThunk;
6850     NumBytesToCreateFrame = 0;
6851     SetCandidateCallInfo(MachineOutlinerThunk, 4);
6852   }
6853 
6854   else {
6855     // We need to decide how to emit calls + frames. We can always emit the same
6856     // frame if we don't need to save to the stack. If we have to save to the
6857     // stack, then we need a different frame.
6858     unsigned NumBytesNoStackCalls = 0;
6859     std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
6860 
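    // Per-candidate call costs below mirror the overhead table in the
    // MachineOutlinerClass comment: a bare BL is 4 bytes, while saving and
    // restoring LR around the BL (to a register or to the stack) takes three
    // instructions, i.e. 12 bytes.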
6861     // Check if we have to save LR.
6862     for (outliner::Candidate &C : RepeatedSequenceLocs) {
6863       C.initLRU(TRI);
6864 
6865       // If we have a noreturn caller, then we're going to be conservative and
6866       // say that we have to save LR. If we don't have a ret at the end of the
6867       // block, then we can't reason about liveness accurately.
6868       //
6869       // FIXME: We can probably do better than always disabling this in
6870       // noreturn functions by fixing up the liveness info.
6871       bool IsNoReturn =
6872           C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
6873 
6874       // Is LR available? If so, we don't need a save.
6875       if (C.LRU.available(AArch64::LR) && !IsNoReturn) {
6876         NumBytesNoStackCalls += 4;
6877         C.setCallInfo(MachineOutlinerNoLRSave, 4);
6878         CandidatesWithoutStackFixups.push_back(C);
6879       }
6880 
6881       // Is an unused register available? If so, we won't modify the stack, so
6882       // we can outline with the same frame type as those that don't save LR.
6883       else if (findRegisterToSaveLRTo(C)) {
6884         NumBytesNoStackCalls += 12;
6885         C.setCallInfo(MachineOutlinerRegSave, 12);
6886         CandidatesWithoutStackFixups.push_back(C);
6887       }
6888 
6889       // Is SP used in the sequence at all? If not, we don't have to modify
6890       // the stack, so we are guaranteed to get the same frame.
6891       else if (C.UsedInSequence.available(AArch64::SP)) {
6892         NumBytesNoStackCalls += 12;
6893         C.setCallInfo(MachineOutlinerDefault, 12);
6894         CandidatesWithoutStackFixups.push_back(C);
6895       }
6896 
6897       // If we outline this, we need to modify the stack. Pretend we don't
6898       // outline this by saving all of its bytes.
6899       else {
6900         NumBytesNoStackCalls += SequenceSize;
6901       }
6902     }
6903 
6904     // If there are no places where we have to save LR, then note that we
6905     // don't have to update the stack. Otherwise, give every candidate the
6906     // default call type, as long as it's safe to do so.
6907     if (!AllStackInstrsSafe ||
6908         NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
6909       RepeatedSequenceLocs = CandidatesWithoutStackFixups;
6910       FrameID = MachineOutlinerNoLRSave;
6911     } else {
6912       SetCandidateCallInfo(MachineOutlinerDefault, 12);
6913 
6914       // Bugzilla ID: 46767
6915       // TODO: Check if fixing up the stack more than once is safe so we can
6916       // outline these.
6917       //
6918       // An outline resulting in a caller that requires stack fixups at the
6919       // callsite to a callee that also requires stack fixups can happen when
6920       // there are no available registers at the candidate callsite for a
6921       // candidate that itself also has calls.
6922       //
6923       // In other words if function_containing_sequence in the following pseudo
6924       // assembly requires that we save LR at the point of the call, but there
6925       // are no available registers: in this case we save using SP and as a
6926       // result the SP offsets require stack fixups by multiples of 16.
6927       //
6928       // function_containing_sequence:
6929       //   ...
6930       //   save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
6931       //   call OUTLINED_FUNCTION_N
6932       //   restore LR from SP
6933       //   ...
6934       //
6935       // OUTLINED_FUNCTION_N:
6936       //   save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
6937       //   ...
6938       //   bl foo
6939       //   restore LR from SP
6940       //   ret
6941       //
6942       // Because the code to handle more than one stack fixup does not
6943       // currently have the proper checks for legality, these cases will assert
6944       // in the AArch64 MachineOutliner. This is because the code to do this
6945       // needs more hardening, testing, better checks that generated code is
6946       // legal, etc., and because it is only verified to handle a single pass of
6947       // stack fixup.
6948       //
6949       // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
6950       // these cases until they are known to be handled. Bugzilla 46767 is
6951       // referenced in comments at the assert site.
6952       //
6953       // To avoid asserting (or generating non-legal code on noassert builds)
6954       // we remove all candidates which would need more than one stack fixup by
6955       // pruning the cases where the candidate has calls while also having no
6956       // available LR and having no available general purpose registers to copy
6957       // LR to (ie one extra stack save/restore).
6958       //
6959       if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
6960         erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) {
6961           return (std::any_of(
6962                      C.front(), std::next(C.back()),
6963                      [](const MachineInstr &MI) { return MI.isCall(); })) &&
6964                  (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C));
6965         });
6966       }
6967     }
6968 
6969     // If we dropped all of the candidates, bail out here.
6970     if (RepeatedSequenceLocs.size() < 2) {
6971       RepeatedSequenceLocs.clear();
6972       return outliner::OutlinedFunction();
6973     }
6974   }
6975 
6976   // Does every candidate's MBB contain a call? If so, then we might have a call
6977   // in the range.
6978   if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
6979     // Check if the range contains a call. These require a save + restore of the
6980     // link register.
6981     bool ModStackToSaveLR = false;
6982     if (std::any_of(FirstCand.front(), FirstCand.back(),
6983                     [](const MachineInstr &MI) { return MI.isCall(); }))
6984       ModStackToSaveLR = true;
6985 
6986     // Handle the last instruction separately. If this is a tail call, then the
6987     // last instruction is a call. We don't want to save + restore in this case.
6988     // However, it could be possible that the last instruction is a call without
6989     // it being valid to tail call this sequence. We should consider this as
6990     // well.
6991     else if (FrameID != MachineOutlinerThunk &&
6992              FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
6993       ModStackToSaveLR = true;
6994 
6995     if (ModStackToSaveLR) {
6996       // We can't fix up the stack. Bail out.
6997       if (!AllStackInstrsSafe) {
6998         RepeatedSequenceLocs.clear();
6999         return outliner::OutlinedFunction();
7000       }
7001 
7002       // Save + restore LR.
7003       NumBytesToCreateFrame += 8;
7004     }
7005   }
7006 
7007   // If we have CFI instructions, we can only outline if the outlined section
7008   // can be a tail call
7009   if (FrameID != MachineOutlinerTailCall && CFICount > 0)
7010     return outliner::OutlinedFunction();
7011 
7012   return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
7013                                     NumBytesToCreateFrame, FrameID);
7014 }
7015 
7016 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
7017     MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
7018   const Function &F = MF.getFunction();
7019 
7020   // Can F be deduplicated by the linker? If it can, don't outline from it.
7021   if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
7022     return false;
7023 
7024   // Don't outline from functions with section markings; the program could
7025   // expect that all the code is in the named section.
7026   // FIXME: Allow outlining from multiple functions with the same section
7027   // marking.
7028   if (F.hasSection())
7029     return false;
7030 
7031   // Outlining from functions with redzones is unsafe since the outliner may
7032   // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
7033   // outline from it.
7034   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
7035   if (!AFI || AFI->hasRedZone().getValueOr(true))
7036     return false;
7037 
7038   // FIXME: Teach the outliner to generate/handle Windows unwind info.
7039   if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
7040     return false;
7041 
7042   // It's safe to outline from MF.
7043   return true;
7044 }
7045 
7046 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
7047                                               unsigned &Flags) const {
7048   if (!TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags))
7049     return false;
7050   // Check if LR is available through all of the MBB. If it's not, then set
7051   // a flag.
7052   assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
7053          "Suitable Machine Function for outlining must track liveness");
7054   LiveRegUnits LRU(getRegisterInfo());
7055 
7056   std::for_each(MBB.rbegin(), MBB.rend(),
7057                 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
7058 
7059   // Check if each of the unsafe registers is available...
7060   bool W16AvailableInBlock = LRU.available(AArch64::W16);
7061   bool W17AvailableInBlock = LRU.available(AArch64::W17);
7062   bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
7063 
7064   // If all of these are dead (and not live out), we know we don't have to check
7065   // them later.
7066   if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
7067     Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
7068 
7069   // Now, add the live outs to the set.
7070   LRU.addLiveOuts(MBB);
7071 
7072   // If any of these registers is available in the MBB, but also a live out of
7073   // the block, then we know outlining is unsafe.
7074   if (W16AvailableInBlock && !LRU.available(AArch64::W16))
7075     return false;
7076   if (W17AvailableInBlock && !LRU.available(AArch64::W17))
7077     return false;
7078   if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
7079     return false;
7080 
7081   // Check if there's a call inside this MachineBasicBlock. If there is, then
7082   // set a flag.
7083   if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
7084     Flags |= MachineOutlinerMBBFlags::HasCalls;
7085 
7086   MachineFunction *MF = MBB.getParent();
7087 
7088   // In the event that we outline, we may have to save LR. If there is an
7089   // available register in the MBB, then we'll always save LR there. Check if
7090   // this is true.
7091   bool CanSaveLR = false;
7092   const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
7093       MF->getSubtarget().getRegisterInfo());
7094 
7095   // Check if there is an available register across the sequence that we can
7096   // use.
7097   for (unsigned Reg : AArch64::GPR64RegClass) {
7098     if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
7099         Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
7100       CanSaveLR = true;
7101       break;
7102     }
7103   }
7104 
7105   // Check if we have a register we can save LR to, and if LR was used
7106   // somewhere. If both of those things are true, then we need to evaluate the
7107   // safety of outlining stack instructions later.
7108   if (!CanSaveLR && !LRU.available(AArch64::LR))
7109     Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
7110 
7111   return true;
7112 }
7113 
7114 outliner::InstrType
7115 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
7116                                    unsigned Flags) const {
7117   MachineInstr &MI = *MIT;
7118   MachineBasicBlock *MBB = MI.getParent();
7119   MachineFunction *MF = MBB->getParent();
7120   AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
7121 
7122   // Don't outline anything used for return address signing. The outlined
7123   // function will get signed later if needed.
7124   switch (MI.getOpcode()) {
7125   case AArch64::PACIASP:
7126   case AArch64::PACIBSP:
7127   case AArch64::AUTIASP:
7128   case AArch64::AUTIBSP:
7129   case AArch64::RETAA:
7130   case AArch64::RETAB:
7131   case AArch64::EMITBKEY:
7132     return outliner::InstrType::Illegal;
7133   }
7134 
7135   // Don't outline LOHs.
7136   if (FuncInfo->getLOHRelated().count(&MI))
7137     return outliner::InstrType::Illegal;
7138 
7139   // We can only outline these if we will tail call the outlined function, or
7140   // fix up the CFI offsets. Currently, CFI instructions are only outlined if
7141   // the outlined section is a tail call.
7142   //
7143   // FIXME: If the proper fixups for the offset are implemented, this should be
7144   // possible.
7145   if (MI.isCFIInstruction())
7146     return outliner::InstrType::Legal;
7147 
7148   // Don't allow debug values to impact outlining type.
7149   if (MI.isDebugInstr() || MI.isIndirectDebugValue())
7150     return outliner::InstrType::Invisible;
7151 
7152   // At this point, KILL instructions don't really tell us much so we can go
7153   // ahead and skip over them.
7154   if (MI.isKill())
7155     return outliner::InstrType::Invisible;
7156 
7157   // Is this a terminator for a basic block?
7158   if (MI.isTerminator()) {
7159 
7160     // Is this the end of a function?
7161     if (MI.getParent()->succ_empty())
7162       return outliner::InstrType::Legal;
7163 
7164     // It's not, so don't outline it.
7165     return outliner::InstrType::Illegal;
7166   }
7167 
7168   // Make sure none of the operands are un-outlinable.
7169   for (const MachineOperand &MOP : MI.operands()) {
7170     if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
7171         MOP.isTargetIndex())
7172       return outliner::InstrType::Illegal;
7173 
7174     // If it uses LR or W30 explicitly, then don't touch it.
7175     if (MOP.isReg() && !MOP.isImplicit() &&
7176         (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
7177       return outliner::InstrType::Illegal;
7178   }
7179 
7180   // Special cases for instructions that can always be outlined, but will fail
7181   // the later tests. e.g., ADRPs, which are PC-relative use LR, but can always
7182   // be outlined because they don't require a *specific* value to be in LR.
7183   if (MI.getOpcode() == AArch64::ADRP)
7184     return outliner::InstrType::Legal;
7185 
7186   // If MI is a call we might be able to outline it. We don't want to outline
7187   // any calls that rely on the position of items on the stack. When we outline
7188   // something containing a call, we have to emit a save and restore of LR in
7189   // the outlined function. Currently, this always happens by saving LR to the
7190   // stack. Thus, if we outline, say, half the parameters for a function call
7191   // plus the call, then we'll break the callee's expectations for the layout
7192   // of the stack.
7193   //
7194   // FIXME: Allow calls to functions which construct a stack frame, as long
7195   // as they don't access arguments on the stack.
7196   // FIXME: Figure out some way to analyze functions defined in other modules.
7197   // We should be able to compute the memory usage based on the IR calling
7198   // convention, even if we can't see the definition.
7199   if (MI.isCall()) {
7200     // Get the function associated with the call. Look at each operand and find
7201     // the one that represents the callee and get its name.
7202     const Function *Callee = nullptr;
7203     for (const MachineOperand &MOP : MI.operands()) {
7204       if (MOP.isGlobal()) {
7205         Callee = dyn_cast<Function>(MOP.getGlobal());
7206         break;
7207       }
7208     }
7209 
7210     // Never outline calls to mcount.  There isn't any rule that would require
7211     // this, but the Linux kernel's "ftrace" feature depends on it.
7212     if (Callee && Callee->getName() == "\01_mcount")
7213       return outliner::InstrType::Illegal;
7214 
7215     // If we don't know anything about the callee, assume it depends on the
7216     // stack layout of the caller. In that case, it's only legal to outline
7217     // as a tail-call. Explicitly list the call instructions we know about so we
7218     // don't get unexpected results with call pseudo-instructions.
7219     auto UnknownCallOutlineType = outliner::InstrType::Illegal;
7220     if (MI.getOpcode() == AArch64::BLR ||
7221         MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
7222       UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
7223 
7224     if (!Callee)
7225       return UnknownCallOutlineType;
7226 
7227     // We have a function we have information about. Check if it's something we
7228     // can safely outline.
7229     MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
7230 
7231     // We don't know what's going on with the callee at all. Don't touch it.
7232     if (!CalleeMF)
7233       return UnknownCallOutlineType;
7234 
7235     // Check if we know anything about the callee saves on the function. If we
7236     // don't, then don't touch it, since that implies that we haven't
7237     // computed anything about its stack frame yet.
7238     MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
7239     if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
7240         MFI.getNumObjects() > 0)
7241       return UnknownCallOutlineType;
7242 
7243     // At this point, we can say that CalleeMF ought not to pass anything on the
7244     // stack. Therefore, we can outline it.
7245     return outliner::InstrType::Legal;
7246   }
7247 
7248   // Don't outline positions.
7249   if (MI.isPosition())
7250     return outliner::InstrType::Illegal;
7251 
7252   // Don't touch the link register or W30.
7253   if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
7254       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
7255     return outliner::InstrType::Illegal;
7256 
7257   // Don't outline BTI instructions, because that will prevent the outlining
7258   // site from being indirectly callable.
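       // The relevant HINT immediates are 32 (BTI), 34 (BTI c), 36 (BTI j) and
       // 38 (BTI jc).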
7259   if (MI.getOpcode() == AArch64::HINT) {
7260     int64_t Imm = MI.getOperand(0).getImm();
7261     if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
7262       return outliner::InstrType::Illegal;
7263   }
7264 
7265   return outliner::InstrType::Legal;
7266 }
7267 
7268 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
7269   for (MachineInstr &MI : MBB) {
7270     const MachineOperand *Base;
7271     unsigned Width;
7272     int64_t Offset;
7273     bool OffsetIsScalable;
7274 
7275     // Is this a load or store with an immediate offset with SP as the base?
7276     if (!MI.mayLoadOrStore() ||
7277         !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
7278                                       &RI) ||
7279         (Base->isReg() && Base->getReg() != AArch64::SP))
7280       continue;
7281 
7282     // It is, so we have to fix it up.
7283     TypeSize Scale(0U, false);
7284     int64_t Dummy1, Dummy2;
7285 
7286     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
7287     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
7288     getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
7289     assert(Scale != 0 && "Unexpected opcode!");
7290     assert(!OffsetIsScalable && "Expected offset to be a byte offset");
7291 
7292     // We've pushed the return address to the stack, so add 16 to the offset.
7293     // This is safe, since we already checked if it would overflow when we
7294     // checked if this instruction was legal to outline.
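         // For example, a load "ldr x0, [sp, #8]" (byte offset 8, scale 8) gets
         // an immediate of (8 + 16) / 8 = 3, i.e. it becomes "ldr x0, [sp, #24]".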
7295     int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize();
7296     StackOffsetOperand.setImm(NewImm);
7297   }
7298 }
7299 
7300 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
7301                                  bool ShouldSignReturnAddr,
7302                                  bool ShouldSignReturnAddrWithAKey) {
7303   if (ShouldSignReturnAddr) {
7304     MachineBasicBlock::iterator MBBPAC = MBB.begin();
7305     MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator();
7306     const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
7307     const TargetInstrInfo *TII = Subtarget.getInstrInfo();
7308     DebugLoc DL;
7309 
7310     if (MBBAUT != MBB.end())
7311       DL = MBBAUT->getDebugLoc();
7312 
7313     // At the very beginning of the basic block we insert the following
7314     // depending on the key type
7315     //
7316     // a_key:                   b_key:
7317     //    PACIASP                   EMITBKEY
7318     //    CFI_INSTRUCTION           PACIBSP
7319     //                              CFI_INSTRUCTION
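         //
         // At the return, the authentication is either folded into the return
         // itself (RETAA / RETAB, when PAuth is available) or emitted as
         // AUTIASP / AUTIBSP immediately before the existing return.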
7320     unsigned PACI;
7321     if (ShouldSignReturnAddrWithAKey) {
7322       PACI = Subtarget.hasPAuth() ? AArch64::PACIA : AArch64::PACIASP;
7323     } else {
7324       BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY))
7325           .setMIFlag(MachineInstr::FrameSetup);
7326       PACI = Subtarget.hasPAuth() ? AArch64::PACIB : AArch64::PACIBSP;
7327     }
7328 
7329     auto MI = BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(PACI));
7330     if (Subtarget.hasPAuth())
7331       MI.addReg(AArch64::LR, RegState::Define)
7332           .addReg(AArch64::LR)
7333           .addReg(AArch64::SP, RegState::InternalRead);
7334     MI.setMIFlag(MachineInstr::FrameSetup);
7335 
7336     unsigned CFIIndex =
7337         MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
7338     BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION))
7339         .addCFIIndex(CFIIndex)
7340         .setMIFlags(MachineInstr::FrameSetup);
7341 
7342     // If v8.3a features are available, we can replace a RET instruction with
7343     // RETAA or RETAB and omit the AUT instructions.
7344     if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
7345         MBBAUT->getOpcode() == AArch64::RET) {
7346       BuildMI(MBB, MBBAUT, DL,
7347               TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA
7348                                                     : AArch64::RETAB))
7349           .copyImplicitOps(*MBBAUT);
7350       MBB.erase(MBBAUT);
7351     } else {
7352       BuildMI(MBB, MBBAUT, DL,
7353               TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
7354                                                     : AArch64::AUTIBSP))
7355           .setMIFlag(MachineInstr::FrameDestroy);
7356     }
7357   }
7358 }
7359 
7360 void AArch64InstrInfo::buildOutlinedFrame(
7361     MachineBasicBlock &MBB, MachineFunction &MF,
7362     const outliner::OutlinedFunction &OF) const {
7363 
7364   AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
7365 
7366   if (OF.FrameConstructionID == MachineOutlinerTailCall)
7367     FI->setOutliningStyle("Tail Call");
7368   else if (OF.FrameConstructionID == MachineOutlinerThunk) {
7369     // For thunk outlining, rewrite the last instruction from a call to a
7370     // tail-call.
7371     MachineInstr *Call = &*--MBB.instr_end();
7372     unsigned TailOpcode;
7373     if (Call->getOpcode() == AArch64::BL) {
7374       TailOpcode = AArch64::TCRETURNdi;
7375     } else {
7376       assert(Call->getOpcode() == AArch64::BLR ||
7377              Call->getOpcode() == AArch64::BLRNoIP);
7378       TailOpcode = AArch64::TCRETURNriALL;
7379     }
7380     MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
7381                            .add(Call->getOperand(0))
7382                            .addImm(0);
7383     MBB.insert(MBB.end(), TC);
7384     Call->eraseFromParent();
7385 
7386     FI->setOutliningStyle("Thunk");
7387   }
7388 
7389   bool IsLeafFunction = true;
7390 
7391   // Is there a call in the outlined range?
7392   auto IsNonTailCall = [](const MachineInstr &MI) {
7393     return MI.isCall() && !MI.isReturn();
7394   };
7395 
7396   if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
7397     // Fix up the instructions in the range, since we're going to modify the
7398     // stack.
7399 
7400     // Bugzilla ID: 46767
7401     // TODO: Check if fixing up twice is safe so we can outline these.
7402     assert(OF.FrameConstructionID != MachineOutlinerDefault &&
7403            "Can only fix up stack references once");
7404     fixupPostOutline(MBB);
7405 
7406     IsLeafFunction = false;
7407 
7408     // LR has to be a live in so that we can save it.
7409     if (!MBB.isLiveIn(AArch64::LR))
7410       MBB.addLiveIn(AArch64::LR);
7411 
7412     MachineBasicBlock::iterator It = MBB.begin();
7413     MachineBasicBlock::iterator Et = MBB.end();
7414 
7415     if (OF.FrameConstructionID == MachineOutlinerTailCall ||
7416         OF.FrameConstructionID == MachineOutlinerThunk)
7417       Et = std::prev(MBB.end());
7418 
7419     // Insert a save before the outlined region
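         // (i.e. "str x30, [sp, #-16]!").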
7420     MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
7421                                 .addReg(AArch64::SP, RegState::Define)
7422                                 .addReg(AArch64::LR)
7423                                 .addReg(AArch64::SP)
7424                                 .addImm(-16);
7425     It = MBB.insert(It, STRXpre);
7426 
7427     const TargetSubtargetInfo &STI = MF.getSubtarget();
7428     const MCRegisterInfo *MRI = STI.getRegisterInfo();
7429     unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
7430 
7431     // Add a CFI saying the stack was moved 16 B down.
7432     int64_t StackPosEntry =
7433         MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
7434     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
7435         .addCFIIndex(StackPosEntry)
7436         .setMIFlags(MachineInstr::FrameSetup);
7437 
7438     // Add a CFI saying that the LR that we want to find is now 16 B higher than
7439     // before.
7440     int64_t LRPosEntry =
7441         MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
7442     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
7443         .addCFIIndex(LRPosEntry)
7444         .setMIFlags(MachineInstr::FrameSetup);
7445 
7446     // Insert a restore before the terminator for the function.
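         // (i.e. "ldr x30, [sp], #16").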
7447     MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
7448                                  .addReg(AArch64::SP, RegState::Define)
7449                                  .addReg(AArch64::LR, RegState::Define)
7450                                  .addReg(AArch64::SP)
7451                                  .addImm(16);
7452     Et = MBB.insert(Et, LDRXpost);
7453   }
7454 
7455   // If multiple candidates reach this point, they must agree on their return
7456   // address signing. It is therefore enough to consider the signing behaviour
7457   // of one of them.
7458   const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>();
7459   bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction);
7460 
7461   // a_key is the default
7462   bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey();
7463 
7464   // If this is a tail call outlined function, then there's already a return.
7465   if (OF.FrameConstructionID == MachineOutlinerTailCall ||
7466       OF.FrameConstructionID == MachineOutlinerThunk) {
7467     signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
7468                          ShouldSignReturnAddrWithAKey);
7469     return;
7470   }
7471 
7472   // It's not a tail call, so we have to insert the return ourselves.
7473 
7474   // LR has to be a live in so that we can return to it.
7475   if (!MBB.isLiveIn(AArch64::LR))
7476     MBB.addLiveIn(AArch64::LR);
7477 
7478   MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
7479                           .addReg(AArch64::LR);
7480   MBB.insert(MBB.end(), ret);
7481 
7482   signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
7483                        ShouldSignReturnAddrWithAKey);
7484 
7485   FI->setOutliningStyle("Function");
7486 
7487   // Did we have to modify the stack by saving the link register?
7488   if (OF.FrameConstructionID != MachineOutlinerDefault)
7489     return;
7490 
7491   // We modified the stack.
7492   // Walk over the basic block and fix up all the stack accesses.
7493   fixupPostOutline(MBB);
7494 }
7495 
7496 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
7497     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
7498     MachineFunction &MF, const outliner::Candidate &C) const {
7499 
7500   // Are we tail calling?
7501   if (C.CallConstructionID == MachineOutlinerTailCall) {
7502     // If yes, then we can just branch to the label.
7503     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
7504                             .addGlobalAddress(M.getNamedValue(MF.getName()))
7505                             .addImm(0));
7506     return It;
7507   }
7508 
7509   // Are we saving the link register?
7510   if (C.CallConstructionID == MachineOutlinerNoLRSave ||
7511       C.CallConstructionID == MachineOutlinerThunk) {
7512     // No, so just insert the call.
7513     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
7514                             .addGlobalAddress(M.getNamedValue(MF.getName())));
7515     return It;
7516   }
7517 
7518   // We want to return the spot where we inserted the call.
7519   MachineBasicBlock::iterator CallPt;
7520 
7521   // Instructions for saving and restoring LR around the call instruction we're
7522   // going to insert.
7523   MachineInstr *Save;
7524   MachineInstr *Restore;
7525   // Can we save to a register?
7526   if (C.CallConstructionID == MachineOutlinerRegSave) {
7527     // FIXME: This logic should be sunk into a target-specific interface so that
7528     // we don't have to recompute the register.
7529     unsigned Reg = findRegisterToSaveLRTo(C);
7530     assert(Reg != 0 && "No callee-saved register available?");
7531 
7532     // LR has to be a live in so that we can save it.
7533     if (!MBB.isLiveIn(AArch64::LR))
7534       MBB.addLiveIn(AArch64::LR);
7535 
7536     // Save and restore LR from Reg.
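         // These are plain register moves: "mov Reg, x30" before the call and
         // "mov x30, Reg" after it.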
7537     Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
7538                .addReg(AArch64::XZR)
7539                .addReg(AArch64::LR)
7540                .addImm(0);
7541     Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
7542                 .addReg(AArch64::XZR)
7543                 .addReg(Reg)
7544                 .addImm(0);
7545   } else {
7546     // We have the default case. Save and restore from SP.
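         // The emitted sequence around the call then looks like:
         //   str x30, [sp, #-16]!
         //   bl  <outlined function>
         //   ldr x30, [sp], #16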
7547     Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
7548                .addReg(AArch64::SP, RegState::Define)
7549                .addReg(AArch64::LR)
7550                .addReg(AArch64::SP)
7551                .addImm(-16);
7552     Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
7553                   .addReg(AArch64::SP, RegState::Define)
7554                   .addReg(AArch64::LR, RegState::Define)
7555                   .addReg(AArch64::SP)
7556                   .addImm(16);
7557   }
7558 
7559   It = MBB.insert(It, Save);
7560   It++;
7561 
7562   // Insert the call.
7563   It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
7564                           .addGlobalAddress(M.getNamedValue(MF.getName())));
7565   CallPt = It;
7566   It++;
7567 
7568   It = MBB.insert(It, Restore);
7569   return CallPt;
7570 }
7571 
7572 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
7573   MachineFunction &MF) const {
7574   return MF.getFunction().hasMinSize();
7575 }
7576 
7577 Optional<DestSourcePair>
7578 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
7579 
7580   // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register and a
7581   // zero shift immediate are used as an alias for the mov instruction.
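       // For example, "orr w0, wzr, w1" is the canonical encoding of
       // "mov w0, w1".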
7582   if (MI.getOpcode() == AArch64::ORRWrs &&
7583       MI.getOperand(1).getReg() == AArch64::WZR &&
7584       MI.getOperand(3).getImm() == 0x0) {
7585     return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
7586   }
7587 
7588   if (MI.getOpcode() == AArch64::ORRXrs &&
7589       MI.getOperand(1).getReg() == AArch64::XZR &&
7590       MI.getOperand(3).getImm() == 0x0) {
7591     return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
7592   }
7593 
7594   return None;
7595 }
7596 
7597 Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
7598                                                       Register Reg) const {
7599   int Sign = 1;
7600   int64_t Offset = 0;
7601 
7602   // TODO: Handle cases where Reg is a super- or sub-register of the
7603   // destination register.
7604   const MachineOperand &Op0 = MI.getOperand(0);
7605   if (!Op0.isReg() || Reg != Op0.getReg())
7606     return None;
7607 
7608   switch (MI.getOpcode()) {
7609   default:
7610     return None;
7611   case AArch64::SUBWri:
7612   case AArch64::SUBXri:
7613   case AArch64::SUBSWri:
7614   case AArch64::SUBSXri:
7615     Sign *= -1;
7616     LLVM_FALLTHROUGH;
7617   case AArch64::ADDSWri:
7618   case AArch64::ADDSXri:
7619   case AArch64::ADDWri:
7620   case AArch64::ADDXri: {
7621     // TODO: Third operand can be global address (usually some string).
7622     if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
7623         !MI.getOperand(2).isImm())
7624       return None;
7625     int Shift = MI.getOperand(3).getImm();
7626     assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
7627     Offset = Sign * (MI.getOperand(2).getImm() << Shift);
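         // For example, "sub x0, x1, #4, lsl #12" is described as x1 plus an
         // offset of -(4 << 12) = -16384.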
7628   }
7629   }
7630   return RegImmPair{MI.getOperand(1).getReg(), Offset};
7631 }
7632 
7633 /// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
7634 /// the destination register then, if possible, describe the value in terms of
7635 /// the source register.
7636 static Optional<ParamLoadedValue>
7637 describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
7638                        const TargetInstrInfo *TII,
7639                        const TargetRegisterInfo *TRI) {
7640   auto DestSrc = TII->isCopyInstr(MI);
7641   if (!DestSrc)
7642     return None;
7643 
7644   Register DestReg = DestSrc->Destination->getReg();
7645   Register SrcReg = DestSrc->Source->getReg();
7646 
7647   auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
7648 
7649   // If the described register is the destination, just return the source.
7650   if (DestReg == DescribedReg)
7651     return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
7652 
7653   // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
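       // For example, a 64-bit parameter in X0 set up by "mov w0, w1" can be
       // described in terms of W1.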
7654   if (MI.getOpcode() == AArch64::ORRWrs &&
7655       TRI->isSuperRegister(DestReg, DescribedReg))
7656     return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
7657 
7658   // We may need to describe the lower part of an ORRXrs move.
7659   if (MI.getOpcode() == AArch64::ORRXrs &&
7660       TRI->isSubRegister(DestReg, DescribedReg)) {
7661     Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
7662     return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
7663   }
7664 
7665   assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
7666          "Unhandled ORR[XW]rs copy case");
7667 
7668   return None;
7669 }
7670 
7671 Optional<ParamLoadedValue>
7672 AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
7673                                       Register Reg) const {
7674   const MachineFunction *MF = MI.getMF();
7675   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
7676   switch (MI.getOpcode()) {
7677   case AArch64::MOVZWi:
7678   case AArch64::MOVZXi: {
7679     // MOVZWi may be used for producing zero-extended 32-bit immediates in
7680     // 64-bit parameters, so we need to consider super-registers.
7681     if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
7682       return None;
7683 
7684     if (!MI.getOperand(1).isImm())
7685       return None;
7686     int64_t Immediate = MI.getOperand(1).getImm();
7687     int Shift = MI.getOperand(2).getImm();
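         // For example, "movz x0, #42, lsl #16" produces the value
         // 42 << 16 = 2752512.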
7688     return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
7689                             nullptr);
7690   }
7691   case AArch64::ORRWrs:
7692   case AArch64::ORRXrs:
7693     return describeORRLoadedValue(MI, Reg, this, TRI);
7694   }
7695 
7696   return TargetInstrInfo::describeLoadedValue(MI, Reg);
7697 }
7698 
7699 bool AArch64InstrInfo::isExtendLikelyToBeFolded(
7700     MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
7701   assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
7702          ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
7703          ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
7704 
7705   // Anyexts are nops.
7706   if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
7707     return true;
7708 
7709   Register DefReg = ExtMI.getOperand(0).getReg();
7710   if (!MRI.hasOneNonDBGUse(DefReg))
7711     return false;
7712 
7713   // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
7714   // addressing mode.
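       // For example, "ldr x0, [x1, w2, sxtw]" folds the sign-extension of the
       // offset register into the load itself.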
7715   auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
7716   return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
7717 }
7718 
7719 uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
7720   return get(Opc).TSFlags & AArch64::ElementSizeMask;
7721 }
7722 
7723 bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
7724   return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
7725 }
7726 
7727 bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
7728   return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
7729 }
7730 
7731 unsigned int
7732 AArch64InstrInfo::getTailDuplicateSize(CodeGenOpt::Level OptLevel) const {
7733   return OptLevel >= CodeGenOpt::Aggressive ? 6 : 2;
7734 }
7735 
7736 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
7737   if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
7738     return AArch64::BLRNoIP;
7739   else
7740     return AArch64::BLR;
7741 }
7742 
7743 #define GET_INSTRINFO_HELPERS
7744 #define GET_INSTRMAP_INFO
7745 #include "AArch64GenInstrInfo.inc"
7746