xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64GlobalISelUtils.h"
15 #include "AArch64InstrInfo.h"
16 #include "AArch64MachineFunctionInfo.h"
17 #include "AArch64RegisterBankInfo.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "AArch64TargetMachine.h"
21 #include "AArch64GlobalISelUtils.h"
22 #include "MCTargetDesc/AArch64AddressingModes.h"
23 #include "MCTargetDesc/AArch64MCTargetDesc.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
26 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
27 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
28 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
29 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
30 #include "llvm/CodeGen/MachineBasicBlock.h"
31 #include "llvm/CodeGen/MachineConstantPool.h"
32 #include "llvm/CodeGen/MachineFunction.h"
33 #include "llvm/CodeGen/MachineInstr.h"
34 #include "llvm/CodeGen/MachineInstrBuilder.h"
35 #include "llvm/CodeGen/MachineMemOperand.h"
36 #include "llvm/CodeGen/MachineOperand.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/TargetOpcodes.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DerivedTypes.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/PatternMatch.h"
43 #include "llvm/IR/Type.h"
44 #include "llvm/IR/IntrinsicsAArch64.h"
45 #include "llvm/Pass.h"
46 #include "llvm/Support/Debug.h"
47 #include "llvm/Support/raw_ostream.h"
48 
49 #define DEBUG_TYPE "aarch64-isel"
50 
51 using namespace llvm;
52 using namespace MIPatternMatch;
53 using namespace AArch64GISelUtils;
54 
55 namespace llvm {
56 class BlockFrequencyInfo;
57 class ProfileSummaryInfo;
58 }
59 
60 namespace {
61 
62 #define GET_GLOBALISEL_PREDICATE_BITSET
63 #include "AArch64GenGlobalISel.inc"
64 #undef GET_GLOBALISEL_PREDICATE_BITSET
65 
66 class AArch64InstructionSelector : public InstructionSelector {
67 public:
68   AArch64InstructionSelector(const AArch64TargetMachine &TM,
69                              const AArch64Subtarget &STI,
70                              const AArch64RegisterBankInfo &RBI);
71 
72   bool select(MachineInstr &I) override;
73   static const char *getName() { return DEBUG_TYPE; }
74 
75   void setupMF(MachineFunction &MF, GISelKnownBits *KB,
76                CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
77                BlockFrequencyInfo *BFI) override {
78     InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
79     MIB.setMF(MF);
80 
81     // hasFnAttribute() is expensive to call on every BRCOND selection, so
82     // cache it here for each run of the selector.
83     ProduceNonFlagSettingCondBr =
84         !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
85     MFReturnAddr = Register();
86 
87     processPHIs(MF);
88   }
89 
90 private:
91   /// tblgen-erated 'select' implementation, used as the initial selector for
92   /// the patterns that don't require complex C++.
93   bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
94 
95   // A lowering phase that runs before any selection attempts.
96   // Returns true if the instruction was modified.
97   bool preISelLower(MachineInstr &I);
98 
99   // An early selection function that runs before the selectImpl() call.
100   bool earlySelect(MachineInstr &I);
101 
102   // Do some preprocessing of G_PHIs before we begin selection.
103   void processPHIs(MachineFunction &MF);
104 
105   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);
106 
107   /// Eliminate same-sized cross-bank copies into stores before selectImpl().
108   bool contractCrossBankCopyIntoStore(MachineInstr &I,
109                                       MachineRegisterInfo &MRI);
110 
111   bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
112 
113   bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
114                           MachineRegisterInfo &MRI) const;
115   bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
116                            MachineRegisterInfo &MRI) const;
117 
118   ///@{
119   /// Helper functions for selectCompareBranch.
120   bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
121                                     MachineIRBuilder &MIB) const;
122   bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
123                                     MachineIRBuilder &MIB) const;
124   bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
125                                     MachineIRBuilder &MIB) const;
126   bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
127                                   MachineBasicBlock *DstMBB,
128                                   MachineIRBuilder &MIB) const;
129   ///@}
130 
131   bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
132                            MachineRegisterInfo &MRI);
133 
134   bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
135   bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);
136 
137   // Helper to generate an equivalent of scalar_to_vector into a new register
138   // of class DstRC; returns the instruction that defines the new register.
139   MachineInstr *emitScalarToVector(unsigned EltSize,
140                                    const TargetRegisterClass *DstRC,
141                                    Register Scalar,
142                                    MachineIRBuilder &MIRBuilder) const;
143 
144   /// Emit a lane insert into \p DstReg, or a new vector register if None is
145   /// provided.
146   ///
147   /// The lane inserted into is defined by \p LaneIdx. The vector source
148   /// register is given by \p SrcReg. The register containing the element is
149   /// given by \p EltReg.
150   MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
151                                Register EltReg, unsigned LaneIdx,
152                                const RegisterBank &RB,
153                                MachineIRBuilder &MIRBuilder) const;
154 
155   /// Emit a sequence of instructions representing a constant \p CV for a
156   /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
157   ///
158   /// \returns the last instruction in the sequence on success, and nullptr
159   /// otherwise.
160   MachineInstr *emitConstantVector(Register Dst, Constant *CV,
161                                    MachineIRBuilder &MIRBuilder,
162                                    MachineRegisterInfo &MRI);
163 
164   bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
165   bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
166                               MachineRegisterInfo &MRI);
167   /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a
168   /// SUBREG_TO_REG.
169   bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI);
170   bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
171   bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
172   bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
173 
174   bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
175   bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
176   bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
177   bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
178 
179   /// Helper function to select vector load intrinsics like
180   /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc.
181   /// \p Opc is the opcode that the selected instruction should use.
182   /// \p NumVecs is the number of vector destinations for the instruction.
183   /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction.
184   bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs,
185                                  MachineInstr &I);
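  // Illustrative note (not code from this file): for an @llvm.aarch64.neon.ld2
  // load of two 4 x s32 vectors, the caller would pass something like
  // Opc = AArch64::LD2Twov4s and NumVecs = 2; the exact opcode choice here is
  // an assumption and is made by the intrinsic selection code.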
186   bool selectIntrinsicWithSideEffects(MachineInstr &I,
187                                       MachineRegisterInfo &MRI);
188   bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
189   bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
190   bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
191   bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
192   bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
193   bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
194   bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
195   bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);
196   bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI);
197 
198   unsigned emitConstantPoolEntry(const Constant *CPVal,
199                                  MachineFunction &MF) const;
200   MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
201                                          MachineIRBuilder &MIRBuilder) const;
202 
203   // Emit a vector concat operation.
204   MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
205                                  Register Op2,
206                                  MachineIRBuilder &MIRBuilder) const;
207 
208   // Emit an integer compare between LHS and RHS, which checks for Predicate.
209   MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
210                                    MachineOperand &Predicate,
211                                    MachineIRBuilder &MIRBuilder) const;
212 
213   /// Emit a floating point comparison between \p LHS and \p RHS.
214   /// If given, \p Pred is the predicate to use for the comparison.
215   MachineInstr *emitFPCompare(Register LHS, Register RHS,
216                               MachineIRBuilder &MIRBuilder,
217                               Optional<CmpInst::Predicate> = None) const;
218 
219   MachineInstr *emitInstr(unsigned Opcode,
220                           std::initializer_list<llvm::DstOp> DstOps,
221                           std::initializer_list<llvm::SrcOp> SrcOps,
222                           MachineIRBuilder &MIRBuilder,
223                           const ComplexRendererFns &RenderFns = None) const;
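  // Minimal illustrative sketch of the interface above (hypothetical registers,
  // not code from this file):
  //   emitInstr(AArch64::ADDWrr, {DstReg}, {LHSReg, RHSReg}, MIRBuilder);
  // builds a single ADDWrr with the given destination and source operands.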
224   /// Helper function to emit an add or sub instruction.
225   ///
226   /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants listed
227   /// below in a specific order.
228   ///
229   /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
230   ///
231   /// \code
232   ///   const std::array<std::array<unsigned, 2>, 5> Table {
233   ///    {{AArch64::ADDXri, AArch64::ADDWri},
234   ///     {AArch64::ADDXrs, AArch64::ADDWrs},
235   ///     {AArch64::ADDXrr, AArch64::ADDWrr},
236   ///     {AArch64::SUBXri, AArch64::SUBWri},
237   ///     {AArch64::ADDXrx, AArch64::ADDWrx}}};
238   /// \endcode
239   ///
240   /// Each row in the table corresponds to a different addressing mode. Each
241   /// column corresponds to a different register size.
242   ///
243   /// \attention Rows must be structured as follows:
244   ///   - Row 0: The ri opcode variants
245   ///   - Row 1: The rs opcode variants
246   ///   - Row 2: The rr opcode variants
247   ///   - Row 3: The ri opcode variants for negative immediates
248   ///   - Row 4: The rx opcode variants
249   ///
250   /// \attention Columns must be structured as follows:
251   ///   - Column 0: The 64-bit opcode variants
252   ///   - Column 1: The 32-bit opcode variants
253   ///
254   /// \p Dst is the destination register of the binop to emit.
255   /// \p LHS is the left-hand operand of the binop to emit.
256   /// \p RHS is the right-hand operand of the binop to emit.
257   MachineInstr *emitAddSub(
258       const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
259       Register Dst, MachineOperand &LHS, MachineOperand &RHS,
260       MachineIRBuilder &MIRBuilder) const;
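  // Illustrative call (hypothetical caller, not from this file): a wrapper
  // would pass a table shaped exactly like the example above, e.g.
  //   emitAddSub(AddTable, DstReg, LHSOperand, RHSOperand, MIRBuilder);
  // and the helper is expected to pick the row/column from the operand forms
  // and the register size.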
261   MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
262                         MachineOperand &RHS,
263                         MachineIRBuilder &MIRBuilder) const;
264   MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
265                          MachineIRBuilder &MIRBuilder) const;
266   MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
267                          MachineIRBuilder &MIRBuilder) const;
268   MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
269                         MachineIRBuilder &MIRBuilder) const;
270   MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
271                         MachineIRBuilder &MIRBuilder) const;
272   MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
273                            AArch64CC::CondCode CC,
274                            MachineIRBuilder &MIRBuilder) const;
275   MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
276                                      const RegisterBank &DstRB, LLT ScalarTy,
277                                      Register VecReg, unsigned LaneIdx,
278                                      MachineIRBuilder &MIRBuilder) const;
279   MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
280                           AArch64CC::CondCode Pred,
281                           MachineIRBuilder &MIRBuilder) const;
282   /// Emit a CSet for a FP compare.
283   ///
284   /// \p Dst is expected to be a 32-bit scalar register.
285   MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
286                                 MachineIRBuilder &MIRBuilder) const;
287 
288   /// Emit the overflow op for \p Opcode.
289   ///
290   /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
291   /// G_USUBO, etc.
292   std::pair<MachineInstr *, AArch64CC::CondCode>
293   emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
294                  MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;
295 
296   /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
297   /// \p IsNegative is true if the test should be "not zero".
298   /// This will also optimize the test bit instruction when possible.
299   MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
300                             MachineBasicBlock *DstMBB,
301                             MachineIRBuilder &MIB) const;
302 
303   /// Emit a CB(N)Z instruction which branches to \p DestMBB.
304   MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
305                         MachineBasicBlock *DestMBB,
306                         MachineIRBuilder &MIB) const;
307 
308   // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
309   // We use these manually instead of using the importer since it doesn't
310   // support SDNodeXForm.
311   ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
312   ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
313   ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
314   ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
315 
316   ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
317   ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
318   ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
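  // Background note (architectural fact, not code from this file): AArch64
  // arithmetic immediates are 12-bit values with an optional LSL #12, so e.g.
  // 0x123000 can be rendered as imm = 0x123, shift = 12, whereas 0x123456 has
  // no such encoding and must be materialized another way.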
319 
320   ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
321                                             unsigned Size) const;
322 
323   ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
324     return selectAddrModeUnscaled(Root, 1);
325   }
326   ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
327     return selectAddrModeUnscaled(Root, 2);
328   }
329   ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
330     return selectAddrModeUnscaled(Root, 4);
331   }
332   ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
333     return selectAddrModeUnscaled(Root, 8);
334   }
335   ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
336     return selectAddrModeUnscaled(Root, 16);
337   }
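  // Note (illustrative): the "unscaled" forms correspond to the LDUR/STUR
  // family, which take a signed 9-bit byte offset rather than a scaled
  // unsigned immediate; the Size argument above is the access size in bytes.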
338 
339   /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
340   /// from complex pattern matchers like selectAddrModeIndexed().
341   ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
342                                           MachineRegisterInfo &MRI) const;
343 
344   ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
345                                            unsigned Size) const;
346   template <int Width>
347   ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
348     return selectAddrModeIndexed(Root, Width / 8);
349   }
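  // Illustrative reading: selectAddrModeIndexed<32>(Root) requests a 4-byte
  // scaled, unsigned-immediate addressing mode, i.e. the form used by
  // LDRWui/STRWui-style instructions.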
350 
351   bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
352                                      const MachineRegisterInfo &MRI) const;
353   ComplexRendererFns
354   selectAddrModeShiftedExtendXReg(MachineOperand &Root,
355                                   unsigned SizeInBytes) const;
356 
357   /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
358   /// or not a shift + extend should be folded into an addressing mode. Returns
359   /// None when this is not profitable or possible.
360   ComplexRendererFns
361   selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
362                     MachineOperand &Offset, unsigned SizeInBytes,
363                     bool WantsExt) const;
364   ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
365   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
366                                        unsigned SizeInBytes) const;
367   template <int Width>
368   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
369     return selectAddrModeXRO(Root, Width / 8);
370   }
371 
372   ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
373                                        unsigned SizeInBytes) const;
374   template <int Width>
375   ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
376     return selectAddrModeWRO(Root, Width / 8);
377   }
378 
379   ComplexRendererFns selectShiftedRegister(MachineOperand &Root,
380                                            bool AllowROR = false) const;
381 
382   ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
383     return selectShiftedRegister(Root);
384   }
385 
386   ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
387     return selectShiftedRegister(Root, true);
388   }
389 
390   /// Given an extend instruction, determine the correct shift-extend type for
391   /// that instruction.
392   ///
393   /// If the instruction is going to be used in a load or store, pass
394   /// \p IsLoadStore = true.
395   AArch64_AM::ShiftExtendType
396   getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
397                        bool IsLoadStore = false) const;
398 
399   /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
400   ///
401   /// \returns Either \p Reg if no change was necessary, or the new register
402   /// created by moving \p Reg.
403   ///
404   /// Note: This uses emitCopy right now.
405   Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
406                               MachineIRBuilder &MIB) const;
407 
408   ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
409 
410   void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
411                       int OpIdx = -1) const;
412   void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
413                           int OpIdx = -1) const;
414   void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
415                           int OpIdx = -1) const;
416   void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
417                      int OpIdx = -1) const;
418   void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
419                      int OpIdx = -1) const;
420   void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
421                      int OpIdx = -1) const;
422 
423   // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
424   void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);
425 
426   // Optimization methods.
427   bool tryOptSelect(MachineInstr &MI);
428   MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
429                                       MachineOperand &Predicate,
430                                       MachineIRBuilder &MIRBuilder) const;
431 
432   /// Return true if \p MI is a load or store of \p NumBytes bytes.
433   bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
434 
435   /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
436   /// register zeroed out. In other words, the result of MI has been explicitly
437   /// zero extended.
438   bool isDef32(const MachineInstr &MI) const;
439 
440   const AArch64TargetMachine &TM;
441   const AArch64Subtarget &STI;
442   const AArch64InstrInfo &TII;
443   const AArch64RegisterInfo &TRI;
444   const AArch64RegisterBankInfo &RBI;
445 
446   bool ProduceNonFlagSettingCondBr = false;
447 
448   // Some cached values used during selection.
449   // We use LR as a live-in register, and we keep track of it here as it can be
450   // clobbered by calls.
451   Register MFReturnAddr;
452 
453   MachineIRBuilder MIB;
454 
455 #define GET_GLOBALISEL_PREDICATES_DECL
456 #include "AArch64GenGlobalISel.inc"
457 #undef GET_GLOBALISEL_PREDICATES_DECL
458 
459 // We declare the temporaries used by selectImpl() in the class to minimize the
460 // cost of constructing placeholder values.
461 #define GET_GLOBALISEL_TEMPORARIES_DECL
462 #include "AArch64GenGlobalISel.inc"
463 #undef GET_GLOBALISEL_TEMPORARIES_DECL
464 };
465 
466 } // end anonymous namespace
467 
468 #define GET_GLOBALISEL_IMPL
469 #include "AArch64GenGlobalISel.inc"
470 #undef GET_GLOBALISEL_IMPL
471 
472 AArch64InstructionSelector::AArch64InstructionSelector(
473     const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
474     const AArch64RegisterBankInfo &RBI)
475     : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
476       TRI(*STI.getRegisterInfo()), RBI(RBI),
477 #define GET_GLOBALISEL_PREDICATES_INIT
478 #include "AArch64GenGlobalISel.inc"
479 #undef GET_GLOBALISEL_PREDICATES_INIT
480 #define GET_GLOBALISEL_TEMPORARIES_INIT
481 #include "AArch64GenGlobalISel.inc"
482 #undef GET_GLOBALISEL_TEMPORARIES_INIT
483 {
484 }
485 
486 // FIXME: This should be target-independent, inferred from the types declared
487 // for each class in the bank.
488 static const TargetRegisterClass *
489 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
490                          const RegisterBankInfo &RBI,
491                          bool GetAllRegSet = false) {
492   if (RB.getID() == AArch64::GPRRegBankID) {
493     if (Ty.getSizeInBits() <= 32)
494       return GetAllRegSet ? &AArch64::GPR32allRegClass
495                           : &AArch64::GPR32RegClass;
496     if (Ty.getSizeInBits() == 64)
497       return GetAllRegSet ? &AArch64::GPR64allRegClass
498                           : &AArch64::GPR64RegClass;
499     if (Ty.getSizeInBits() == 128)
500       return &AArch64::XSeqPairsClassRegClass;
501     return nullptr;
502   }
503 
504   if (RB.getID() == AArch64::FPRRegBankID) {
505     switch (Ty.getSizeInBits()) {
506     case 8:
507       return &AArch64::FPR8RegClass;
508     case 16:
509       return &AArch64::FPR16RegClass;
510     case 32:
511       return &AArch64::FPR32RegClass;
512     case 64:
513       return &AArch64::FPR64RegClass;
514     case 128:
515       return &AArch64::FPR128RegClass;
516     }
517     return nullptr;
518   }
519 
520   return nullptr;
521 }
522 
523 /// Given a register bank, and size in bits, return the smallest register class
524 /// that can represent that combination.
525 static const TargetRegisterClass *
526 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
527                       bool GetAllRegSet = false) {
528   unsigned RegBankID = RB.getID();
529 
530   if (RegBankID == AArch64::GPRRegBankID) {
531     if (SizeInBits <= 32)
532       return GetAllRegSet ? &AArch64::GPR32allRegClass
533                           : &AArch64::GPR32RegClass;
534     if (SizeInBits == 64)
535       return GetAllRegSet ? &AArch64::GPR64allRegClass
536                           : &AArch64::GPR64RegClass;
537     if (SizeInBits == 128)
538       return &AArch64::XSeqPairsClassRegClass;
539   }
540 
541   if (RegBankID == AArch64::FPRRegBankID) {
542     switch (SizeInBits) {
543     default:
544       return nullptr;
545     case 8:
546       return &AArch64::FPR8RegClass;
547     case 16:
548       return &AArch64::FPR16RegClass;
549     case 32:
550       return &AArch64::FPR32RegClass;
551     case 64:
552       return &AArch64::FPR64RegClass;
553     case 128:
554       return &AArch64::FPR128RegClass;
555     }
556   }
557 
558   return nullptr;
559 }
560 
561 /// Sets \p SubReg to the correct subregister for the given register class; returns true on success.
562 static bool getSubRegForClass(const TargetRegisterClass *RC,
563                               const TargetRegisterInfo &TRI, unsigned &SubReg) {
564   switch (TRI.getRegSizeInBits(*RC)) {
565   case 8:
566     SubReg = AArch64::bsub;
567     break;
568   case 16:
569     SubReg = AArch64::hsub;
570     break;
571   case 32:
572     if (RC != &AArch64::FPR32RegClass)
573       SubReg = AArch64::sub_32;
574     else
575       SubReg = AArch64::ssub;
576     break;
577   case 64:
578     SubReg = AArch64::dsub;
579     break;
580   default:
581     LLVM_DEBUG(
582         dbgs() << "Couldn't find appropriate subregister for register class.");
583     return false;
584   }
585 
586   return true;
587 }
588 
589 /// Returns the minimum size the given register bank can hold.
590 static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
591   switch (RB.getID()) {
592   case AArch64::GPRRegBankID:
593     return 32;
594   case AArch64::FPRRegBankID:
595     return 8;
596   default:
597     llvm_unreachable("Tried to get minimum size for unknown register bank.");
598   }
599 }
600 
601 /// Create a REG_SEQUENCE instruction using the registers in \p Regs.
602 /// Helper function for functions like createDTuple and createQTuple.
603 ///
604 /// \p RegClassIDs - The list of register class IDs available for some tuple of
605 /// a scalar class, e.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
606 /// expected to contain the classes for tuples of 2, 3, and 4 registers.
607 ///
608 /// \p SubRegs - The list of subregister classes associated with each register
609 /// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
610 /// subregister class. The index of each subregister class is expected to
611 /// correspond with the index of each register class.
612 ///
613 /// \returns Either the destination register of the REG_SEQUENCE instruction
614 /// that was created, or the 0th element of \p Regs if \p Regs contains a
615 /// single element.
616 static Register createTuple(ArrayRef<Register> Regs,
617                             const unsigned RegClassIDs[],
618                             const unsigned SubRegs[], MachineIRBuilder &MIB) {
619   unsigned NumRegs = Regs.size();
620   if (NumRegs == 1)
621     return Regs[0];
622   assert(NumRegs >= 2 && NumRegs <= 4 &&
623          "Only support between two and 4 registers in a tuple!");
624   const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
625   auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
626   auto RegSequence =
627       MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
628   for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
629     RegSequence.addUse(Regs[I]);
630     RegSequence.addImm(SubRegs[I]);
631   }
632   return RegSequence.getReg(0);
633 }
634 
635 /// Create a tuple of D-registers using the registers in \p Regs.
636 static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
637   static const unsigned RegClassIDs[] = {
638       AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
639   static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
640                                      AArch64::dsub2, AArch64::dsub3};
641   return createTuple(Regs, RegClassIDs, SubRegs, MIB);
642 }
643 
644 /// Create a tuple of Q-registers using the registers in \p Regs.
645 static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
646   static const unsigned RegClassIDs[] = {
647       AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
648   static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
649                                      AArch64::qsub2, AArch64::qsub3};
650   return createTuple(Regs, RegClassIDs, SubRegs, MIB);
651 }
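// Illustrative use of the helpers above (hypothetical registers, not code from
// this file):
//   Register Tuple = createDTuple({Reg0, Reg1}, MIB);
// builds a REG_SEQUENCE in the DD class placing Reg0 in dsub0 and Reg1 in
// dsub1; passing a single register simply returns that register unchanged.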
652 
653 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
654   auto &MI = *Root.getParent();
655   auto &MBB = *MI.getParent();
656   auto &MF = *MBB.getParent();
657   auto &MRI = MF.getRegInfo();
658   uint64_t Immed;
659   if (Root.isImm())
660     Immed = Root.getImm();
661   else if (Root.isCImm())
662     Immed = Root.getCImm()->getZExtValue();
663   else if (Root.isReg()) {
664     auto ValAndVReg =
665         getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
666     if (!ValAndVReg)
667       return None;
668     Immed = ValAndVReg->Value.getSExtValue();
669   } else
670     return None;
671   return Immed;
672 }
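// Illustrative example: if Root is a register defined (possibly through
// copies) by G_CONSTANT i64 42, this returns 42; a plain immediate operand is
// returned as-is, and anything non-constant yields None.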
673 
674 /// Check whether \p I is a currently unsupported binary operation:
675 /// - it has an unsized type
676 /// - an operand is not a vreg
677 /// - its operands are not all in the same register bank
678 /// These are checks that should someday live in the verifier, but right now,
679 /// these are mostly limitations of the AArch64 selector.
680 static bool unsupportedBinOp(const MachineInstr &I,
681                              const AArch64RegisterBankInfo &RBI,
682                              const MachineRegisterInfo &MRI,
683                              const AArch64RegisterInfo &TRI) {
684   LLT Ty = MRI.getType(I.getOperand(0).getReg());
685   if (!Ty.isValid()) {
686     LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
687     return true;
688   }
689 
690   const RegisterBank *PrevOpBank = nullptr;
691   for (auto &MO : I.operands()) {
692     // FIXME: Support non-register operands.
693     if (!MO.isReg()) {
694       LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
695       return true;
696     }
697 
698     // FIXME: Can generic operations have physical register operands? If
699     // so, this will need to be taught about that, and we'll need to get the
700     // bank out of the minimal class for the register.
701     // Either way, this needs to be documented (and possibly verified).
702     if (!Register::isVirtualRegister(MO.getReg())) {
703       LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
704       return true;
705     }
706 
707     const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
708     if (!OpBank) {
709       LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
710       return true;
711     }
712 
713     if (PrevOpBank && OpBank != PrevOpBank) {
714       LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
715       return true;
716     }
717     PrevOpBank = OpBank;
718   }
719   return false;
720 }
721 
722 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
723 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
724 /// and of size \p OpSize.
725 /// \returns \p GenericOpc if the combination is unsupported.
726 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
727                                unsigned OpSize) {
728   switch (RegBankID) {
729   case AArch64::GPRRegBankID:
730     if (OpSize == 32) {
731       switch (GenericOpc) {
732       case TargetOpcode::G_SHL:
733         return AArch64::LSLVWr;
734       case TargetOpcode::G_LSHR:
735         return AArch64::LSRVWr;
736       case TargetOpcode::G_ASHR:
737         return AArch64::ASRVWr;
738       default:
739         return GenericOpc;
740       }
741     } else if (OpSize == 64) {
742       switch (GenericOpc) {
743       case TargetOpcode::G_PTR_ADD:
744         return AArch64::ADDXrr;
745       case TargetOpcode::G_SHL:
746         return AArch64::LSLVXr;
747       case TargetOpcode::G_LSHR:
748         return AArch64::LSRVXr;
749       case TargetOpcode::G_ASHR:
750         return AArch64::ASRVXr;
751       default:
752         return GenericOpc;
753       }
754     }
755     break;
756   case AArch64::FPRRegBankID:
757     switch (OpSize) {
758     case 32:
759       switch (GenericOpc) {
760       case TargetOpcode::G_FADD:
761         return AArch64::FADDSrr;
762       case TargetOpcode::G_FSUB:
763         return AArch64::FSUBSrr;
764       case TargetOpcode::G_FMUL:
765         return AArch64::FMULSrr;
766       case TargetOpcode::G_FDIV:
767         return AArch64::FDIVSrr;
768       default:
769         return GenericOpc;
770       }
771     case 64:
772       switch (GenericOpc) {
773       case TargetOpcode::G_FADD:
774         return AArch64::FADDDrr;
775       case TargetOpcode::G_FSUB:
776         return AArch64::FSUBDrr;
777       case TargetOpcode::G_FMUL:
778         return AArch64::FMULDrr;
779       case TargetOpcode::G_FDIV:
780         return AArch64::FDIVDrr;
781       case TargetOpcode::G_OR:
782         return AArch64::ORRv8i8;
783       default:
784         return GenericOpc;
785       }
786     }
787     break;
788   }
789   return GenericOpc;
790 }
791 
792 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
793 /// appropriate for the (value) register bank \p RegBankID and of memory access
794 /// size \p OpSize.  This returns the variant with the base+unsigned-immediate
795 /// addressing mode (e.g., LDRXui).
796 /// \returns \p GenericOpc if the combination is unsupported.
797 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
798                                     unsigned OpSize) {
799   const bool isStore = GenericOpc == TargetOpcode::G_STORE;
800   switch (RegBankID) {
801   case AArch64::GPRRegBankID:
802     switch (OpSize) {
803     case 8:
804       return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
805     case 16:
806       return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
807     case 32:
808       return isStore ? AArch64::STRWui : AArch64::LDRWui;
809     case 64:
810       return isStore ? AArch64::STRXui : AArch64::LDRXui;
811     }
812     break;
813   case AArch64::FPRRegBankID:
814     switch (OpSize) {
815     case 8:
816       return isStore ? AArch64::STRBui : AArch64::LDRBui;
817     case 16:
818       return isStore ? AArch64::STRHui : AArch64::LDRHui;
819     case 32:
820       return isStore ? AArch64::STRSui : AArch64::LDRSui;
821     case 64:
822       return isStore ? AArch64::STRDui : AArch64::LDRDui;
823     case 128:
824       return isStore ? AArch64::STRQui : AArch64::LDRQui;
825     }
826     break;
827   }
828   return GenericOpc;
829 }
830 
831 #ifndef NDEBUG
832 /// Helper function that verifies that we have a valid copy at the end of
833 /// selectCopy. Verifies that the source and dest have the expected sizes and
834 /// then returns true.
835 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
836                         const MachineRegisterInfo &MRI,
837                         const TargetRegisterInfo &TRI,
838                         const RegisterBankInfo &RBI) {
839   const Register DstReg = I.getOperand(0).getReg();
840   const Register SrcReg = I.getOperand(1).getReg();
841   const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
842   const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
843 
844   // Make sure the sizes of the source and dest line up.
845   assert(
846       (DstSize == SrcSize ||
847        // Copies are a means to set up initial types; the number of
848        // bits may not exactly match.
849        (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
850        // Copies are a means to copy bits around; as long as we stay
851        // on the same register class, that's fine. Otherwise, we need
852        // some SUBREG_TO_REG or AND & co.
853        (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
854       "Copy with different width?!");
855 
856   // Check the size of the destination.
857   assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
858          "GPRs cannot get more than 64-bit width values");
859 
860   return true;
861 }
862 #endif
863 
864 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
865 /// to \p *To.
866 ///
867 /// E.g "To = COPY SrcReg:SubReg"
868 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
869                        const RegisterBankInfo &RBI, Register SrcReg,
870                        const TargetRegisterClass *To, unsigned SubReg) {
871   assert(SrcReg.isValid() && "Expected a valid source register?");
872   assert(To && "Destination register class cannot be null");
873   assert(SubReg && "Expected a valid subregister");
874 
875   MachineIRBuilder MIB(I);
876   auto SubRegCopy =
877       MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
878   MachineOperand &RegOp = I.getOperand(1);
879   RegOp.setReg(SubRegCopy.getReg(0));
880 
881   // It's possible that the destination register won't be constrained. Make
882   // sure that happens.
883   if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
884     RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
885 
886   return true;
887 }
888 
889 /// Helper function to get the source and destination register classes for a
890 /// copy. Returns a std::pair containing the source and destination register
891 /// classes, in that order. If a register class cannot be determined, the
892 /// corresponding element will be nullptr.
893 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
894 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
895                      MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
896                      const RegisterBankInfo &RBI) {
897   Register DstReg = I.getOperand(0).getReg();
898   Register SrcReg = I.getOperand(1).getReg();
899   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
900   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
901   unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
902   unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
903 
904   // Special casing for cross-bank copies of s1s. We can technically represent
905   // a 1-bit value with any size of register. The minimum size for a GPR is 32
906   // bits. So, we need to put the FPR on 32 bits as well.
907   //
908   // FIXME: I'm not sure if this case holds true outside of copies. If it does,
909   // then we can pull it into the helpers that get the appropriate class for a
910   // register bank. Or make a new helper that carries along some constraint
911   // information.
912   if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
913     SrcSize = DstSize = 32;
914 
915   return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
916           getMinClassForRegBank(DstRegBank, DstSize, true)};
917 }
918 
919 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
920                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
921                        const RegisterBankInfo &RBI) {
922   Register DstReg = I.getOperand(0).getReg();
923   Register SrcReg = I.getOperand(1).getReg();
924   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
925   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
926 
927   // Find the correct register classes for the source and destination registers.
928   const TargetRegisterClass *SrcRC;
929   const TargetRegisterClass *DstRC;
930   std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
931 
932   if (!DstRC) {
933     LLVM_DEBUG(dbgs() << "Unexpected dest size "
934                       << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
935     return false;
936   }
937 
938   // A couple helpers below, for making sure that the copy we produce is valid.
939 
940   // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
941   // to verify that the src and dst are the same size, since that's handled by
942   // the SUBREG_TO_REG.
943   bool KnownValid = false;
944 
945   // Returns true, or asserts if something we don't expect happens. Rather than
946   // returning true unconditionally, we return isValidCopy() so that the result
947   // is actually verified in asserts builds.
948   auto CheckCopy = [&]() {
949     // If we have a bitcast or something, we can't have physical registers.
950     assert((I.isCopy() ||
951             (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
952              !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
953            "No phys reg on generic operator!");
954     bool ValidCopy = true;
955 #ifndef NDEBUG
956     ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
957     assert(ValidCopy && "Invalid copy.");
958 #endif
959     (void)KnownValid;
960     return ValidCopy;
961   };
962 
963   // Is this a copy? If so, then we may need to insert a subregister copy.
964   if (I.isCopy()) {
965     // Yes. Check if there's anything to fix up.
966     if (!SrcRC) {
967       LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
968       return false;
969     }
970 
971     unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
972     unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
973     unsigned SubReg;
974 
975     // If the source bank doesn't support a subregister copy small enough,
976     // then we first need to copy to the destination bank.
977     if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
978       const TargetRegisterClass *DstTempRC =
979           getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
980       getSubRegForClass(DstRC, TRI, SubReg);
981 
982       MachineIRBuilder MIB(I);
983       auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
984       copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
985     } else if (SrcSize > DstSize) {
986       // If the source register is bigger than the destination we need to
987       // perform a subregister copy.
988       const TargetRegisterClass *SubRegRC =
989           getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
990       getSubRegForClass(SubRegRC, TRI, SubReg);
991       copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
992     } else if (DstSize > SrcSize) {
993       // If the destination register is bigger than the source we need to do
994       // a promotion using SUBREG_TO_REG.
995       const TargetRegisterClass *PromotionRC =
996           getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
997       getSubRegForClass(SrcRC, TRI, SubReg);
998 
999       Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
1000       BuildMI(*I.getParent(), I, I.getDebugLoc(),
1001               TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
1002           .addImm(0)
1003           .addUse(SrcReg)
1004           .addImm(SubReg);
1005       MachineOperand &RegOp = I.getOperand(1);
1006       RegOp.setReg(PromoteReg);
1007 
1008       // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
1009       KnownValid = true;
1010     }
1011 
1012     // If the destination is a physical register, then there's nothing to
1013     // change, so we're done.
1014     if (Register::isPhysicalRegister(DstReg))
1015       return CheckCopy();
1016   }
1017 
1018   // No need to constrain SrcReg. It will get constrained when we hit another
1019   // of its uses or defs. Copies do not have constraints.
1020   if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
1021     LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
1022                       << " operand\n");
1023     return false;
1024   }
1025 
1026   // If this is a GPR ZEXT, we want to just reduce it down into a copy.
1027   // The sizes will be mismatched with the source < 32b, but that's ok.
1028   if (I.getOpcode() == TargetOpcode::G_ZEXT) {
1029     I.setDesc(TII.get(AArch64::COPY));
1030     assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
1031     return selectCopy(I, TII, MRI, TRI, RBI);
1032   }
1033 
1034   I.setDesc(TII.get(AArch64::COPY));
1035   return CheckCopy();
1036 }
1037 
1038 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
1039   if (!DstTy.isScalar() || !SrcTy.isScalar())
1040     return GenericOpc;
1041 
1042   const unsigned DstSize = DstTy.getSizeInBits();
1043   const unsigned SrcSize = SrcTy.getSizeInBits();
1044 
1045   switch (DstSize) {
1046   case 32:
1047     switch (SrcSize) {
1048     case 32:
1049       switch (GenericOpc) {
1050       case TargetOpcode::G_SITOFP:
1051         return AArch64::SCVTFUWSri;
1052       case TargetOpcode::G_UITOFP:
1053         return AArch64::UCVTFUWSri;
1054       case TargetOpcode::G_FPTOSI:
1055         return AArch64::FCVTZSUWSr;
1056       case TargetOpcode::G_FPTOUI:
1057         return AArch64::FCVTZUUWSr;
1058       default:
1059         return GenericOpc;
1060       }
1061     case 64:
1062       switch (GenericOpc) {
1063       case TargetOpcode::G_SITOFP:
1064         return AArch64::SCVTFUXSri;
1065       case TargetOpcode::G_UITOFP:
1066         return AArch64::UCVTFUXSri;
1067       case TargetOpcode::G_FPTOSI:
1068         return AArch64::FCVTZSUWDr;
1069       case TargetOpcode::G_FPTOUI:
1070         return AArch64::FCVTZUUWDr;
1071       default:
1072         return GenericOpc;
1073       }
1074     default:
1075       return GenericOpc;
1076     }
1077   case 64:
1078     switch (SrcSize) {
1079     case 32:
1080       switch (GenericOpc) {
1081       case TargetOpcode::G_SITOFP:
1082         return AArch64::SCVTFUWDri;
1083       case TargetOpcode::G_UITOFP:
1084         return AArch64::UCVTFUWDri;
1085       case TargetOpcode::G_FPTOSI:
1086         return AArch64::FCVTZSUXSr;
1087       case TargetOpcode::G_FPTOUI:
1088         return AArch64::FCVTZUUXSr;
1089       default:
1090         return GenericOpc;
1091       }
1092     case 64:
1093       switch (GenericOpc) {
1094       case TargetOpcode::G_SITOFP:
1095         return AArch64::SCVTFUXDri;
1096       case TargetOpcode::G_UITOFP:
1097         return AArch64::UCVTFUXDri;
1098       case TargetOpcode::G_FPTOSI:
1099         return AArch64::FCVTZSUXDr;
1100       case TargetOpcode::G_FPTOUI:
1101         return AArch64::FCVTZUUXDr;
1102       default:
1103         return GenericOpc;
1104       }
1105     default:
1106       return GenericOpc;
1107     }
1108   default:
1109     return GenericOpc;
1110   }
1111   return GenericOpc;
1112 }
1113 
1114 MachineInstr *
1115 AArch64InstructionSelector::emitSelect(Register Dst, Register True,
1116                                        Register False, AArch64CC::CondCode CC,
1117                                        MachineIRBuilder &MIB) const {
1118   MachineRegisterInfo &MRI = *MIB.getMRI();
1119   assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
1120              RBI.getRegBank(True, MRI, TRI)->getID() &&
1121          "Expected both select operands to have the same regbank?");
1122   LLT Ty = MRI.getType(True);
1123   if (Ty.isVector())
1124     return nullptr;
1125   const unsigned Size = Ty.getSizeInBits();
1126   assert((Size == 32 || Size == 64) &&
1127          "Expected 32 bit or 64 bit select only?");
1128   const bool Is32Bit = Size == 32;
1129   if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
1130     unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
1131     auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1132     constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
1133     return &*FCSel;
1134   }
1135 
1136   // By default, we'll try and emit a CSEL.
1137   unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
1138   bool Optimized = false;
1139   auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
1140                                  &Optimized](Register &Reg, Register &OtherReg,
1141                                              bool Invert) {
1142     if (Optimized)
1143       return false;
1144 
1145     // Attempt to fold:
1146     //
1147     // %sub = G_SUB 0, %x
1148     // %select = G_SELECT cc, %reg, %sub
1149     //
1150     // Into:
1151     // %select = CSNEG %reg, %x, cc
1152     Register MatchReg;
1153     if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
1154       Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
1155       Reg = MatchReg;
1156       if (Invert) {
1157         CC = AArch64CC::getInvertedCondCode(CC);
1158         std::swap(Reg, OtherReg);
1159       }
1160       return true;
1161     }
1162 
1163     // Attempt to fold:
1164     //
1165     // %xor = G_XOR %x, -1
1166     // %select = G_SELECT cc, %reg, %xor
1167     //
1168     // Into:
1169     // %select = CSINV %reg, %x, cc
1170     if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
1171       Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1172       Reg = MatchReg;
1173       if (Invert) {
1174         CC = AArch64CC::getInvertedCondCode(CC);
1175         std::swap(Reg, OtherReg);
1176       }
1177       return true;
1178     }
1179 
1180     // Attempt to fold:
1181     //
1182     // %add = G_ADD %x, 1
1183     // %select = G_SELECT cc, %reg, %add
1184     //
1185     // Into:
1186     // %select = CSINC %reg, %x, cc
1187     if (mi_match(Reg, MRI,
1188                  m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
1189                           m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
1190       Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1191       Reg = MatchReg;
1192       if (Invert) {
1193         CC = AArch64CC::getInvertedCondCode(CC);
1194         std::swap(Reg, OtherReg);
1195       }
1196       return true;
1197     }
1198 
1199     return false;
1200   };
1201 
1202   // Helper lambda which tries to use CSINC/CSINV for the instruction when its
1203   // true/false values are constants.
1204   // FIXME: All of these patterns already exist in tablegen. We should be
1205   // able to import these.
1206   auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
1207                           &Optimized]() {
1208     if (Optimized)
1209       return false;
1210     auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI);
1211     auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI);
1212     if (!TrueCst && !FalseCst)
1213       return false;
1214 
1215     Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
1216     if (TrueCst && FalseCst) {
1217       int64_t T = TrueCst->Value.getSExtValue();
1218       int64_t F = FalseCst->Value.getSExtValue();
1219 
1220       if (T == 0 && F == 1) {
1221         // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
1222         Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1223         True = ZReg;
1224         False = ZReg;
1225         return true;
1226       }
1227 
1228       if (T == 0 && F == -1) {
1229         // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
1230         Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1231         True = ZReg;
1232         False = ZReg;
1233         return true;
1234       }
1235     }
1236 
1237     if (TrueCst) {
1238       int64_t T = TrueCst->Value.getSExtValue();
1239       if (T == 1) {
1240         // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
1241         Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1242         True = False;
1243         False = ZReg;
1244         CC = AArch64CC::getInvertedCondCode(CC);
1245         return true;
1246       }
1247 
1248       if (T == -1) {
1249         // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1250         Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1251         True = False;
1252         False = ZReg;
1253         CC = AArch64CC::getInvertedCondCode(CC);
1254         return true;
1255       }
1256     }
1257 
1258     if (FalseCst) {
1259       int64_t F = FalseCst->Value.getSExtValue();
1260       if (F == 1) {
1261         // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1262         Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1263         False = ZReg;
1264         return true;
1265       }
1266 
1267       if (F == -1) {
1268         // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1269         Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1270         False = ZReg;
1271         return true;
1272       }
1273     }
1274     return false;
1275   };
1276 
1277   Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1278   Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1279   Optimized |= TryOptSelectCst();
1280   auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1281   constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1282   return &*SelectInst;
1283 }
1284 
1285 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1286   switch (P) {
1287   default:
1288     llvm_unreachable("Unknown condition code!");
1289   case CmpInst::ICMP_NE:
1290     return AArch64CC::NE;
1291   case CmpInst::ICMP_EQ:
1292     return AArch64CC::EQ;
1293   case CmpInst::ICMP_SGT:
1294     return AArch64CC::GT;
1295   case CmpInst::ICMP_SGE:
1296     return AArch64CC::GE;
1297   case CmpInst::ICMP_SLT:
1298     return AArch64CC::LT;
1299   case CmpInst::ICMP_SLE:
1300     return AArch64CC::LE;
1301   case CmpInst::ICMP_UGT:
1302     return AArch64CC::HI;
1303   case CmpInst::ICMP_UGE:
1304     return AArch64CC::HS;
1305   case CmpInst::ICMP_ULT:
1306     return AArch64CC::LO;
1307   case CmpInst::ICMP_ULE:
1308     return AArch64CC::LS;
1309   }
1310 }
1311 
1312 /// Return a register which can be used as a bit to test in a TB(N)Z.
1313 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1314                               MachineRegisterInfo &MRI) {
1315   assert(Reg.isValid() && "Expected valid register!");
1316   bool HasZext = false;
1317   while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1318     unsigned Opc = MI->getOpcode();
1319 
1320     if (!MI->getOperand(0).isReg() ||
1321         !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1322       break;
1323 
1324     // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1325     //
1326     // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1327     // on the truncated x is the same as the bit number on x.
1328     if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1329         Opc == TargetOpcode::G_TRUNC) {
1330       if (Opc == TargetOpcode::G_ZEXT)
1331         HasZext = true;
1332 
1333       Register NextReg = MI->getOperand(1).getReg();
1334       // Did we find something worth folding?
1335       if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1336         break;
1337 
1338       // NextReg is worth folding. Keep looking.
1339       Reg = NextReg;
1340       continue;
1341     }
1342 
1343     // Attempt to find a suitable operation with a constant on one side.
1344     Optional<uint64_t> C;
1345     Register TestReg;
1346     switch (Opc) {
1347     default:
1348       break;
1349     case TargetOpcode::G_AND:
1350     case TargetOpcode::G_XOR: {
1351       TestReg = MI->getOperand(1).getReg();
1352       Register ConstantReg = MI->getOperand(2).getReg();
1353       auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1354       if (!VRegAndVal) {
1355         // G_AND and G_XOR both commute, so check the other side for a constant.
1356         // FIXME: Can we canonicalize the constant so that it's always on the
1357         // same side at some point earlier?
1358         std::swap(ConstantReg, TestReg);
1359         VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
1360       }
1361       if (VRegAndVal) {
1362         if (HasZext)
1363           C = VRegAndVal->Value.getZExtValue();
1364         else
1365           C = VRegAndVal->Value.getSExtValue();
1366       }
1367       break;
1368     }
1369     case TargetOpcode::G_ASHR:
1370     case TargetOpcode::G_LSHR:
1371     case TargetOpcode::G_SHL: {
1372       TestReg = MI->getOperand(1).getReg();
1373       auto VRegAndVal =
1374           getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1375       if (VRegAndVal)
1376         C = VRegAndVal->Value.getSExtValue();
1377       break;
1378     }
1379     }
1380 
1381     // Didn't find a constant or viable register. Bail out of the loop.
1382     if (!C || !TestReg.isValid())
1383       break;
1384 
1385     // We found a suitable instruction with a constant. Check to see if we can
1386     // walk through the instruction.
1387     Register NextReg;
1388     unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1389     switch (Opc) {
1390     default:
1391       break;
1392     case TargetOpcode::G_AND:
1393       // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1394       if ((*C >> Bit) & 1)
1395         NextReg = TestReg;
1396       break;
1397     case TargetOpcode::G_SHL:
1398       // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is non-negative and
1399       // fits in the type of the register.
1400       if (*C <= Bit && (Bit - *C) < TestRegSize) {
1401         NextReg = TestReg;
1402         Bit = Bit - *C;
1403       }
1404       break;
1405     case TargetOpcode::G_ASHR:
1406       // (tbz (ashr x, c), b) -> (tbz x, b+c), or (tbz x, msb) if b+c is >=
1407       // the number of bits in x.
1408       NextReg = TestReg;
1409       Bit = Bit + *C;
1410       if (Bit >= TestRegSize)
1411         Bit = TestRegSize - 1;
1412       break;
1413     case TargetOpcode::G_LSHR:
1414       // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1415       if ((Bit + *C) < TestRegSize) {
1416         NextReg = TestReg;
1417         Bit = Bit + *C;
1418       }
1419       break;
1420     case TargetOpcode::G_XOR:
1421       // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1422       // appropriate.
1423       //
1424       // e.g. If x' = xor x, c, and the b-th bit is set in c then
1425       //
1426       // tbz x', b -> tbnz x, b
1427       //
1428       // Because x' only has the b-th bit set if x does not.
1429       if ((*C >> Bit) & 1)
1430         Invert = !Invert;
1431       NextReg = TestReg;
1432       break;
1433     }
1434 
1435     // Check if we found anything worth folding.
1436     if (!NextReg.isValid())
1437       return Reg;
1438     Reg = NextReg;
1439   }
1440 
1441   return Reg;
1442 }
1443 
1444 MachineInstr *AArch64InstructionSelector::emitTestBit(
1445     Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1446     MachineIRBuilder &MIB) const {
1447   assert(TestReg.isValid());
1448   assert(ProduceNonFlagSettingCondBr &&
1449          "Cannot emit TB(N)Z with speculation tracking!");
1450   MachineRegisterInfo &MRI = *MIB.getMRI();
1451 
1452   // Attempt to optimize the test bit by walking over instructions.
1453   TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1454   LLT Ty = MRI.getType(TestReg);
1455   unsigned Size = Ty.getSizeInBits();
1456   assert(!Ty.isVector() && "Expected a scalar!");
1457   assert(Bit < 64 && "Bit is too large!");
1458 
1459   // Bits below 32 use the W form of TB(N)Z, which needs a 32-bit register;
1460   // otherwise the X form needs a 64-bit one, so adjust the class if needed.
1461   bool UseWReg = Bit < 32;
1462   unsigned NecessarySize = UseWReg ? 32 : 64;
1463   if (Size != NecessarySize)
1464     TestReg = moveScalarRegClass(
1465         TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1466         MIB);
1467 
1468   static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1469                                           {AArch64::TBZW, AArch64::TBNZW}};
1470   unsigned Opc = OpcTable[UseWReg][IsNegative];
1471   auto TestBitMI =
1472       MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1473   constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1474   return &*TestBitMI;
1475 }
1476 
1477 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1478     MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
1479     MachineIRBuilder &MIB) const {
1480   assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
1481   // Given something like this:
1482   //
1483   //  %x = ...Something...
1484   //  %one = G_CONSTANT i64 1
1485   //  %zero = G_CONSTANT i64 0
1486   //  %and = G_AND %x, %one
1487   //  %cmp = G_ICMP intpred(ne), %and, %zero
1488   //  %cmp_trunc = G_TRUNC %cmp
1489   //  G_BRCOND %cmp_trunc, %bb.3
1490   //
1491   // We want to try and fold the AND into the G_BRCOND and produce either a
1492   // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1493   //
1494   // In this case, we'd get
1495   //
1496   // TBNZ %x %bb.3
1497   //
1498 
1499   // Check if the AND has a constant on its RHS which we can use as a mask.
1500   // If it's a power of 2, then it's the same as checking a specific bit.
1501   // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1502   auto MaybeBit = getIConstantVRegValWithLookThrough(
1503       AndInst.getOperand(2).getReg(), *MIB.getMRI());
1504   if (!MaybeBit)
1505     return false;
1506 
1507   int32_t Bit = MaybeBit->Value.exactLogBase2();
1508   if (Bit < 0)
1509     return false;
1510 
1511   Register TestReg = AndInst.getOperand(1).getReg();
1512 
1513   // Emit a TB(N)Z.
1514   emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1515   return true;
1516 }
1517 
1518 MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
1519                                                   bool IsNegative,
1520                                                   MachineBasicBlock *DestMBB,
1521                                                   MachineIRBuilder &MIB) const {
1522   assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
1523   MachineRegisterInfo &MRI = *MIB.getMRI();
1524   assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
1525              AArch64::GPRRegBankID &&
1526          "Expected GPRs only?");
1527   auto Ty = MRI.getType(CompareReg);
1528   unsigned Width = Ty.getSizeInBits();
1529   assert(!Ty.isVector() && "Expected scalar only?");
1530   assert(Width <= 64 && "Expected width to be at most 64?");
1531   static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
1532                                           {AArch64::CBNZW, AArch64::CBNZX}};
1533   unsigned Opc = OpcTable[IsNegative][Width == 64];
1534   auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
1535   constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
1536   return &*BranchMI;
1537 }
1538 
1539 bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
1540     MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
1541   assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
1542   assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1543   // Unfortunately, the mapping of LLVM FP condition codes onto AArch64
1544   // condition codes isn't one-to-one; some predicates need two branches.
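       // For illustration (schematic, not from a real test):
       //   %c:_(s1) = G_FCMP floatpred(one), %a, %b
       //   G_BRCOND %c, %bb.2
       // becomes a floating-point compare followed by one Bcc, plus a second
       // Bcc to the same block when the predicate needs two AArch64 conditions.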
1545   auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
1546   emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
1547                 Pred);
1548   AArch64CC::CondCode CC1, CC2;
1549   changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
1550   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1551   MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
1552   if (CC2 != AArch64CC::AL)
1553     MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
1554   I.eraseFromParent();
1555   return true;
1556 }
1557 
1558 bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
1559     MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1560   assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1561   assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1562   // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
1563   //
1564   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1565   // instructions will not be produced, as they are conditional branch
1566   // instructions that do not set flags.
1567   if (!ProduceNonFlagSettingCondBr)
1568     return false;
1569 
1570   MachineRegisterInfo &MRI = *MIB.getMRI();
1571   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1572   auto Pred =
1573       static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
1574   Register LHS = ICmp.getOperand(2).getReg();
1575   Register RHS = ICmp.getOperand(3).getReg();
1576 
1577   // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
1578   auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1579   MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1580 
1581   // When we can emit a TB(N)Z, prefer that.
1582   //
1583   // Handle non-commutative condition codes first.
1584   // Note that we don't want to do this when we have a G_AND because it can
1585   // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1586   if (VRegAndVal && !AndInst) {
1587     int64_t C = VRegAndVal->Value.getSExtValue();
1588 
1589     // For a signed greater-than comparison against -1, it suffices to test
1590     // that the msb (sign bit) is zero.
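         // e.g. (illustrative) for a 64-bit %x:
         //   %c = G_ICMP intpred(sgt), %x, %negone ; G_BRCOND %c, %bb.2
         // becomes
         //   TBZX %x, 63, %bb.2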
1591     if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1592       uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1593       emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1594       I.eraseFromParent();
1595       return true;
1596     }
1597 
1598     // For a signed less-than comparison against 0, it suffices to test
1599     // that the msb (sign bit) is not zero.
1600     if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1601       uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1602       emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1603       I.eraseFromParent();
1604       return true;
1605     }
1606   }
1607 
1608   // Attempt to handle commutative condition codes. Right now, that's only
1609   // eq/ne.
1610   if (ICmpInst::isEquality(Pred)) {
1611     if (!VRegAndVal) {
1612       std::swap(RHS, LHS);
1613       VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI);
1614       AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);
1615     }
1616 
1617     if (VRegAndVal && VRegAndVal->Value == 0) {
1618       // If there's a G_AND feeding into this branch, try to fold it away by
1619       // emitting a TB(N)Z instead.
1620       //
1621       // Note: If we have LT, then it *is* possible to fold, but it wouldn't be
1622       // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding
1623       // would be redundant.
1624       if (AndInst &&
1625           tryOptAndIntoCompareBranch(
1626               *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) {
1627         I.eraseFromParent();
1628         return true;
1629       }
1630 
1631       // Otherwise, try to emit a CB(N)Z instead.
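           // e.g. (illustrative): %c = G_ICMP intpred(eq), %x, %zero followed
           // by G_BRCOND %c, %bb.2 becomes CBZX %x, %bb.2 for a 64-bit %x
           // (CBNZX for intpred(ne)).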
1632       auto LHSTy = MRI.getType(LHS);
1633       if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) {
1634         emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB);
1635         I.eraseFromParent();
1636         return true;
1637       }
1638     }
1639   }
1640 
1641   return false;
1642 }
1643 
1644 bool AArch64InstructionSelector::selectCompareBranchFedByICmp(
1645     MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
1646   assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
1647   assert(I.getOpcode() == TargetOpcode::G_BRCOND);
1648   if (tryOptCompareBranchFedByICmp(I, ICmp, MIB))
1649     return true;
1650 
1651   // Couldn't optimize. Emit a compare + a Bcc.
1652   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1653   auto PredOp = ICmp.getOperand(1);
1654   emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB);
1655   const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
1656       static_cast<CmpInst::Predicate>(PredOp.getPredicate()));
1657   MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1658   I.eraseFromParent();
1659   return true;
1660 }
1661 
1662 bool AArch64InstructionSelector::selectCompareBranch(
1663     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) {
1664   Register CondReg = I.getOperand(0).getReg();
1665   MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1666   if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) {
1667     CondReg = CCMI->getOperand(1).getReg();
1668     CCMI = MRI.getVRegDef(CondReg);
1669   }
1670 
1671   // Try to select the G_BRCOND using whatever is feeding the condition if
1672   // possible.
1673   unsigned CCMIOpc = CCMI->getOpcode();
1674   if (CCMIOpc == TargetOpcode::G_FCMP)
1675     return selectCompareBranchFedByFCmp(I, *CCMI, MIB);
1676   if (CCMIOpc == TargetOpcode::G_ICMP)
1677     return selectCompareBranchFedByICmp(I, *CCMI, MIB);
1678 
1679   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1680   // instructions will not be produced, as they are conditional branch
1681   // instructions that do not set flags.
1682   if (ProduceNonFlagSettingCondBr) {
1683     emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true,
1684                 I.getOperand(1).getMBB(), MIB);
1685     I.eraseFromParent();
1686     return true;
1687   }
1688 
1689   // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead.
1690   auto TstMI =
1691       MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1);
1692   constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
1693   auto Bcc = MIB.buildInstr(AArch64::Bcc)
1694                  .addImm(AArch64CC::EQ)
1695                  .addMBB(I.getOperand(1).getMBB());
1696   I.eraseFromParent();
1697   return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI);
1698 }
1699 
1700 /// Returns the element immediate value of a vector shift operand if found.
1701 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1702 static Optional<int64_t> getVectorShiftImm(Register Reg,
1703                                            MachineRegisterInfo &MRI) {
1704   assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1705   MachineInstr *OpMI = MRI.getVRegDef(Reg);
1706   assert(OpMI && "Expected to find a vreg def for vector shift operand");
1707   return getAArch64VectorSplatScalar(*OpMI, MRI);
1708 }
1709 
1710 /// Matches and returns the shift immediate value for a SHL instruction given
1711 /// a shift operand.
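     /// e.g. (illustrative) for a <4 x s32> shift whose amount operand is a
     /// G_BUILD_VECTOR splat of G_CONSTANT i32 3, this returns 3; amounts
     /// outside [0, 31] for 32-bit elements yield None.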
1712 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1713   Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1714   if (!ShiftImm)
1715     return None;
1716   // Check the immediate is in range for a SHL.
1717   int64_t Imm = *ShiftImm;
1718   if (Imm < 0)
1719     return None;
1720   switch (SrcTy.getElementType().getSizeInBits()) {
1721   default:
1722     LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1723     return None;
1724   case 8:
1725     if (Imm > 7)
1726       return None;
1727     break;
1728   case 16:
1729     if (Imm > 15)
1730       return None;
1731     break;
1732   case 32:
1733     if (Imm > 31)
1734       return None;
1735     break;
1736   case 64:
1737     if (Imm > 63)
1738       return None;
1739     break;
1740   }
1741   return Imm;
1742 }
1743 
1744 bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I,
1745                                                  MachineRegisterInfo &MRI) {
1746   assert(I.getOpcode() == TargetOpcode::G_SHL);
1747   Register DstReg = I.getOperand(0).getReg();
1748   const LLT Ty = MRI.getType(DstReg);
1749   Register Src1Reg = I.getOperand(1).getReg();
1750   Register Src2Reg = I.getOperand(2).getReg();
1751 
1752   if (!Ty.isVector())
1753     return false;
1754 
1755   // Check if we have a vector of constants on RHS that we can select as the
1756   // immediate form.
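       // e.g. (illustrative): %d:fpr(<4 x s32>) = G_SHL %v, %splat, where
       // %splat is a G_BUILD_VECTOR splat of 3, selects to SHLv4i32_shift with
       // immediate 3; a non-constant amount falls back to USHLv4i32.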
1757   Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1758 
1759   unsigned Opc = 0;
1760   if (Ty == LLT::fixed_vector(2, 64)) {
1761     Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1762   } else if (Ty == LLT::fixed_vector(4, 32)) {
1763     Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1764   } else if (Ty == LLT::fixed_vector(2, 32)) {
1765     Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1766   } else if (Ty == LLT::fixed_vector(4, 16)) {
1767     Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16;
1768   } else if (Ty == LLT::fixed_vector(8, 16)) {
1769     Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16;
1770   } else if (Ty == LLT::fixed_vector(16, 8)) {
1771     Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8;
1772   } else if (Ty == LLT::fixed_vector(8, 8)) {
1773     Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8;
1774   } else {
1775     LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1776     return false;
1777   }
1778 
1779   auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1780   if (ImmVal)
1781     Shl.addImm(*ImmVal);
1782   else
1783     Shl.addUse(Src2Reg);
1784   constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1785   I.eraseFromParent();
1786   return true;
1787 }
1788 
1789 bool AArch64InstructionSelector::selectVectorAshrLshr(
1790     MachineInstr &I, MachineRegisterInfo &MRI) {
1791   assert(I.getOpcode() == TargetOpcode::G_ASHR ||
1792          I.getOpcode() == TargetOpcode::G_LSHR);
1793   Register DstReg = I.getOperand(0).getReg();
1794   const LLT Ty = MRI.getType(DstReg);
1795   Register Src1Reg = I.getOperand(1).getReg();
1796   Register Src2Reg = I.getOperand(2).getReg();
1797 
1798   if (!Ty.isVector())
1799     return false;
1800 
1801   bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR;
1802 
1803   // We expect the immediate case to be lowered in the PostLegalCombiner to
1804   // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents.
1805 
1806   // There is no vector shift-right-by-register instruction. Instead, the
1807   // shift-left-by-register instructions (SSHL/USHL) take a signed shift
1808   // amount, where a negative amount specifies a right shift.
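       // e.g. (illustrative): %d:fpr(<2 x s64>) = G_ASHR %v, %amt selects to
       //   %neg = NEGv2i64 %amt
       //   %d   = SSHLv2i64 %v, %neg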
1809 
1810   unsigned Opc = 0;
1811   unsigned NegOpc = 0;
1812   const TargetRegisterClass *RC =
1813       getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI);
1814   if (Ty == LLT::fixed_vector(2, 64)) {
1815     Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64;
1816     NegOpc = AArch64::NEGv2i64;
1817   } else if (Ty == LLT::fixed_vector(4, 32)) {
1818     Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32;
1819     NegOpc = AArch64::NEGv4i32;
1820   } else if (Ty == LLT::fixed_vector(2, 32)) {
1821     Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32;
1822     NegOpc = AArch64::NEGv2i32;
1823   } else if (Ty == LLT::fixed_vector(4, 16)) {
1824     Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16;
1825     NegOpc = AArch64::NEGv4i16;
1826   } else if (Ty == LLT::fixed_vector(8, 16)) {
1827     Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16;
1828     NegOpc = AArch64::NEGv8i16;
1829   } else if (Ty == LLT::fixed_vector(16, 8)) {
1830     Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8;
1831     NegOpc = AArch64::NEGv16i8;
1832   } else if (Ty == LLT::fixed_vector(8, 8)) {
1833     Opc = IsASHR ? AArch64::SSHLv8i8 : AArch64::USHLv8i8;
1834     NegOpc = AArch64::NEGv8i8;
1835   } else {
1836     LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1837     return false;
1838   }
1839 
1840   auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1841   constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1842   auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1843   constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1844   I.eraseFromParent();
1845   return true;
1846 }
1847 
1848 bool AArch64InstructionSelector::selectVaStartAAPCS(
1849     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1850   return false;
1851 }
1852 
1853 bool AArch64InstructionSelector::selectVaStartDarwin(
1854     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1855   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1856   Register ListReg = I.getOperand(0).getReg();
1857 
1858   Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1859 
1860   auto MIB =
1861       BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1862           .addDef(ArgsAddrReg)
1863           .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1864           .addImm(0)
1865           .addImm(0);
1866 
1867   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1868 
1869   MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1870             .addUse(ArgsAddrReg)
1871             .addUse(ListReg)
1872             .addImm(0)
1873             .addMemOperand(*I.memoperands_begin());
1874 
1875   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1876   I.eraseFromParent();
1877   return true;
1878 }
1879 
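     // The large code model builds the address 16 bits at a time, schematically
     // (illustrative only, relocation flags abbreviated):
     //   %t0 = MOVZXi @g(g0, nc), 0
     //   %t1 = MOVKXi %t0, @g(g1, nc), 16
     //   %t2 = MOVKXi %t1, @g(g2, nc), 32
     //   %dst = MOVKXi %t2, @g(g3), 48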
1880 void AArch64InstructionSelector::materializeLargeCMVal(
1881     MachineInstr &I, const Value *V, unsigned OpFlags) {
1882   MachineBasicBlock &MBB = *I.getParent();
1883   MachineFunction &MF = *MBB.getParent();
1884   MachineRegisterInfo &MRI = MF.getRegInfo();
1885 
1886   auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1887   MovZ->addOperand(MF, I.getOperand(1));
1888   MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1889                                      AArch64II::MO_NC);
1890   MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1891   constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1892 
1893   auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1894                        Register ForceDstReg) {
1895     Register DstReg = ForceDstReg
1896                           ? ForceDstReg
1897                           : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1898     auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1899     if (auto *GV = dyn_cast<GlobalValue>(V)) {
1900       MovI->addOperand(MF, MachineOperand::CreateGA(
1901                                GV, MovZ->getOperand(1).getOffset(), Flags));
1902     } else {
1903       MovI->addOperand(
1904           MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1905                                        MovZ->getOperand(1).getOffset(), Flags));
1906     }
1907     MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1908     constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1909     return DstReg;
1910   };
1911   Register DstReg = BuildMovK(MovZ.getReg(0),
1912                               AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1913   DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1914   BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1915 }
1916 
1917 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1918   MachineBasicBlock &MBB = *I.getParent();
1919   MachineFunction &MF = *MBB.getParent();
1920   MachineRegisterInfo &MRI = MF.getRegInfo();
1921 
1922   switch (I.getOpcode()) {
1923   case TargetOpcode::G_SHL:
1924   case TargetOpcode::G_ASHR:
1925   case TargetOpcode::G_LSHR: {
1926     // These shifts are legalized to have 64 bit shift amounts because we want
1927     // to take advantage of the existing imported selection patterns that assume
1928     // the immediates are s64s. However, if the shifted type is 32 bits and for
1929     // some reason we receive input GMIR that has an s64 shift amount that's not
1930     // a G_CONSTANT, insert a truncate so that we can still select the s32
1931     // register-register variant.
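         // e.g. (illustrative): %d:gpr(s32) = G_LSHR %x:gpr(s32), %amt:gpr(s64)
         // has %amt replaced by a sub_32 COPY of itself so that the imported
         // 32-bit register-register pattern can still match.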
1932     Register SrcReg = I.getOperand(1).getReg();
1933     Register ShiftReg = I.getOperand(2).getReg();
1934     const LLT ShiftTy = MRI.getType(ShiftReg);
1935     const LLT SrcTy = MRI.getType(SrcReg);
1936     if (SrcTy.isVector())
1937       return false;
1938     assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1939     if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1940       return false;
1941     auto *AmtMI = MRI.getVRegDef(ShiftReg);
1942     assert(AmtMI && "could not find a vreg definition for shift amount");
1943     if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1944       // Insert a subregister copy to implement a 64->32 trunc
1945       auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1946                        .addReg(ShiftReg, 0, AArch64::sub_32);
1947       MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1948       I.getOperand(2).setReg(Trunc.getReg(0));
1949     }
1950     return true;
1951   }
1952   case TargetOpcode::G_STORE: {
1953     bool Changed = contractCrossBankCopyIntoStore(I, MRI);
1954     MachineOperand &SrcOp = I.getOperand(0);
1955     if (MRI.getType(SrcOp.getReg()).isPointer()) {
1956       // Allow matching with imported patterns for stores of pointers. Unlike
1957       // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy
1958       // and constrain.
1959       auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp);
1960       Register NewSrc = Copy.getReg(0);
1961       SrcOp.setReg(NewSrc);
1962       RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI);
1963       Changed = true;
1964     }
1965     return Changed;
1966   }
1967   case TargetOpcode::G_PTR_ADD:
1968     return convertPtrAddToAdd(I, MRI);
1969   case TargetOpcode::G_LOAD: {
1970     // For scalar loads of pointers, we try to convert the dest type from p0
1971     // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1972     // conversion, this should be ok because all users should have been
1973     // selected already, so the type doesn't matter for them.
1974     Register DstReg = I.getOperand(0).getReg();
1975     const LLT DstTy = MRI.getType(DstReg);
1976     if (!DstTy.isPointer())
1977       return false;
1978     MRI.setType(DstReg, LLT::scalar(64));
1979     return true;
1980   }
1981   case AArch64::G_DUP: {
1982     // Convert the type from p0 to s64 to help selection.
1983     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1984     if (!DstTy.getElementType().isPointer())
1985       return false;
1986     auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg());
1987     MRI.setType(I.getOperand(0).getReg(),
1988                 DstTy.changeElementType(LLT::scalar(64)));
1989     MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass);
1990     I.getOperand(1).setReg(NewSrc.getReg(0));
1991     return true;
1992   }
1993   case TargetOpcode::G_UITOFP:
1994   case TargetOpcode::G_SITOFP: {
1995     // If the source regbank is FPR, then convert the opcode to G_SITOF or
1996     // G_UITOF so that the importer can select it to an fpr variant.
1997     // Otherwise, it ends up matching an fpr/gpr variant and adding a
1998     // cross-bank copy.
1999     Register SrcReg = I.getOperand(1).getReg();
2000     LLT SrcTy = MRI.getType(SrcReg);
2001     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2002     if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits())
2003       return false;
2004 
2005     if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) {
2006       if (I.getOpcode() == TargetOpcode::G_SITOFP)
2007         I.setDesc(TII.get(AArch64::G_SITOF));
2008       else
2009         I.setDesc(TII.get(AArch64::G_UITOF));
2010       return true;
2011     }
2012     return false;
2013   }
2014   default:
2015     return false;
2016   }
2017 }
2018 
2019 /// This lowering tries to look for G_PTR_ADD instructions and then converts
2020 /// them to a standard G_ADD with a COPY on the source.
2021 ///
2022 /// The motivation behind this is to expose the add semantics to the imported
2023 /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
2024 /// because the selector works bottom up, uses before defs. By the time we
2025 /// end up trying to select a G_PTR_ADD, we should have already attempted to
2026 /// fold this into addressing modes and were therefore unsuccessful.
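     /// e.g. (illustrative):
     ///   %dst:gpr(p0) = G_PTR_ADD %base(p0), %off(s64)
     /// becomes
     ///   %intbase:gpr(s64) = G_PTRTOINT %base(p0)
     ///   %dst:gpr(s64) = G_ADD %intbase, %off
     /// or a G_SUB when %off is a (0 - x) negation idiom.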
2027 bool AArch64InstructionSelector::convertPtrAddToAdd(
2028     MachineInstr &I, MachineRegisterInfo &MRI) {
2029   assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
2030   Register DstReg = I.getOperand(0).getReg();
2031   Register AddOp1Reg = I.getOperand(1).getReg();
2032   const LLT PtrTy = MRI.getType(DstReg);
2033   if (PtrTy.getAddressSpace() != 0)
2034     return false;
2035 
2036   const LLT CastPtrTy =
2037       PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64);
2038   auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
2039   // Set regbanks on the registers.
2040   if (PtrTy.isVector())
2041     MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
2042   else
2043     MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
2044 
2045   // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
2046   // %dst(intty) = G_ADD %intbase, off
2047   I.setDesc(TII.get(TargetOpcode::G_ADD));
2048   MRI.setType(DstReg, CastPtrTy);
2049   I.getOperand(1).setReg(PtrToInt.getReg(0));
2050   if (!select(*PtrToInt)) {
2051     LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
2052     return false;
2053   }
2054 
2055   // Also take the opportunity here to try to do some optimization.
2056   // Try to convert this into a G_SUB if the offset is a 0-x negate idiom.
2057   Register NegatedReg;
2058   if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg))))
2059     return true;
2060   I.getOperand(2).setReg(NegatedReg);
2061   I.setDesc(TII.get(TargetOpcode::G_SUB));
2062   return true;
2063 }
2064 
2065 bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I,
2066                                                 MachineRegisterInfo &MRI) {
2067   // We try to match the immediate variant of LSL, which is actually an alias
2068   // for a special case of UBFM. Otherwise, we fall back to the imported
2069   // selector which will match the register variant.
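       // e.g. (illustrative): %d:gpr(s64) = G_SHL %x, %four, with %four a
       // G_CONSTANT i64 4, selects to UBFMXri %x, 60, 59, the encoding of
       // lsl x, #4.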
2070   assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
2071   const auto &MO = I.getOperand(2);
2072   auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI);
2073   if (!VRegAndVal)
2074     return false;
2075 
2076   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2077   if (DstTy.isVector())
2078     return false;
2079   bool Is64Bit = DstTy.getSizeInBits() == 64;
2080   auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
2081   auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
2082 
2083   if (!Imm1Fn || !Imm2Fn)
2084     return false;
2085 
2086   auto NewI =
2087       MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
2088                      {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
2089 
2090   for (auto &RenderFn : *Imm1Fn)
2091     RenderFn(NewI);
2092   for (auto &RenderFn : *Imm2Fn)
2093     RenderFn(NewI);
2094 
2095   I.eraseFromParent();
2096   return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
2097 }
2098 
2099 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
2100     MachineInstr &I, MachineRegisterInfo &MRI) {
2101   assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
2102   // If we're storing a scalar, it doesn't matter what register bank that
2103   // scalar is on. All that matters is the size.
2104   //
2105   // So, if we see something like this (with a 32-bit scalar as an example):
2106   //
2107   // %x:gpr(s32) = ... something ...
2108   // %y:fpr(s32) = COPY %x:gpr(s32)
2109   // G_STORE %y:fpr(s32)
2110   //
2111   // We can fix this up into something like this:
2112   //
2113   // G_STORE %x:gpr(s32)
2114   //
2115   // And then continue the selection process normally.
2116   Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
2117   if (!DefDstReg.isValid())
2118     return false;
2119   LLT DefDstTy = MRI.getType(DefDstReg);
2120   Register StoreSrcReg = I.getOperand(0).getReg();
2121   LLT StoreSrcTy = MRI.getType(StoreSrcReg);
2122 
2123   // If we get something strange like a physical register, then we shouldn't
2124   // go any further.
2125   if (!DefDstTy.isValid())
2126     return false;
2127 
2128   // Are the source and dst types the same size?
2129   if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
2130     return false;
2131 
2132   if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
2133       RBI.getRegBank(DefDstReg, MRI, TRI))
2134     return false;
2135 
2136   // We have a cross-bank copy, which is entering a store. Let's fold it.
2137   I.getOperand(0).setReg(DefDstReg);
2138   return true;
2139 }
2140 
2141 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) {
2142   assert(I.getParent() && "Instruction should be in a basic block!");
2143   assert(I.getParent()->getParent() && "Instruction should be in a function!");
2144 
2145   MachineBasicBlock &MBB = *I.getParent();
2146   MachineFunction &MF = *MBB.getParent();
2147   MachineRegisterInfo &MRI = MF.getRegInfo();
2148 
2149   switch (I.getOpcode()) {
2150   case AArch64::G_DUP: {
2151     // Before selecting a DUP instruction, check if it is better selected as a
2152     // MOV or load from a constant pool.
2153     Register Src = I.getOperand(1).getReg();
2154     auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI);
2155     if (!ValAndVReg)
2156       return false;
2157     LLVMContext &Ctx = MF.getFunction().getContext();
2158     Register Dst = I.getOperand(0).getReg();
2159     auto *CV = ConstantDataVector::getSplat(
2160         MRI.getType(Dst).getNumElements(),
2161         ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()),
2162                          ValAndVReg->Value));
2163     if (!emitConstantVector(Dst, CV, MIB, MRI))
2164       return false;
2165     I.eraseFromParent();
2166     return true;
2167   }
2168   case TargetOpcode::G_SEXT:
2169     // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV
2170     // over a normal extend.
2171     if (selectUSMovFromExtend(I, MRI))
2172       return true;
2173     return false;
2174   case TargetOpcode::G_BR:
2175     return false;
2176   case TargetOpcode::G_SHL:
2177     return earlySelectSHL(I, MRI);
2178   case TargetOpcode::G_CONSTANT: {
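         // Select an integer constant 0 as a copy from the zero register
         // (WZR/XZR), e.g. (illustrative):
         //   %d:gpr(s64) = G_CONSTANT i64 0  ->  %d = COPY $xzr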
2179     bool IsZero = false;
2180     if (I.getOperand(1).isCImm())
2181       IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
2182     else if (I.getOperand(1).isImm())
2183       IsZero = I.getOperand(1).getImm() == 0;
2184 
2185     if (!IsZero)
2186       return false;
2187 
2188     Register DefReg = I.getOperand(0).getReg();
2189     LLT Ty = MRI.getType(DefReg);
2190     if (Ty.getSizeInBits() == 64) {
2191       I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
2192       RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
2193     } else if (Ty.getSizeInBits() == 32) {
2194       I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
2195       RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
2196     } else
2197       return false;
2198 
2199     I.setDesc(TII.get(TargetOpcode::COPY));
2200     return true;
2201   }
2202 
2203   case TargetOpcode::G_ADD: {
2204     // Check if this is being fed by a G_ICMP on either side.
2205     //
2206     // (cmp pred, x, y) + z
2207     //
2208     // In the above case, when the cmp is true, we increment z by 1. So, we can
2209     // fold the add into the cset for the cmp by using cinc.
2210     //
2211     // FIXME: This would probably be a lot nicer in PostLegalizerLowering.
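         // e.g. (illustrative):
         //   %c:gpr(s32) = G_ICMP intpred(eq), %x, %y
         //   %a:gpr(s32) = G_ADD %z, %c
         // becomes a compare followed by CSINCWr %a, %z, %z, ne, i.e. %z + 1
         // when eq holds and %z otherwise.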
2212     Register AddDst = I.getOperand(0).getReg();
2213     Register AddLHS = I.getOperand(1).getReg();
2214     Register AddRHS = I.getOperand(2).getReg();
2215     // Only handle scalars.
2216     LLT Ty = MRI.getType(AddLHS);
2217     if (Ty.isVector())
2218       return false;
2219     // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64
2220     // bits.
2221     unsigned Size = Ty.getSizeInBits();
2222     if (Size != 32 && Size != 64)
2223       return false;
2224     auto MatchCmp = [&](Register Reg) -> MachineInstr * {
2225       if (!MRI.hasOneNonDBGUse(Reg))
2226         return nullptr;
2227       // If the LHS of the add is 32 bits, then we want to fold a 32-bit
2228       // compare.
2229       if (Size == 32)
2230         return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI);
2231       // We model scalar compares using 32-bit destinations right now.
2232       // If it's a 64-bit compare, it'll have 64-bit sources.
2233       Register ZExt;
2234       if (!mi_match(Reg, MRI,
2235                     m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt))))))
2236         return nullptr;
2237       auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI);
2238       if (!Cmp ||
2239           MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64)
2240         return nullptr;
2241       return Cmp;
2242     };
2243     // Try to match
2244     // z + (cmp pred, x, y)
2245     MachineInstr *Cmp = MatchCmp(AddRHS);
2246     if (!Cmp) {
2247       // (cmp pred, x, y) + z
2248       std::swap(AddLHS, AddRHS);
2249       Cmp = MatchCmp(AddRHS);
2250       if (!Cmp)
2251         return false;
2252     }
2253     auto &PredOp = Cmp->getOperand(1);
2254     auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate());
2255     const AArch64CC::CondCode InvCC =
2256         changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
2257     MIB.setInstrAndDebugLoc(I);
2258     emitIntegerCompare(/*LHS=*/Cmp->getOperand(2),
2259                        /*RHS=*/Cmp->getOperand(3), PredOp, MIB);
2260     emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB);
2261     I.eraseFromParent();
2262     return true;
2263   }
2264   case TargetOpcode::G_OR: {
2265     // Look for operations that take the lower `Width=Size-ShiftImm` bits of
2266     // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via
2267     // shifting and masking that we can replace with a BFI (encoded as a BFM).
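         // e.g. (illustrative) for s32 with ShiftImm = 8 and MaskImm = 0xff:
         //   %d = G_OR (G_SHL %ShiftSrc, 8), (G_AND %MaskSrc, 0xff)
         // selects to BFMWri %d, %MaskSrc, %ShiftSrc, 24, 23, i.e. a BFI of the
         // low 24 bits of %ShiftSrc into %MaskSrc at bit 8.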
2268     Register Dst = I.getOperand(0).getReg();
2269     LLT Ty = MRI.getType(Dst);
2270 
2271     if (!Ty.isScalar())
2272       return false;
2273 
2274     unsigned Size = Ty.getSizeInBits();
2275     if (Size != 32 && Size != 64)
2276       return false;
2277 
2278     Register ShiftSrc;
2279     int64_t ShiftImm;
2280     Register MaskSrc;
2281     int64_t MaskImm;
2282     if (!mi_match(
2283             Dst, MRI,
2284             m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))),
2285                   m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm))))))
2286       return false;
2287 
2288     if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm))
2289       return false;
2290 
2291     int64_t Immr = Size - ShiftImm;
2292     int64_t Imms = Size - ShiftImm - 1;
2293     unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri;
2294     emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB);
2295     I.eraseFromParent();
2296     return true;
2297   }
2298   default:
2299     return false;
2300   }
2301 }
2302 
2303 bool AArch64InstructionSelector::select(MachineInstr &I) {
2304   assert(I.getParent() && "Instruction should be in a basic block!");
2305   assert(I.getParent()->getParent() && "Instruction should be in a function!");
2306 
2307   MachineBasicBlock &MBB = *I.getParent();
2308   MachineFunction &MF = *MBB.getParent();
2309   MachineRegisterInfo &MRI = MF.getRegInfo();
2310 
2311   const AArch64Subtarget *Subtarget =
2312       &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
2313   if (Subtarget->requiresStrictAlign()) {
2314     // We don't support this feature yet.
2315     LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
2316     return false;
2317   }
2318 
2319   MIB.setInstrAndDebugLoc(I);
2320 
2321   unsigned Opcode = I.getOpcode();
2322   // G_PHI requires same handling as PHI
2323   if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
2324     // Certain non-generic instructions also need some special handling.
2325 
2326     if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
2327       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2328 
2329     if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
2330       const Register DefReg = I.getOperand(0).getReg();
2331       const LLT DefTy = MRI.getType(DefReg);
2332 
2333       const RegClassOrRegBank &RegClassOrBank =
2334         MRI.getRegClassOrRegBank(DefReg);
2335 
2336       const TargetRegisterClass *DefRC
2337         = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
2338       if (!DefRC) {
2339         if (!DefTy.isValid()) {
2340           LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
2341           return false;
2342         }
2343         const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
2344         DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
2345         if (!DefRC) {
2346           LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
2347           return false;
2348         }
2349       }
2350 
2351       I.setDesc(TII.get(TargetOpcode::PHI));
2352 
2353       return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
2354     }
2355 
2356     if (I.isCopy())
2357       return selectCopy(I, TII, MRI, TRI, RBI);
2358 
2359     return true;
2360   }
2361 
2362 
2363   if (I.getNumOperands() != I.getNumExplicitOperands()) {
2364     LLVM_DEBUG(
2365         dbgs() << "Generic instruction has unexpected implicit operands\n");
2366     return false;
2367   }
2368 
2369   // Try to do some lowering before we start instruction selecting. These
2370   // lowerings are purely transformations on the input G_MIR and so selection
2371   // must continue after any modification of the instruction.
2372   if (preISelLower(I)) {
2373     Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
2374   }
2375 
2376   // There may be patterns that the importer cannot handle optimally: it still
2377   // selects them, but to a suboptimal sequence, so our custom C++ selection
2378   // code later never gets a chance to improve on them. Therefore, we make an
2379   // early selection attempt here to give priority to certain selection
2380   // routines over the imported ones.
2381   if (earlySelect(I))
2382     return true;
2383 
2384   if (selectImpl(I, *CoverageInfo))
2385     return true;
2386 
2387   LLT Ty =
2388       I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
2389 
2390   switch (Opcode) {
2391   case TargetOpcode::G_SBFX:
2392   case TargetOpcode::G_UBFX: {
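         // e.g. (illustrative): %d:gpr(s32) = G_UBFX %x, 4, 8 (lsb 4, width 8)
         // selects to UBFMWri %x, 4, 11, extracting bits [11:4].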
2393     static const unsigned OpcTable[2][2] = {
2394         {AArch64::UBFMWri, AArch64::UBFMXri},
2395         {AArch64::SBFMWri, AArch64::SBFMXri}};
2396     bool IsSigned = Opcode == TargetOpcode::G_SBFX;
2397     unsigned Size = Ty.getSizeInBits();
2398     unsigned Opc = OpcTable[IsSigned][Size == 64];
2399     auto Cst1 =
2400         getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
2401     assert(Cst1 && "Should have gotten a constant for src 1?");
2402     auto Cst2 =
2403         getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
2404     assert(Cst2 && "Should have gotten a constant for src 2?");
2405     auto LSB = Cst1->Value.getZExtValue();
2406     auto Width = Cst2->Value.getZExtValue();
2407     auto BitfieldInst =
2408         MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
2409             .addImm(LSB)
2410             .addImm(LSB + Width - 1);
2411     I.eraseFromParent();
2412     return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
2413   }
2414   case TargetOpcode::G_BRCOND:
2415     return selectCompareBranch(I, MF, MRI);
2416 
2417   case TargetOpcode::G_BRINDIRECT: {
2418     I.setDesc(TII.get(AArch64::BR));
2419     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2420   }
2421 
2422   case TargetOpcode::G_BRJT:
2423     return selectBrJT(I, MRI);
2424 
2425   case AArch64::G_ADD_LOW: {
2426     // This op may have been separated from its ADRP companion by the localizer
2427     // or some other code motion pass. Given that many CPUs will try to
2428     // macro-fuse these operations anyway, select this into a MOVaddr pseudo
2429     // which will later be expanded into an ADRP+ADD pair after scheduling.
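         // e.g. (illustrative):
         //   %a = ADRP @g(page)
         //   %d = AArch64::G_ADD_LOW %a, @g(pageoff)
         // becomes %d = MOVaddr @g(page), @g(pageoff), expanded later into an
         // ADRP + ADDXri pair.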
2430     MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
2431     if (BaseMI->getOpcode() != AArch64::ADRP) {
2432       I.setDesc(TII.get(AArch64::ADDXri));
2433       I.addOperand(MachineOperand::CreateImm(0));
2434       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2435     }
2436     assert(TM.getCodeModel() == CodeModel::Small &&
2437            "Expected small code model");
2438     auto Op1 = BaseMI->getOperand(1);
2439     auto Op2 = I.getOperand(2);
2440     auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
2441                        .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
2442                                          Op1.getTargetFlags())
2443                        .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
2444                                          Op2.getTargetFlags());
2445     I.eraseFromParent();
2446     return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
2447   }
2448 
2449   case TargetOpcode::G_BSWAP: {
2450     // Handle vector types for G_BSWAP directly.
2451     Register DstReg = I.getOperand(0).getReg();
2452     LLT DstTy = MRI.getType(DstReg);
2453 
2454     // We should only get vector types here; everything else is handled by the
2455     // importer right now.
2456     if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
2457       LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
2458       return false;
2459     }
2460 
2461     // Only handle 4 and 2 element vectors for now.
2462     // TODO: 16-bit elements.
2463     unsigned NumElts = DstTy.getNumElements();
2464     if (NumElts != 4 && NumElts != 2) {
2465       LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
2466       return false;
2467     }
2468 
2469     // Choose the correct opcode for the supported types. Right now, that's
2470     // v2s32, v4s32, and v2s64.
2471     unsigned Opc = 0;
2472     unsigned EltSize = DstTy.getElementType().getSizeInBits();
2473     if (EltSize == 32)
2474       Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
2475                                           : AArch64::REV32v16i8;
2476     else if (EltSize == 64)
2477       Opc = AArch64::REV64v16i8;
2478 
2479     // We should always get something by the time we get here...
2480     assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
2481 
2482     I.setDesc(TII.get(Opc));
2483     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2484   }
2485 
2486   case TargetOpcode::G_FCONSTANT:
2487   case TargetOpcode::G_CONSTANT: {
2488     const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
2489 
2490     const LLT s8 = LLT::scalar(8);
2491     const LLT s16 = LLT::scalar(16);
2492     const LLT s32 = LLT::scalar(32);
2493     const LLT s64 = LLT::scalar(64);
2494     const LLT s128 = LLT::scalar(128);
2495     const LLT p0 = LLT::pointer(0, 64);
2496 
2497     const Register DefReg = I.getOperand(0).getReg();
2498     const LLT DefTy = MRI.getType(DefReg);
2499     const unsigned DefSize = DefTy.getSizeInBits();
2500     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2501 
2502     // FIXME: Redundant check, but even less readable when factored out.
2503     if (isFP) {
2504       if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) {
2505         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2506                           << " constant, expected: " << s16 << " or " << s32
2507                           << " or " << s64 << " or " << s128 << '\n');
2508         return false;
2509       }
2510 
2511       if (RB.getID() != AArch64::FPRRegBankID) {
2512         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2513                           << " constant on bank: " << RB
2514                           << ", expected: FPR\n");
2515         return false;
2516       }
2517 
2518       // The 0.0 case is covered by tablegen. Reject it here so we can be sure
2519       // that tablegen handles it and isn't silently rescued by this code. The
2520       // exception is FP128: tablegen does not cover 0.0 for fp128, so that
2521       // case is handled here instead.
2522       if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0))
2523         return false;
2524     } else {
2525       // s32 and s64 are covered by tablegen.
2526       if (Ty != p0 && Ty != s8 && Ty != s16) {
2527         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2528                           << " constant, expected: " << s32 << ", " << s64
2529                           << ", or " << p0 << '\n');
2530         return false;
2531       }
2532 
2533       if (RB.getID() != AArch64::GPRRegBankID) {
2534         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2535                           << " constant on bank: " << RB
2536                           << ", expected: GPR\n");
2537         return false;
2538       }
2539     }
2540 
2541     if (isFP) {
2542       const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize);
2543       // For 16, 64, and 128b values, emit a constant pool load.
2544       switch (DefSize) {
2545       default:
2546         llvm_unreachable("Unexpected destination size for G_FCONSTANT?");
2547       case 32:
2548         // For s32, use a cp load if we have optsize/minsize.
2549         if (!shouldOptForSize(&MF))
2550           break;
2551         LLVM_FALLTHROUGH;
2552       case 16:
2553       case 64:
2554       case 128: {
2555         auto *FPImm = I.getOperand(1).getFPImm();
2556         auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2557         if (!LoadMI) {
2558           LLVM_DEBUG(dbgs() << "Failed to load FP constant pool entry\n");
2559           return false;
2560         }
2561         MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2562         I.eraseFromParent();
2563         return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2564       }
2565       }
2566 
2567       // Emit a normal integer MOV into a GPR, then copy it into the FPR def.
2568       assert(DefSize == 32 &&
2569              "Expected constant pool loads for all sizes other than 32!");
2570       const Register DefGPRReg =
2571           MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2572       MachineOperand &RegOp = I.getOperand(0);
2573       RegOp.setReg(DefGPRReg);
2574       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2575       MIB.buildCopy({DefReg}, {DefGPRReg});
2576 
2577       if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2578         LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2579         return false;
2580       }
2581 
2582       MachineOperand &ImmOp = I.getOperand(1);
2583       // FIXME: Is going through int64_t always correct?
2584       ImmOp.ChangeToImmediate(
2585           ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2586     } else if (I.getOperand(1).isCImm()) {
2587       uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2588       I.getOperand(1).ChangeToImmediate(Val);
2589     } else if (I.getOperand(1).isImm()) {
2590       uint64_t Val = I.getOperand(1).getImm();
2591       I.getOperand(1).ChangeToImmediate(Val);
2592     }
2593 
2594     const unsigned MovOpc =
2595         DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2596     I.setDesc(TII.get(MovOpc));
2597     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2598     return true;
2599   }
2600   case TargetOpcode::G_EXTRACT: {
2601     Register DstReg = I.getOperand(0).getReg();
2602     Register SrcReg = I.getOperand(1).getReg();
2603     LLT SrcTy = MRI.getType(SrcReg);
2604     LLT DstTy = MRI.getType(DstReg);
2605     (void)DstTy;
2606     unsigned SrcSize = SrcTy.getSizeInBits();
2607 
2608     if (SrcTy.getSizeInBits() > 64) {
2609       // This should be an extract of an s128, which is like a vector extract.
2610       if (SrcTy.getSizeInBits() != 128)
2611         return false;
2612       // Only support extracting 64 bits from an s128 at the moment.
2613       if (DstTy.getSizeInBits() != 64)
2614         return false;
2615 
2616       unsigned Offset = I.getOperand(2).getImm();
2617       if (Offset % 64 != 0)
2618         return false;
2619 
2620       // Check we have the right regbank always.
2621       const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2622       const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2623       assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!");
2624 
2625       if (SrcRB.getID() == AArch64::GPRRegBankID) {
2626         MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
2627             .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64);
2628         I.eraseFromParent();
2629         return true;
2630       }
2631 
2632       // Emit the same code as a vector extract.
2633       // Offset must be a multiple of 64.
2634       unsigned LaneIdx = Offset / 64;
2635       MachineInstr *Extract = emitExtractVectorElt(
2636           DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2637       if (!Extract)
2638         return false;
2639       I.eraseFromParent();
2640       return true;
2641     }
2642 
2643     I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2644     MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2645                                       Ty.getSizeInBits() - 1);
2646 
2647     if (SrcSize < 64) {
2648       assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2649              "unexpected G_EXTRACT types");
2650       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2651     }
2652 
2653     DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2654     MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2655     MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2656         .addReg(DstReg, 0, AArch64::sub_32);
2657     RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2658                                  AArch64::GPR32RegClass, MRI);
2659     I.getOperand(0).setReg(DstReg);
2660 
2661     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2662   }
2663 
2664   case TargetOpcode::G_INSERT: {
2665     LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2666     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2667     unsigned DstSize = DstTy.getSizeInBits();
2668     // Larger inserts are vectors, same-size ones should be something else by
2669     // now (split up or turned into COPYs).
2670     if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2671       return false;
2672 
2673     I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2674     unsigned LSB = I.getOperand(3).getImm();
2675     unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2676     I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2677     MachineInstrBuilder(MF, I).addImm(Width - 1);
2678 
2679     if (DstSize < 64) {
2680       assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2681              "unexpected G_INSERT types");
2682       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2683     }
2684 
2685     Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2686     BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2687             TII.get(AArch64::SUBREG_TO_REG))
2688         .addDef(SrcReg)
2689         .addImm(0)
2690         .addUse(I.getOperand(2).getReg())
2691         .addImm(AArch64::sub_32);
2692     RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2693                                  AArch64::GPR32RegClass, MRI);
2694     I.getOperand(2).setReg(SrcReg);
2695 
2696     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2697   }
2698   case TargetOpcode::G_FRAME_INDEX: {
2699     // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2700     if (Ty != LLT::pointer(0, 64)) {
2701       LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2702                         << ", expected: " << LLT::pointer(0, 64) << '\n');
2703       return false;
2704     }
2705     I.setDesc(TII.get(AArch64::ADDXri));
2706 
2707     // MOs for a #0 shifted immediate.
2708     I.addOperand(MachineOperand::CreateImm(0));
2709     I.addOperand(MachineOperand::CreateImm(0));
2710 
2711     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2712   }
2713 
2714   case TargetOpcode::G_GLOBAL_VALUE: {
2715     auto GV = I.getOperand(1).getGlobal();
2716     if (GV->isThreadLocal())
2717       return selectTLSGlobalValue(I, MRI);
2718 
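         // Pick a materialization strategy based on the reference classification and
         // code model: a GOT load, a MOVZ/MOVK sequence (large), a single ADR (tiny),
         // or an ADRP+ADD pair via MOVaddr otherwise.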
2719     unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2720     if (OpFlags & AArch64II::MO_GOT) {
2721       I.setDesc(TII.get(AArch64::LOADgot));
2722       I.getOperand(1).setTargetFlags(OpFlags);
2723     } else if (TM.getCodeModel() == CodeModel::Large) {
2724       // Materialize the global using movz/movk instructions.
2725       materializeLargeCMVal(I, GV, OpFlags);
2726       I.eraseFromParent();
2727       return true;
2728     } else if (TM.getCodeModel() == CodeModel::Tiny) {
2729       I.setDesc(TII.get(AArch64::ADR));
2730       I.getOperand(1).setTargetFlags(OpFlags);
2731     } else {
2732       I.setDesc(TII.get(AArch64::MOVaddr));
2733       I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2734       MachineInstrBuilder MIB(MF, I);
2735       MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2736                            OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2737     }
2738     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2739   }
2740 
2741   case TargetOpcode::G_ZEXTLOAD:
2742   case TargetOpcode::G_LOAD:
2743   case TargetOpcode::G_STORE: {
2744     GLoadStore &LdSt = cast<GLoadStore>(I);
2745     bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2746     LLT PtrTy = MRI.getType(LdSt.getPointerReg());
2747 
2748     if (PtrTy != LLT::pointer(0, 64)) {
2749       LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2750                         << ", expected: " << LLT::pointer(0, 64) << '\n');
2751       return false;
2752     }
2753 
2754     uint64_t MemSizeInBytes = LdSt.getMemSize();
2755     unsigned MemSizeInBits = LdSt.getMemSizeInBits();
2756     AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering();
2757 
2758     // Need special instructions for atomics that affect ordering.
2759     if (Order != AtomicOrdering::NotAtomic &&
2760         Order != AtomicOrdering::Unordered &&
2761         Order != AtomicOrdering::Monotonic) {
2762       assert(!isa<GZExtLoad>(LdSt));
2763       if (MemSizeInBytes > 64)
2764         return false;
2765 
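           // Acquire/release and seq_cst accesses are lowered to LDAR*/STLR*; the
           // opcode arrays below are indexed by the log2 of the access size in bytes.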
2766       if (isa<GLoad>(LdSt)) {
2767         static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH,
2768                                      AArch64::LDARW, AArch64::LDARX};
2769         I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2770       } else {
2771         static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
2772                                      AArch64::STLRW, AArch64::STLRX};
2773         Register ValReg = LdSt.getReg(0);
2774         if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
2775           // Emit a subreg copy of 32 bits.
2776           Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2777           MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
2778               .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
2779           I.getOperand(0).setReg(NewVal);
2780         }
2781         I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
2782       }
2783       constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2784       return true;
2785     }
2786 
2787 #ifndef NDEBUG
2788     const Register PtrReg = LdSt.getPointerReg();
2789     const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2790     // Check that the pointer register is valid.
2791     assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2792            "Load/Store pointer operand isn't a GPR");
2793     assert(MRI.getType(PtrReg).isPointer() &&
2794            "Load/Store pointer operand isn't a pointer");
2795 #endif
2796 
2797     const Register ValReg = LdSt.getReg(0);
2798     const LLT ValTy = MRI.getType(ValReg);
2799     const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2800 
2801     // The code below doesn't support truncating stores, so if the value is wider
2802     // than the memory access, narrow it here with a subregister copy.
2803     if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2804       unsigned SubReg;
2805       LLT MemTy = LdSt.getMMO().getMemoryType();
2806       auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2807       if (!getSubRegForClass(RC, TRI, SubReg))
2808         return false;
2809 
2810       // Generate a subreg copy.
2811       auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {})
2812                       .addReg(ValReg, 0, SubReg)
2813                       .getReg(0);
2814       RBI.constrainGenericRegister(Copy, *RC, MRI);
2815       LdSt.getOperand(0).setReg(Copy);
2816     } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) {
2817       // If this is an any-extending load from the FPR bank, split it into a
2818       // regular load + extend.
2819       if (RB.getID() == AArch64::FPRRegBankID) {
2820         unsigned SubReg;
2821         LLT MemTy = LdSt.getMMO().getMemoryType();
2822         auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI);
2823         if (!getSubRegForClass(RC, TRI, SubReg))
2824           return false;
2825         Register OldDst = LdSt.getReg(0);
2826         Register NewDst =
2827             MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType());
2828         LdSt.getOperand(0).setReg(NewDst);
2829         MRI.setRegBank(NewDst, RB);
2830         // Generate a SUBREG_TO_REG to extend it.
2831         MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator()));
2832         MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {})
2833             .addImm(0)
2834             .addUse(NewDst)
2835             .addImm(SubReg);
2836         auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI);
2837         RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI);
2838         MIB.setInstr(LdSt);
2839       }
2840     }
2841 
2842     // Helper lambda for partially selecting I. Either returns the original
2843     // instruction with an updated opcode, or a new instruction.
2844     auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
2845       bool IsStore = isa<GStore>(I);
2846       const unsigned NewOpc =
2847           selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2848       if (NewOpc == I.getOpcode())
2849         return nullptr;
2850       // Check if we can fold anything into the addressing mode.
2851       auto AddrModeFns =
2852           selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
2853       if (!AddrModeFns) {
2854         // Can't fold anything. Use the original instruction.
2855         I.setDesc(TII.get(NewOpc));
2856         I.addOperand(MachineOperand::CreateImm(0));
2857         return &I;
2858       }
2859 
2860       // Folded something. Create a new instruction and return it.
2861       auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
2862       Register CurValReg = I.getOperand(0).getReg();
2863       IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg);
2864       NewInst.cloneMemRefs(I);
2865       for (auto &Fn : *AddrModeFns)
2866         Fn(NewInst);
2867       I.eraseFromParent();
2868       return &*NewInst;
2869     };
2870 
2871     MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
2872     if (!LoadStore)
2873       return false;
2874 
2875     // If we're storing a 0, use WZR/XZR.
2876     if (Opcode == TargetOpcode::G_STORE) {
2877       auto CVal = getIConstantVRegValWithLookThrough(
2878           LoadStore->getOperand(0).getReg(), MRI);
2879       if (CVal && CVal->Value == 0) {
2880         switch (LoadStore->getOpcode()) {
2881         case AArch64::STRWui:
2882         case AArch64::STRHHui:
2883         case AArch64::STRBBui:
2884           LoadStore->getOperand(0).setReg(AArch64::WZR);
2885           break;
2886         case AArch64::STRXui:
2887           LoadStore->getOperand(0).setReg(AArch64::XZR);
2888           break;
2889         }
2890       }
2891     }
2892 
2893     if (IsZExtLoad) {
2894       // The zextload from a smaller type to i32 should be handled by the
2895       // importer.
2896       if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
2897         return false;
2898       // If we have a ZEXTLOAD then retarget the load at a narrower 32-bit register
2899       // and zero-extend the result with SUBREG_TO_REG.
2900       Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2901       Register DstReg = LoadStore->getOperand(0).getReg();
2902       LoadStore->getOperand(0).setReg(LdReg);
2903 
2904       MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
2905       MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2906           .addImm(0)
2907           .addUse(LdReg)
2908           .addImm(AArch64::sub_32);
2909       constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2910       return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2911                                           MRI);
2912     }
2913     return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
2914   }
2915 
2916   case TargetOpcode::G_SMULH:
2917   case TargetOpcode::G_UMULH: {
2918     // Reject the various things we don't support yet.
2919     if (unsupportedBinOp(I, RBI, MRI, TRI))
2920       return false;
2921 
2922     const Register DefReg = I.getOperand(0).getReg();
2923     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2924 
2925     if (RB.getID() != AArch64::GPRRegBankID) {
2926       LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2927       return false;
2928     }
2929 
2930     if (Ty != LLT::scalar(64)) {
2931       LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2932                         << ", expected: " << LLT::scalar(64) << '\n');
2933       return false;
2934     }
2935 
2936     unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2937                                                              : AArch64::UMULHrr;
2938     I.setDesc(TII.get(NewOpc));
2939 
2940     // Now that we selected an opcode, we need to constrain the register
2941     // operands to use appropriate classes.
2942     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2943   }
2944   case TargetOpcode::G_LSHR:
2945   case TargetOpcode::G_ASHR:
2946     if (MRI.getType(I.getOperand(0).getReg()).isVector())
2947       return selectVectorAshrLshr(I, MRI);
2948     LLVM_FALLTHROUGH;
2949   case TargetOpcode::G_SHL:
2950     if (Opcode == TargetOpcode::G_SHL &&
2951         MRI.getType(I.getOperand(0).getReg()).isVector())
2952       return selectVectorSHL(I, MRI);
2953     LLVM_FALLTHROUGH;
2954   case TargetOpcode::G_FADD:
2955   case TargetOpcode::G_FSUB:
2956   case TargetOpcode::G_FMUL:
2957   case TargetOpcode::G_FDIV:
2958   case TargetOpcode::G_OR: {
2959     // Reject the various things we don't support yet.
2960     if (unsupportedBinOp(I, RBI, MRI, TRI))
2961       return false;
2962 
2963     const unsigned OpSize = Ty.getSizeInBits();
2964 
2965     const Register DefReg = I.getOperand(0).getReg();
2966     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2967 
2968     const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2969     if (NewOpc == I.getOpcode())
2970       return false;
2971 
2972     I.setDesc(TII.get(NewOpc));
2973     // FIXME: Should the type be always reset in setDesc?
2974 
2975     // Now that we selected an opcode, we need to constrain the register
2976     // operands to use appropriate classes.
2977     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2978   }
2979 
2980   case TargetOpcode::G_PTR_ADD: {
2981     emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB);
2982     I.eraseFromParent();
2983     return true;
2984   }
2985   case TargetOpcode::G_SADDO:
2986   case TargetOpcode::G_UADDO:
2987   case TargetOpcode::G_SSUBO:
2988   case TargetOpcode::G_USUBO: {
2989     // Emit the operation and get the correct condition code.
2990     auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(),
2991                                   I.getOperand(2), I.getOperand(3), MIB);
2992 
2993     // Now, put the overflow result in the op's overflow-result register (its
2994     // second definition). CSINC increments the result when the predicate is
2995     // false, so to get the increment when it's true, we need to use the
2996     // inverse; here we want to increment when the overflow condition holds.
2997     Register ZReg = AArch64::WZR;
2998     emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg,
2999               getInvertedCondCode(OpAndCC.second), MIB);
3000     I.eraseFromParent();
3001     return true;
3002   }
3003 
3004   case TargetOpcode::G_PTRMASK: {
3005     Register MaskReg = I.getOperand(2).getReg();
3006     Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI);
3007     // TODO: Implement arbitrary cases
3008     if (!MaskVal || !isShiftedMask_64(*MaskVal))
3009       return false;
3010 
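         // Fold the mask into an ANDXri; the immediate operand is rewritten with its
         // logical-immediate encoding.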
3011     uint64_t Mask = *MaskVal;
3012     I.setDesc(TII.get(AArch64::ANDXri));
3013     I.getOperand(2).ChangeToImmediate(
3014         AArch64_AM::encodeLogicalImmediate(Mask, 64));
3015 
3016     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3017   }
3018   case TargetOpcode::G_PTRTOINT:
3019   case TargetOpcode::G_TRUNC: {
3020     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3021     const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3022 
3023     const Register DstReg = I.getOperand(0).getReg();
3024     const Register SrcReg = I.getOperand(1).getReg();
3025 
3026     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3027     const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3028 
3029     if (DstRB.getID() != SrcRB.getID()) {
3030       LLVM_DEBUG(
3031           dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
3032       return false;
3033     }
3034 
3035     if (DstRB.getID() == AArch64::GPRRegBankID) {
3036       const TargetRegisterClass *DstRC =
3037           getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3038       if (!DstRC)
3039         return false;
3040 
3041       const TargetRegisterClass *SrcRC =
3042           getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
3043       if (!SrcRC)
3044         return false;
3045 
3046       if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
3047           !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
3048         LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
3049         return false;
3050       }
3051 
3052       if (DstRC == SrcRC) {
3053         // Nothing to be done
3054       } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
3055                  SrcTy == LLT::scalar(64)) {
3056         llvm_unreachable("TableGen can import this case");
3057         return false;
3058       } else if (DstRC == &AArch64::GPR32RegClass &&
3059                  SrcRC == &AArch64::GPR64RegClass) {
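                 // Truncating a 64-bit GPR to 32 bits is just a copy of the sub_32
                 // subregister.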
3060         I.getOperand(1).setSubReg(AArch64::sub_32);
3061       } else {
3062         LLVM_DEBUG(
3063             dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
3064         return false;
3065       }
3066 
3067       I.setDesc(TII.get(TargetOpcode::COPY));
3068       return true;
3069     } else if (DstRB.getID() == AArch64::FPRRegBankID) {
3070       if (DstTy == LLT::fixed_vector(4, 16) &&
3071           SrcTy == LLT::fixed_vector(4, 32)) {
3072         I.setDesc(TII.get(AArch64::XTNv4i16));
3073         constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3074         return true;
3075       }
3076 
3077       if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
3078         MachineInstr *Extract = emitExtractVectorElt(
3079             DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
3080         if (!Extract)
3081           return false;
3082         I.eraseFromParent();
3083         return true;
3084       }
3085 
3086       // We might have a vector G_PTRTOINT, in which case just emit a COPY.
3087       if (Opcode == TargetOpcode::G_PTRTOINT) {
3088         assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
3089         I.setDesc(TII.get(TargetOpcode::COPY));
3090         return selectCopy(I, TII, MRI, TRI, RBI);
3091       }
3092     }
3093 
3094     return false;
3095   }
3096 
3097   case TargetOpcode::G_ANYEXT: {
3098     if (selectUSMovFromExtend(I, MRI))
3099       return true;
3100 
3101     const Register DstReg = I.getOperand(0).getReg();
3102     const Register SrcReg = I.getOperand(1).getReg();
3103 
3104     const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
3105     if (RBDst.getID() != AArch64::GPRRegBankID) {
3106       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
3107                         << ", expected: GPR\n");
3108       return false;
3109     }
3110 
3111     const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
3112     if (RBSrc.getID() != AArch64::GPRRegBankID) {
3113       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
3114                         << ", expected: GPR\n");
3115       return false;
3116     }
3117 
3118     const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
3119 
3120     if (DstSize == 0) {
3121       LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
3122       return false;
3123     }
3124 
3125     if (DstSize != 64 && DstSize > 32) {
3126       LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
3127                         << ", expected: 32 or 64\n");
3128       return false;
3129     }
3130     // At this point G_ANYEXT is just like a plain COPY, but we need to
3131     // explicitly form the 64-bit value when extending to 64 bits.
3132     if (DstSize > 32) {
3133       Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
3134       BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
3135           .addDef(ExtSrc)
3136           .addImm(0)
3137           .addUse(SrcReg)
3138           .addImm(AArch64::sub_32);
3139       I.getOperand(1).setReg(ExtSrc);
3140     }
3141     return selectCopy(I, TII, MRI, TRI, RBI);
3142   }
3143 
3144   case TargetOpcode::G_ZEXT:
3145   case TargetOpcode::G_SEXT_INREG:
3146   case TargetOpcode::G_SEXT: {
3147     if (selectUSMovFromExtend(I, MRI))
3148       return true;
3149 
3150     unsigned Opcode = I.getOpcode();
3151     const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
3152     const Register DefReg = I.getOperand(0).getReg();
3153     Register SrcReg = I.getOperand(1).getReg();
3154     const LLT DstTy = MRI.getType(DefReg);
3155     const LLT SrcTy = MRI.getType(SrcReg);
3156     unsigned DstSize = DstTy.getSizeInBits();
3157     unsigned SrcSize = SrcTy.getSizeInBits();
3158 
3159     // SEXT_INREG has the same source register size as the destination; the size
3160     // of the value to be extended is encoded in the immediate.
3161     if (Opcode == TargetOpcode::G_SEXT_INREG)
3162       SrcSize = I.getOperand(2).getImm();
3163 
3164     if (DstTy.isVector())
3165       return false; // Should be handled by imported patterns.
3166 
3167     assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
3168                AArch64::GPRRegBankID &&
3169            "Unexpected ext regbank");
3170 
3171     MachineInstr *ExtI;
3172 
3173     // First, check whether we're extending the result of a load whose
3174     // destination type is smaller than 32 bits; if so, this zext is redundant.
3175     // GPR32 is the smallest GPR register on AArch64, and all narrower loads
3176     // automatically zero-extend the upper bits. E.g.
3177     // %v(s8) = G_LOAD %p, :: (load 1)
3178     // %v2(s32) = G_ZEXT %v(s8)
3179     if (!IsSigned) {
3180       auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
3181       bool IsGPR =
3182           RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
3183       if (LoadMI && IsGPR) {
3184         const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
3185         unsigned BytesLoaded = MemOp->getSize();
3186         if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
3187           return selectCopy(I, TII, MRI, TRI, RBI);
3188       }
3189 
3190       // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs)
3191       // + SUBREG_TO_REG.
3192       //
3193       // If we are zero extending from 32 bits to 64 bits, it's possible that
3194       // the instruction implicitly does the zero extend for us. In that case,
3195       // we only need the SUBREG_TO_REG.
3196       if (IsGPR && SrcSize == 32 && DstSize == 64) {
3197         // Unlike with the G_LOAD case, we don't want to look through copies
3198         // here. (See isDef32.)
3199         MachineInstr *Def = MRI.getVRegDef(SrcReg);
3200         Register SubregToRegSrc = SrcReg;
3201 
3202         // Does the instruction implicitly zero extend?
3203         if (!Def || !isDef32(*Def)) {
3204           // No. Zero out using an OR.
3205           Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3206           const Register ZReg = AArch64::WZR;
3207           MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0);
3208           SubregToRegSrc = OrDst;
3209         }
3210 
3211         MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
3212             .addImm(0)
3213             .addUse(SubregToRegSrc)
3214             .addImm(AArch64::sub_32);
3215 
3216         if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
3217                                           MRI)) {
3218           LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
3219           return false;
3220         }
3221 
3222         if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3223                                           MRI)) {
3224           LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
3225           return false;
3226         }
3227 
3228         I.eraseFromParent();
3229         return true;
3230       }
3231     }
3232 
3233     if (DstSize == 64) {
3234       if (Opcode != TargetOpcode::G_SEXT_INREG) {
3235         // FIXME: Can we avoid manually doing this?
3236         if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
3237                                           MRI)) {
3238           LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
3239                             << " operand\n");
3240           return false;
3241         }
3242         SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
3243                                 {&AArch64::GPR64RegClass}, {})
3244                      .addImm(0)
3245                      .addUse(SrcReg)
3246                      .addImm(AArch64::sub_32)
3247                      .getReg(0);
3248       }
3249 
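           // SBFM/UBFM with immr = 0 and imms = SrcSize - 1 sign- or zero-extends the
           // low SrcSize bits of the source.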
3250       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
3251                              {DefReg}, {SrcReg})
3252                   .addImm(0)
3253                   .addImm(SrcSize - 1);
3254     } else if (DstSize <= 32) {
3255       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
3256                              {DefReg}, {SrcReg})
3257                   .addImm(0)
3258                   .addImm(SrcSize - 1);
3259     } else {
3260       return false;
3261     }
3262 
3263     constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
3264     I.eraseFromParent();
3265     return true;
3266   }
3267 
3268   case TargetOpcode::G_SITOFP:
3269   case TargetOpcode::G_UITOFP:
3270   case TargetOpcode::G_FPTOSI:
3271   case TargetOpcode::G_FPTOUI: {
3272     const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
3273               SrcTy = MRI.getType(I.getOperand(1).getReg());
3274     const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
3275     if (NewOpc == Opcode)
3276       return false;
3277 
3278     I.setDesc(TII.get(NewOpc));
3279     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3280 
3281     return true;
3282   }
3283 
3284   case TargetOpcode::G_FREEZE:
3285     return selectCopy(I, TII, MRI, TRI, RBI);
3286 
3287   case TargetOpcode::G_INTTOPTR:
3288     // The importer is currently unable to import pointer types since they
3289     // didn't exist in SelectionDAG.
3290     return selectCopy(I, TII, MRI, TRI, RBI);
3291 
3292   case TargetOpcode::G_BITCAST:
3293     // Imported SelectionDAG rules can handle every bitcast except those that
3294     // bitcast from a type to the same type. Ideally, these shouldn't occur
3295     // but we might not run an optimizer that deletes them. The other exception
3296     // is bitcasts involving pointer types, as SelectionDAG has no knowledge
3297     // of them.
3298     return selectCopy(I, TII, MRI, TRI, RBI);
3299 
3300   case TargetOpcode::G_SELECT: {
3301     if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
3302       LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
3303                         << ", expected: " << LLT::scalar(1) << '\n');
3304       return false;
3305     }
3306 
3307     const Register CondReg = I.getOperand(1).getReg();
3308     const Register TReg = I.getOperand(2).getReg();
3309     const Register FReg = I.getOperand(3).getReg();
3310 
3311     if (tryOptSelect(I))
3312       return true;
3313 
3314     // Make sure to use an unused vreg instead of wzr, so that the peephole
3315     // optimizations will be able to optimize these.
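         // ANDS of the condition with #1 sets NZCV from bit 0, so the select below
         // picks the true value on NE (i.e. when the condition bit is set).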
3316     Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3317     auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg})
3318                      .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
3319     constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3320     if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB))
3321       return false;
3322     I.eraseFromParent();
3323     return true;
3324   }
3325   case TargetOpcode::G_ICMP: {
3326     if (Ty.isVector())
3327       return selectVectorICmp(I, MRI);
3328 
3329     if (Ty != LLT::scalar(32)) {
3330       LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
3331                         << ", expected: " << LLT::scalar(32) << '\n');
3332       return false;
3333     }
3334 
3335     auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3336     const AArch64CC::CondCode InvCC =
3337         changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred));
3338     emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB);
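         // CSINC with both sources WZR and the inverted condition materializes a 0/1
         // result, equivalent to a CSET on the original predicate.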
3339     emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR,
3340               /*Src2=*/AArch64::WZR, InvCC, MIB);
3341     I.eraseFromParent();
3342     return true;
3343   }
3344 
3345   case TargetOpcode::G_FCMP: {
3346     CmpInst::Predicate Pred =
3347         static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate());
3348     if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB,
3349                        Pred) ||
3350         !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB))
3351       return false;
3352     I.eraseFromParent();
3353     return true;
3354   }
3355   case TargetOpcode::G_VASTART:
3356     return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
3357                                 : selectVaStartAAPCS(I, MF, MRI);
3358   case TargetOpcode::G_INTRINSIC:
3359     return selectIntrinsic(I, MRI);
3360   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3361     return selectIntrinsicWithSideEffects(I, MRI);
3362   case TargetOpcode::G_IMPLICIT_DEF: {
3363     I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3364     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3365     const Register DstReg = I.getOperand(0).getReg();
3366     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3367     const TargetRegisterClass *DstRC =
3368         getRegClassForTypeOnBank(DstTy, DstRB, RBI);
3369     RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3370     return true;
3371   }
3372   case TargetOpcode::G_BLOCK_ADDR: {
3373     if (TM.getCodeModel() == CodeModel::Large) {
3374       materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3375       I.eraseFromParent();
3376       return true;
3377     } else {
3378       I.setDesc(TII.get(AArch64::MOVaddrBA));
3379       auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3380                            I.getOperand(0).getReg())
3381                        .addBlockAddress(I.getOperand(1).getBlockAddress(),
3382                                         /* Offset */ 0, AArch64II::MO_PAGE)
3383                        .addBlockAddress(
3384                            I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3385                            AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3386       I.eraseFromParent();
3387       return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3388     }
3389   }
3390   case AArch64::G_DUP: {
3391     // When the scalar of G_DUP is an s8/s16 GPR, it can't be selected by the
3392     // imported patterns, so do it manually here. Avoiding the generation of s16
3393     // GPRs is difficult because at register bank selection (RBS) we may end up
3394     // pessimizing the FPR case if we decide to add an any-extend to fix this.
3395     // Manual selection is the most robust solution for now.
3396     if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3397         AArch64::GPRRegBankID)
3398       return false; // We expect the fpr regbank case to be imported.
3399     LLT VecTy = MRI.getType(I.getOperand(0).getReg());
3400     if (VecTy == LLT::fixed_vector(8, 8))
3401       I.setDesc(TII.get(AArch64::DUPv8i8gpr));
3402     else if (VecTy == LLT::fixed_vector(16, 8))
3403       I.setDesc(TII.get(AArch64::DUPv16i8gpr));
3404     else if (VecTy == LLT::fixed_vector(4, 16))
3405       I.setDesc(TII.get(AArch64::DUPv4i16gpr));
3406     else if (VecTy == LLT::fixed_vector(8, 16))
3407       I.setDesc(TII.get(AArch64::DUPv8i16gpr));
3408     else
3409       return false;
3410     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3411   }
3412   case TargetOpcode::G_INTRINSIC_TRUNC:
3413     return selectIntrinsicTrunc(I, MRI);
3414   case TargetOpcode::G_INTRINSIC_ROUND:
3415     return selectIntrinsicRound(I, MRI);
3416   case TargetOpcode::G_BUILD_VECTOR:
3417     return selectBuildVector(I, MRI);
3418   case TargetOpcode::G_MERGE_VALUES:
3419     return selectMergeValues(I, MRI);
3420   case TargetOpcode::G_UNMERGE_VALUES:
3421     return selectUnmergeValues(I, MRI);
3422   case TargetOpcode::G_SHUFFLE_VECTOR:
3423     return selectShuffleVector(I, MRI);
3424   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3425     return selectExtractElt(I, MRI);
3426   case TargetOpcode::G_INSERT_VECTOR_ELT:
3427     return selectInsertElt(I, MRI);
3428   case TargetOpcode::G_CONCAT_VECTORS:
3429     return selectConcatVectors(I, MRI);
3430   case TargetOpcode::G_JUMP_TABLE:
3431     return selectJumpTable(I, MRI);
3432   case TargetOpcode::G_VECREDUCE_FADD:
3433   case TargetOpcode::G_VECREDUCE_ADD:
3434     return selectReduction(I, MRI);
3435   }
3436 
3437   return false;
3438 }
3439 
3440 bool AArch64InstructionSelector::selectReduction(MachineInstr &I,
3441                                                  MachineRegisterInfo &MRI) {
3442   Register VecReg = I.getOperand(1).getReg();
3443   LLT VecTy = MRI.getType(VecReg);
3444   if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) {
3445     // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit
3446     // a subregister copy afterwards.
3447     if (VecTy == LLT::fixed_vector(2, 32)) {
3448       Register DstReg = I.getOperand(0).getReg();
3449       auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass},
3450                                  {VecReg, VecReg});
3451       auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
3452                       .addReg(AddP.getReg(0), 0, AArch64::ssub)
3453                       .getReg(0);
3454       RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI);
3455       I.eraseFromParent();
3456       return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI);
3457     }
3458 
3459     unsigned Opc = 0;
3460     if (VecTy == LLT::fixed_vector(16, 8))
3461       Opc = AArch64::ADDVv16i8v;
3462     else if (VecTy == LLT::fixed_vector(8, 16))
3463       Opc = AArch64::ADDVv8i16v;
3464     else if (VecTy == LLT::fixed_vector(4, 32))
3465       Opc = AArch64::ADDVv4i32v;
3466     else if (VecTy == LLT::fixed_vector(2, 64))
3467       Opc = AArch64::ADDPv2i64p;
3468     else {
3469       LLVM_DEBUG(dbgs() << "Unhandled type for add reduction");
3470       return false;
3471     }
3472     I.setDesc(TII.get(Opc));
3473     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3474   }
3475 
3476   if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) {
3477     unsigned Opc = 0;
3478     if (VecTy == LLT::fixed_vector(2, 32))
3479       Opc = AArch64::FADDPv2i32p;
3480     else if (VecTy == LLT::fixed_vector(2, 64))
3481       Opc = AArch64::FADDPv2i64p;
3482     else {
3483       LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction");
3484       return false;
3485     }
3486     I.setDesc(TII.get(Opc));
3487     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3488   }
3489   return false;
3490 }
3491 
3492 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
3493                                             MachineRegisterInfo &MRI) {
3494   assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
3495   Register JTAddr = I.getOperand(0).getReg();
3496   unsigned JTI = I.getOperand(1).getIndex();
3497   Register Index = I.getOperand(2).getReg();
3498 
3499   Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3500   Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
3501 
3502   MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr);
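       // JumpTableDest32 is later expanded into a load of the 4-byte table entry
       // plus an add to the table base, leaving the target address in TargetReg
       // (ScratchReg is a temporary for that expansion).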
3503   auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
3504                                       {TargetReg, ScratchReg}, {JTAddr, Index})
3505                            .addJumpTableIndex(JTI);
3506   // Build the indirect branch.
3507   MIB.buildInstr(AArch64::BR, {}, {TargetReg});
3508   I.eraseFromParent();
3509   return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
3510 }
3511 
3512 bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I,
3513                                                  MachineRegisterInfo &MRI) {
3514   assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
3515   assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
3516 
3517   Register DstReg = I.getOperand(0).getReg();
3518   unsigned JTI = I.getOperand(1).getIndex();
3519   // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
3520   auto MovMI =
3521     MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
3522           .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
3523           .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3524   I.eraseFromParent();
3525   return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3526 }
3527 
3528 bool AArch64InstructionSelector::selectTLSGlobalValue(
3529     MachineInstr &I, MachineRegisterInfo &MRI) {
3530   if (!STI.isTargetMachO())
3531     return false;
3532   MachineFunction &MF = *I.getParent()->getParent();
3533   MF.getFrameInfo().setAdjustsStack(true);
3534 
3535   const auto &GlobalOp = I.getOperand(1);
3536   assert(GlobalOp.getOffset() == 0 &&
3537          "Shouldn't have an offset on TLS globals!");
3538   const GlobalValue &GV = *GlobalOp.getGlobal();
3539 
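       // Mach-O TLV sequence: load the thread-local descriptor's address via the
       // GOT, load the resolver function pointer from the descriptor's first word,
       // then call it with X0 pointing at the descriptor; the variable's address
       // comes back in X0.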
3540   auto LoadGOT =
3541       MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {})
3542           .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
3543 
3544   auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
3545                              {LoadGOT.getReg(0)})
3546                   .addImm(0);
3547 
3548   MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0));
3549   // TLS calls preserve all registers except those that absolutely must be
3550   // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3551   // silly).
3552   MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
3553       .addUse(AArch64::X0, RegState::Implicit)
3554       .addDef(AArch64::X0, RegState::Implicit)
3555       .addRegMask(TRI.getTLSCallPreservedMask());
3556 
3557   MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
3558   RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
3559                                MRI);
3560   I.eraseFromParent();
3561   return true;
3562 }
3563 
3564 bool AArch64InstructionSelector::selectIntrinsicTrunc(
3565     MachineInstr &I, MachineRegisterInfo &MRI) const {
3566   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3567 
3568   // Select the correct opcode.
3569   unsigned Opc = 0;
3570   if (!SrcTy.isVector()) {
3571     switch (SrcTy.getSizeInBits()) {
3572     default:
3573     case 16:
3574       Opc = AArch64::FRINTZHr;
3575       break;
3576     case 32:
3577       Opc = AArch64::FRINTZSr;
3578       break;
3579     case 64:
3580       Opc = AArch64::FRINTZDr;
3581       break;
3582     }
3583   } else {
3584     unsigned NumElts = SrcTy.getNumElements();
3585     switch (SrcTy.getElementType().getSizeInBits()) {
3586     default:
3587       break;
3588     case 16:
3589       if (NumElts == 4)
3590         Opc = AArch64::FRINTZv4f16;
3591       else if (NumElts == 8)
3592         Opc = AArch64::FRINTZv8f16;
3593       break;
3594     case 32:
3595       if (NumElts == 2)
3596         Opc = AArch64::FRINTZv2f32;
3597       else if (NumElts == 4)
3598         Opc = AArch64::FRINTZv4f32;
3599       break;
3600     case 64:
3601       if (NumElts == 2)
3602         Opc = AArch64::FRINTZv2f64;
3603       break;
3604     }
3605   }
3606 
3607   if (!Opc) {
3608     // Didn't get an opcode above, bail.
3609     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3610     return false;
3611   }
3612 
3613   // Legalization would have set us up perfectly for this; we just need to
3614   // set the opcode and move on.
3615   I.setDesc(TII.get(Opc));
3616   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3617 }
3618 
3619 bool AArch64InstructionSelector::selectIntrinsicRound(
3620     MachineInstr &I, MachineRegisterInfo &MRI) const {
3621   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3622 
3623   // Select the correct opcode.
3624   unsigned Opc = 0;
3625   if (!SrcTy.isVector()) {
3626     switch (SrcTy.getSizeInBits()) {
3627     default:
3628     case 16:
3629       Opc = AArch64::FRINTAHr;
3630       break;
3631     case 32:
3632       Opc = AArch64::FRINTASr;
3633       break;
3634     case 64:
3635       Opc = AArch64::FRINTADr;
3636       break;
3637     }
3638   } else {
3639     unsigned NumElts = SrcTy.getNumElements();
3640     switch (SrcTy.getElementType().getSizeInBits()) {
3641     default:
3642       break;
3643     case 16:
3644       if (NumElts == 4)
3645         Opc = AArch64::FRINTAv4f16;
3646       else if (NumElts == 8)
3647         Opc = AArch64::FRINTAv8f16;
3648       break;
3649     case 32:
3650       if (NumElts == 2)
3651         Opc = AArch64::FRINTAv2f32;
3652       else if (NumElts == 4)
3653         Opc = AArch64::FRINTAv4f32;
3654       break;
3655     case 64:
3656       if (NumElts == 2)
3657         Opc = AArch64::FRINTAv2f64;
3658       break;
3659     }
3660   }
3661 
3662   if (!Opc) {
3663     // Didn't get an opcode above, bail.
3664     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3665     return false;
3666   }
3667 
3668   // Legalization would have set us up perfectly for this; we just need to
3669   // set the opcode and move on.
3670   I.setDesc(TII.get(Opc));
3671   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3672 }
3673 
3674 bool AArch64InstructionSelector::selectVectorICmp(
3675     MachineInstr &I, MachineRegisterInfo &MRI) {
3676   Register DstReg = I.getOperand(0).getReg();
3677   LLT DstTy = MRI.getType(DstReg);
3678   Register SrcReg = I.getOperand(2).getReg();
3679   Register Src2Reg = I.getOperand(3).getReg();
3680   LLT SrcTy = MRI.getType(SrcReg);
3681 
3682   unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3683   unsigned NumElts = DstTy.getNumElements();
3684 
3685   // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3686   // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3687   // Third index is cc opcode:
3688   // 0 == eq
3689   // 1 == ugt
3690   // 2 == uge
3691   // 3 == ult
3692   // 4 == ule
3693   // 5 == sgt
3694   // 6 == sge
3695   // 7 == slt
3696   // 8 == sle
3697   // ne is done by negating 'eq' result.
3698 
3699   // The table below assumes that for some comparisons the operands will be
3700   // commuted.
3701   // ult op == commute + ugt op
3702   // ule op == commute + uge op
3703   // slt op == commute + sgt op
3704   // sle op == commute + sge op
3705   unsigned PredIdx = 0;
3706   bool SwapOperands = false;
3707   CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3708   switch (Pred) {
3709   case CmpInst::ICMP_NE:
3710   case CmpInst::ICMP_EQ:
3711     PredIdx = 0;
3712     break;
3713   case CmpInst::ICMP_UGT:
3714     PredIdx = 1;
3715     break;
3716   case CmpInst::ICMP_UGE:
3717     PredIdx = 2;
3718     break;
3719   case CmpInst::ICMP_ULT:
3720     PredIdx = 3;
3721     SwapOperands = true;
3722     break;
3723   case CmpInst::ICMP_ULE:
3724     PredIdx = 4;
3725     SwapOperands = true;
3726     break;
3727   case CmpInst::ICMP_SGT:
3728     PredIdx = 5;
3729     break;
3730   case CmpInst::ICMP_SGE:
3731     PredIdx = 6;
3732     break;
3733   case CmpInst::ICMP_SLT:
3734     PredIdx = 7;
3735     SwapOperands = true;
3736     break;
3737   case CmpInst::ICMP_SLE:
3738     PredIdx = 8;
3739     SwapOperands = true;
3740     break;
3741   default:
3742     llvm_unreachable("Unhandled icmp predicate");
3743     return false;
3744   }
3745 
3746   // This table obviously should be tablegen'd when we have our GISel native
3747   // tablegen selector.
3748 
3749   static const unsigned OpcTable[4][4][9] = {
3750       {
3751           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3752            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3753            0 /* invalid */},
3754           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3755            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3756            0 /* invalid */},
3757           {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3758            AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3759            AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3760           {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3761            AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3762            AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3763       },
3764       {
3765           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3766            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3767            0 /* invalid */},
3768           {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3769            AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3770            AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3771           {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3772            AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3773            AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3774           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3775            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3776            0 /* invalid */}
3777       },
3778       {
3779           {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3780            AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3781            AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3782           {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3783            AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3784            AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3785           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3786            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3787            0 /* invalid */},
3788           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3789            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3790            0 /* invalid */}
3791       },
3792       {
3793           {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3794            AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3795            AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3796           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3797            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3798            0 /* invalid */},
3799           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3800            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3801            0 /* invalid */},
3802           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3803            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3804            0 /* invalid */}
3805       },
3806   };
3807   unsigned EltIdx = Log2_32(SrcEltSize / 8);
3808   unsigned NumEltsIdx = Log2_32(NumElts / 2);
3809   unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3810   if (!Opc) {
3811     LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3812     return false;
3813   }
3814 
3815   const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3816   const TargetRegisterClass *SrcRC =
3817       getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3818   if (!SrcRC) {
3819     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3820     return false;
3821   }
3822 
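       // There is no vector compare-not-equal, so ICMP_NE is lowered as a CMEQ
       // followed by a NOT; pick the 64- or 128-bit NOT based on the source width.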
3823   unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3824   if (SrcTy.getSizeInBits() == 128)
3825     NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3826 
3827   if (SwapOperands)
3828     std::swap(SrcReg, Src2Reg);
3829 
3830   auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3831   constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3832 
3833   // Invert if we had a 'ne' cc.
3834   if (NotOpc) {
3835     Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3836     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3837   } else {
3838     MIB.buildCopy(DstReg, Cmp.getReg(0));
3839   }
3840   RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3841   I.eraseFromParent();
3842   return true;
3843 }
3844 
3845 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3846     unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3847     MachineIRBuilder &MIRBuilder) const {
3848   auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3849 
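       // Build an IMPLICIT_DEF of the full vector register and INSERT_SUBREG the
       // scalar into its low lane at the subregister index matching the element
       // size.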
3850   auto BuildFn = [&](unsigned SubregIndex) {
3851     auto Ins =
3852         MIRBuilder
3853             .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3854             .addImm(SubregIndex);
3855     constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3856     constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3857     return &*Ins;
3858   };
3859 
3860   switch (EltSize) {
3861   case 16:
3862     return BuildFn(AArch64::hsub);
3863   case 32:
3864     return BuildFn(AArch64::ssub);
3865   case 64:
3866     return BuildFn(AArch64::dsub);
3867   default:
3868     return nullptr;
3869   }
3870 }
3871 
3872 bool AArch64InstructionSelector::selectMergeValues(
3873     MachineInstr &I, MachineRegisterInfo &MRI) {
3874   assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3875   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3876   const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3877   assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3878   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3879 
3880   if (I.getNumOperands() != 3)
3881     return false;
3882 
3883   // Merging 2 s64s into an s128.
3884   if (DstTy == LLT::scalar(128)) {
3885     if (SrcTy.getSizeInBits() != 64)
3886       return false;
3887     Register DstReg = I.getOperand(0).getReg();
3888     Register Src1Reg = I.getOperand(1).getReg();
3889     Register Src2Reg = I.getOperand(2).getReg();
3890     auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3891     MachineInstr *InsMI =
3892         emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3893     if (!InsMI)
3894       return false;
3895     MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3896                                           Src2Reg, /* LaneIdx */ 1, RB, MIB);
3897     if (!Ins2MI)
3898       return false;
3899     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3900     constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3901     I.eraseFromParent();
3902     return true;
3903   }
3904 
3905   if (RB.getID() != AArch64::GPRRegBankID)
3906     return false;
3907 
3908   if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3909     return false;
3910 
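       // Merge two s32 GPRs into an s64: widen both with SUBREG_TO_REG, then BFM
       // (immr = 32, imms = 31) inserts the low 32 bits of the second value into
       // bits [63:32] of the first.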
3911   auto *DstRC = &AArch64::GPR64RegClass;
3912   Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3913   MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3914                                     TII.get(TargetOpcode::SUBREG_TO_REG))
3915                                 .addDef(SubToRegDef)
3916                                 .addImm(0)
3917                                 .addUse(I.getOperand(1).getReg())
3918                                 .addImm(AArch64::sub_32);
3919   Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3920   // Need to any-extend the second scalar before we can use BFM.
3921   MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3922                                     TII.get(TargetOpcode::SUBREG_TO_REG))
3923                                 .addDef(SubToRegDef2)
3924                                 .addImm(0)
3925                                 .addUse(I.getOperand(2).getReg())
3926                                 .addImm(AArch64::sub_32);
3927   MachineInstr &BFM =
3928       *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3929            .addDef(I.getOperand(0).getReg())
3930            .addUse(SubToRegDef)
3931            .addUse(SubToRegDef2)
3932            .addImm(32)
3933            .addImm(31);
3934   constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3935   constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3936   constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3937   I.eraseFromParent();
3938   return true;
3939 }
3940 
3941 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3942                               const unsigned EltSize) {
3943   // Choose a lane copy opcode and subregister based on the size of the
3944   // vector's elements.
3945   switch (EltSize) {
3946   case 8:
3947     CopyOpc = AArch64::CPYi8;
3948     ExtractSubReg = AArch64::bsub;
3949     break;
3950   case 16:
3951     CopyOpc = AArch64::CPYi16;
3952     ExtractSubReg = AArch64::hsub;
3953     break;
3954   case 32:
3955     CopyOpc = AArch64::CPYi32;
3956     ExtractSubReg = AArch64::ssub;
3957     break;
3958   case 64:
3959     CopyOpc = AArch64::CPYi64;
3960     ExtractSubReg = AArch64::dsub;
3961     break;
3962   default:
3963     // Unknown size, bail out.
3964     LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3965     return false;
3966   }
3967   return true;
3968 }
3969 
3970 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3971     Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3972     Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3973   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3974   unsigned CopyOpc = 0;
3975   unsigned ExtractSubReg = 0;
3976   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3977     LLVM_DEBUG(
3978         dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3979     return nullptr;
3980   }
3981 
3982   const TargetRegisterClass *DstRC =
3983       getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3984   if (!DstRC) {
3985     LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3986     return nullptr;
3987   }
3988 
3989   const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3990   const LLT &VecTy = MRI.getType(VecReg);
3991   const TargetRegisterClass *VecRC =
3992       getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3993   if (!VecRC) {
3994     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3995     return nullptr;
3996   }
3997 
3998   // The register that we're going to copy into.
3999   Register InsertReg = VecReg;
4000   if (!DstReg)
4001     DstReg = MRI.createVirtualRegister(DstRC);
4002   // If the lane index is 0, we just use a subregister COPY.
4003   if (LaneIdx == 0) {
4004     auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
4005                     .addReg(VecReg, 0, ExtractSubReg);
4006     RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4007     return &*Copy;
4008   }
4009 
4010   // Lane copies require 128-bit wide registers. If we're dealing with an
4011   // unpacked vector, then we need to move up to that width. Insert an implicit
4012   // def and a subregister insert to get us there.
4013   if (VecTy.getSizeInBits() != 128) {
4014     MachineInstr *ScalarToVector = emitScalarToVector(
4015         VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
4016     if (!ScalarToVector)
4017       return nullptr;
4018     InsertReg = ScalarToVector->getOperand(0).getReg();
4019   }
4020 
4021   MachineInstr *LaneCopyMI =
4022       MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
4023   constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
4024 
4025   // Make sure that we actually constrain the initial copy.
4026   RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
4027   return LaneCopyMI;
4028 }
4029 
4030 bool AArch64InstructionSelector::selectExtractElt(
4031     MachineInstr &I, MachineRegisterInfo &MRI) {
4032   assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
4033          "unexpected opcode!");
4034   Register DstReg = I.getOperand(0).getReg();
4035   const LLT NarrowTy = MRI.getType(DstReg);
4036   const Register SrcReg = I.getOperand(1).getReg();
4037   const LLT WideTy = MRI.getType(SrcReg);
4038   (void)WideTy;
4039   assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
4040          "source register size too small!");
4041   assert(!NarrowTy.isVector() && "cannot extract vector into vector!");
4042 
4043   // Need the lane index to determine the correct copy opcode.
4044   MachineOperand &LaneIdxOp = I.getOperand(2);
4045   assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
4046 
4047   if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4048     LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
4049     return false;
4050   }
4051 
4052   // Find the index to extract from.
4053   auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
4054   if (!VRegAndVal)
4055     return false;
4056   unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
4057 
4058 
4059   const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
4060   MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
4061                                                LaneIdx, MIB);
4062   if (!Extract)
4063     return false;
4064 
4065   I.eraseFromParent();
4066   return true;
4067 }
4068 
4069 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
4070     MachineInstr &I, MachineRegisterInfo &MRI) {
4071   unsigned NumElts = I.getNumOperands() - 1;
4072   Register SrcReg = I.getOperand(NumElts).getReg();
4073   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4074   const LLT SrcTy = MRI.getType(SrcReg);
4075 
4076   assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
4077   if (SrcTy.getSizeInBits() > 128) {
4078     LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
4079     return false;
4080   }
4081 
4082   // We implement a split vector operation by treating the sub-vectors as
4083   // scalars and extracting them.
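       // For example (illustrative), unmerging a <4 x s32> into two <2 x s32>
       // halves treats the source as two 64-bit lanes and extracts each half
       // with a 64-bit lane copy via emitExtractVectorElt below.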
4084   const RegisterBank &DstRB =
4085       *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
4086   for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
4087     Register Dst = I.getOperand(OpIdx).getReg();
4088     MachineInstr *Extract =
4089         emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
4090     if (!Extract)
4091       return false;
4092   }
4093   I.eraseFromParent();
4094   return true;
4095 }
4096 
4097 bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I,
4098                                                      MachineRegisterInfo &MRI) {
4099   assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
4100          "unexpected opcode");
4101 
4102   // TODO: Handle unmerging into GPRs and from scalars to scalars.
4103   if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
4104           AArch64::FPRRegBankID ||
4105       RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
4106           AArch64::FPRRegBankID) {
4107     LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
4108                          "currently unsupported.\n");
4109     return false;
4110   }
4111 
4112   // The last operand is the vector source register, and every other operand is
4113   // a register to unpack into.
4114   unsigned NumElts = I.getNumOperands() - 1;
4115   Register SrcReg = I.getOperand(NumElts).getReg();
4116   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
4117   const LLT WideTy = MRI.getType(SrcReg);
4118   (void)WideTy;
4119   assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
4120          "can only unmerge from vector or s128 types!");
4121   assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
4122          "source register size too small!");
4123 
4124   if (!NarrowTy.isScalar())
4125     return selectSplitVectorUnmerge(I, MRI);
4126 
4127   // Choose a lane copy opcode and subregister based off of the size of the
4128   // vector's elements.
4129   unsigned CopyOpc = 0;
4130   unsigned ExtractSubReg = 0;
4131   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
4132     return false;
4133 
4134   // Set up for the lane copies.
4135   MachineBasicBlock &MBB = *I.getParent();
4136 
4137   // Stores the registers we'll be copying from.
4138   SmallVector<Register, 4> InsertRegs;
4139 
4140   // We'll use the first register twice, so we only need NumElts-1 registers.
4141   unsigned NumInsertRegs = NumElts - 1;
4142 
4143   // If our elements fit into exactly 128 bits, then we can copy from the source
4144   // directly. Otherwise, we need to do a bit of setup with some subregister
4145   // inserts.
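       // For example (a sketch): unmerging <4 x s32> from a 128-bit source can
       // copy each lane directly, whereas unmerging s32 from a 64-bit <2 x s32>
       // source first widens the source to an FPR128 via the inserts below.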
4146   if (NarrowTy.getSizeInBits() * NumElts == 128) {
4147     InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
4148   } else {
4149     // Otherwise, we have to perform subregister inserts. For each insert, create
4150     // implicit def and a subregister insert, and save the register we create.
4151     const TargetRegisterClass *RC =
4152         getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI),
4153                               WideTy.getScalarSizeInBits() * NumElts);
4154     unsigned SubReg = 0;
4155     bool Found = getSubRegForClass(RC, TRI, SubReg);
4156     (void)Found;
4157     assert(Found && "expected to find last operand's subreg idx");
4158     for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
4159       Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4160       MachineInstr &ImpDefMI =
4161           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
4162                    ImpDefReg);
4163 
4164       // Now, create the subregister insert from SrcReg.
4165       Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
4166       MachineInstr &InsMI =
4167           *BuildMI(MBB, I, I.getDebugLoc(),
4168                    TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
4169                .addUse(ImpDefReg)
4170                .addUse(SrcReg)
4171                .addImm(SubReg);
4172 
4173       constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
4174       constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
4175 
4176       // Save the register so that we can copy from it after.
4177       InsertRegs.push_back(InsertReg);
4178     }
4179   }
4180 
4181   // Now that we've created any necessary subregister inserts, we can
4182   // create the copies.
4183   //
4184   // Perform the first copy separately as a subregister copy.
4185   Register CopyTo = I.getOperand(0).getReg();
4186   auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
4187                        .addReg(InsertRegs[0], 0, ExtractSubReg);
4188   constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
4189 
4190   // Now, perform the remaining copies as vector lane copies.
4191   unsigned LaneIdx = 1;
4192   for (Register InsReg : InsertRegs) {
4193     Register CopyTo = I.getOperand(LaneIdx).getReg();
4194     MachineInstr &CopyInst =
4195         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
4196              .addUse(InsReg)
4197              .addImm(LaneIdx);
4198     constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
4199     ++LaneIdx;
4200   }
4201 
4202   // Separately constrain the first copy's destination. Because of the
4203   // limitation in constrainOperandRegClass, we can't guarantee that this will
4204   // actually be constrained. So, do it ourselves using the second operand.
4205   const TargetRegisterClass *RC =
4206       MRI.getRegClassOrNull(I.getOperand(1).getReg());
4207   if (!RC) {
4208     LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
4209     return false;
4210   }
4211 
4212   RBI.constrainGenericRegister(CopyTo, *RC, MRI);
4213   I.eraseFromParent();
4214   return true;
4215 }
4216 
4217 bool AArch64InstructionSelector::selectConcatVectors(
4218     MachineInstr &I, MachineRegisterInfo &MRI)  {
4219   assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
4220          "Unexpected opcode");
4221   Register Dst = I.getOperand(0).getReg();
4222   Register Op1 = I.getOperand(1).getReg();
4223   Register Op2 = I.getOperand(2).getReg();
4224   MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB);
4225   if (!ConcatMI)
4226     return false;
4227   I.eraseFromParent();
4228   return true;
4229 }
4230 
4231 unsigned
4232 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
4233                                                   MachineFunction &MF) const {
4234   Type *CPTy = CPVal->getType();
4235   Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
4236 
4237   MachineConstantPool *MCP = MF.getConstantPool();
4238   return MCP->getConstantPoolIndex(CPVal, Alignment);
4239 }
4240 
4241 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
4242     const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
4243   auto &MF = MIRBuilder.getMF();
4244   unsigned CPIdx = emitConstantPoolEntry(CPVal, MF);
4245 
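       // The entry is addressed in the usual ADRP + page-offset way: ADRP
       // materializes the 4 KiB page of the constant pool entry, and the load
       // folds in the low 12 bits via MO_PAGEOFF | MO_NC below.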
4246   auto Adrp =
4247       MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
4248           .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
4249 
4250   MachineInstr *LoadMI = nullptr;
4251   MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF);
4252   unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType());
4253   switch (Size) {
4254   case 16:
4255     LoadMI =
4256         &*MIRBuilder
4257               .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
4258               .addConstantPoolIndex(CPIdx, 0,
4259                                     AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4260     break;
4261   case 8:
4262     LoadMI =
4263         &*MIRBuilder
4264               .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
4265               .addConstantPoolIndex(CPIdx, 0,
4266                                     AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4267     break;
4268   case 4:
4269     LoadMI =
4270         &*MIRBuilder
4271               .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp})
4272               .addConstantPoolIndex(CPIdx, 0,
4273                                     AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4274     break;
4275   case 2:
4276     LoadMI =
4277         &*MIRBuilder
4278               .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp})
4279               .addConstantPoolIndex(CPIdx, 0,
4280                                     AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4281     break;
4282   default:
4283     LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
4284                       << *CPVal->getType());
4285     return nullptr;
4286   }
4287   LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo,
4288                                                     MachineMemOperand::MOLoad,
4289                                                     Size, Align(Size)));
4290   constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
4291   constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
4292   return LoadMI;
4293 }
4294 
4295 /// Return an <Opcode, SubregIndex> pair for a vector element insert of a
4296 /// given size and register bank.
4297 static std::pair<unsigned, unsigned>
4298 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
4299   unsigned Opc, SubregIdx;
4300   if (RB.getID() == AArch64::GPRRegBankID) {
4301     if (EltSize == 16) {
4302       Opc = AArch64::INSvi16gpr;
4303       SubregIdx = AArch64::ssub;
4304     } else if (EltSize == 32) {
4305       Opc = AArch64::INSvi32gpr;
4306       SubregIdx = AArch64::ssub;
4307     } else if (EltSize == 64) {
4308       Opc = AArch64::INSvi64gpr;
4309       SubregIdx = AArch64::dsub;
4310     } else {
4311       llvm_unreachable("invalid elt size!");
4312     }
4313   } else {
4314     if (EltSize == 8) {
4315       Opc = AArch64::INSvi8lane;
4316       SubregIdx = AArch64::bsub;
4317     } else if (EltSize == 16) {
4318       Opc = AArch64::INSvi16lane;
4319       SubregIdx = AArch64::hsub;
4320     } else if (EltSize == 32) {
4321       Opc = AArch64::INSvi32lane;
4322       SubregIdx = AArch64::ssub;
4323     } else if (EltSize == 64) {
4324       Opc = AArch64::INSvi64lane;
4325       SubregIdx = AArch64::dsub;
4326     } else {
4327       llvm_unreachable("invalid elt size!");
4328     }
4329   }
4330   return std::make_pair(Opc, SubregIdx);
4331 }
4332 
4333 MachineInstr *AArch64InstructionSelector::emitInstr(
4334     unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps,
4335     std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder,
4336     const ComplexRendererFns &RenderFns) const {
4337   assert(Opcode && "Expected an opcode?");
4338   assert(!isPreISelGenericOpcode(Opcode) &&
4339          "Function should only be used to produce selected instructions!");
4340   auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps);
4341   if (RenderFns)
4342     for (auto &Fn : *RenderFns)
4343       Fn(MI);
4344   constrainSelectedInstRegOperands(*MI, TII, TRI, RBI);
4345   return &*MI;
4346 }
4347 
4348 MachineInstr *AArch64InstructionSelector::emitAddSub(
4349     const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
4350     Register Dst, MachineOperand &LHS, MachineOperand &RHS,
4351     MachineIRBuilder &MIRBuilder) const {
4352   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4353   assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4354   auto Ty = MRI.getType(LHS.getReg());
4355   assert(!Ty.isVector() && "Expected a scalar or pointer?");
4356   unsigned Size = Ty.getSizeInBits();
4357   assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only");
4358   bool Is32Bit = Size == 32;
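       // Layout of the opcode tables passed in (see emitADD/emitADDS/emitSUBS):
       //   row 0: ri form, row 1: rs (shifted-register) form, row 2: rr form,
       //   row 3: ri form of the inverse op (for negated immediates),
       //   row 4: rx (extended-register) form.
       // Column 0 holds the 64-bit (X) opcode and column 1 the 32-bit (W) one,
       // indexed by Is32Bit.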
4359 
4360   // INSTRri form with positive arithmetic immediate.
4361   if (auto Fns = selectArithImmed(RHS))
4362     return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS},
4363                      MIRBuilder, Fns);
4364 
4365   // INSTRri form with negative arithmetic immediate.
4366   if (auto Fns = selectNegArithImmed(RHS))
4367     return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS},
4368                      MIRBuilder, Fns);
4369 
4370   // INSTRrx form.
4371   if (auto Fns = selectArithExtendedRegister(RHS))
4372     return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS},
4373                      MIRBuilder, Fns);
4374 
4375   // INSTRrs form.
4376   if (auto Fns = selectShiftedRegister(RHS))
4377     return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS},
4378                      MIRBuilder, Fns);
4379   return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS},
4380                    MIRBuilder);
4381 }
4382 
4383 MachineInstr *
4384 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
4385                                     MachineOperand &RHS,
4386                                     MachineIRBuilder &MIRBuilder) const {
4387   const std::array<std::array<unsigned, 2>, 5> OpcTable{
4388       {{AArch64::ADDXri, AArch64::ADDWri},
4389        {AArch64::ADDXrs, AArch64::ADDWrs},
4390        {AArch64::ADDXrr, AArch64::ADDWrr},
4391        {AArch64::SUBXri, AArch64::SUBWri},
4392        {AArch64::ADDXrx, AArch64::ADDWrx}}};
4393   return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder);
4394 }
4395 
4396 MachineInstr *
4397 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS,
4398                                      MachineOperand &RHS,
4399                                      MachineIRBuilder &MIRBuilder) const {
4400   const std::array<std::array<unsigned, 2>, 5> OpcTable{
4401       {{AArch64::ADDSXri, AArch64::ADDSWri},
4402        {AArch64::ADDSXrs, AArch64::ADDSWrs},
4403        {AArch64::ADDSXrr, AArch64::ADDSWrr},
4404        {AArch64::SUBSXri, AArch64::SUBSWri},
4405        {AArch64::ADDSXrx, AArch64::ADDSWrx}}};
4406   return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4407 }
4408 
4409 MachineInstr *
4410 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS,
4411                                      MachineOperand &RHS,
4412                                      MachineIRBuilder &MIRBuilder) const {
4413   const std::array<std::array<unsigned, 2>, 5> OpcTable{
4414       {{AArch64::SUBSXri, AArch64::SUBSWri},
4415        {AArch64::SUBSXrs, AArch64::SUBSWrs},
4416        {AArch64::SUBSXrr, AArch64::SUBSWrr},
4417        {AArch64::ADDSXri, AArch64::ADDSWri},
4418        {AArch64::SUBSXrx, AArch64::SUBSWrx}}};
4419   return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder);
4420 }
4421 
4422 MachineInstr *
4423 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
4424                                     MachineIRBuilder &MIRBuilder) const {
4425   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4426   bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
4427   auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass;
4428   return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder);
4429 }
4430 
4431 MachineInstr *
4432 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS,
4433                                     MachineIRBuilder &MIRBuilder) const {
4434   assert(LHS.isReg() && RHS.isReg() && "Expected register operands?");
4435   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4436   LLT Ty = MRI.getType(LHS.getReg());
4437   unsigned RegSize = Ty.getSizeInBits();
4438   bool Is32Bit = (RegSize == 32);
4439   const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri},
4440                                    {AArch64::ANDSXrs, AArch64::ANDSWrs},
4441                                    {AArch64::ANDSXrr, AArch64::ANDSWrr}};
4442   // ANDS needs a logical immediate for its immediate form. Check if we can
4443   // fold one in.
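       // (For example, a mask like 0xff is encodable as a logical immediate and
       // selects ANDSXri/ANDSWri directly; constants that can't be encoded fall
       // through to the shifted-register or register-register forms below.)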
4444   if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) {
4445     int64_t Imm = ValAndVReg->Value.getSExtValue();
4446 
4447     if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) {
4448       auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS});
4449       TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
4450       constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
4451       return &*TstMI;
4452     }
4453   }
4454 
4455   if (auto Fns = selectLogicalShiftedRegister(RHS))
4456     return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns);
4457   return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder);
4458 }
4459 
4460 MachineInstr *AArch64InstructionSelector::emitIntegerCompare(
4461     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4462     MachineIRBuilder &MIRBuilder) const {
4463   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
4464   assert(Predicate.isPredicate() && "Expected predicate?");
4465   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4466   LLT CmpTy = MRI.getType(LHS.getReg());
4467   assert(!CmpTy.isVector() && "Expected scalar or pointer");
4468   unsigned Size = CmpTy.getSizeInBits();
4469   (void)Size;
4470   assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?");
4471   // Fold the compare into a cmn or tst if possible.
4472   if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder))
4473     return FoldCmp;
4474   auto Dst = MRI.cloneVirtualRegister(LHS.getReg());
4475   return emitSUBS(Dst, LHS, RHS, MIRBuilder);
4476 }
4477 
4478 MachineInstr *AArch64InstructionSelector::emitCSetForFCmp(
4479     Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const {
4480   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4481 #ifndef NDEBUG
4482   LLT Ty = MRI.getType(Dst);
4483   assert(!Ty.isVector() && Ty.getSizeInBits() == 32 &&
4484          "Expected a 32-bit scalar register?");
4485 #endif
4486   const Register ZReg = AArch64::WZR;
4487   AArch64CC::CondCode CC1, CC2;
4488   changeFCMPPredToAArch64CC(Pred, CC1, CC2);
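       // CSINC Wd, WZR, WZR, cc produces 1 only when cc does *not* hold (the
       // CSET alias inverts the condition for the same reason), so hand
       // emitCSINC the inverted condition codes to get 1 when the predicate
       // holds.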
4489   auto InvCC1 = AArch64CC::getInvertedCondCode(CC1);
4490   if (CC2 == AArch64CC::AL)
4491     return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1,
4492                      MIRBuilder);
4493   const TargetRegisterClass *RC = &AArch64::GPR32RegClass;
4494   Register Def1Reg = MRI.createVirtualRegister(RC);
4495   Register Def2Reg = MRI.createVirtualRegister(RC);
4496   auto InvCC2 = AArch64CC::getInvertedCondCode(CC2);
4497   emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder);
4498   emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder);
4499   auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg});
4500   constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI);
4501   return &*OrMI;
4502 }
4503 
4504 MachineInstr *
4505 AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS,
4506                                           MachineIRBuilder &MIRBuilder,
4507                                           Optional<CmpInst::Predicate> Pred) const {
4508   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4509   LLT Ty = MRI.getType(LHS);
4510   if (Ty.isVector())
4511     return nullptr;
4512   unsigned OpSize = Ty.getSizeInBits();
4513   if (OpSize != 32 && OpSize != 64)
4514     return nullptr;
4515 
4516   // If this is a compare against +0.0, then we don't have
4517   // to explicitly materialize a constant.
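       // (FCMP has a form that compares directly against #0.0, selected below
       // as FCMPSri/FCMPDri, which takes no second register operand.)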
4518   const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4519   bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4520 
4521   auto IsEqualityPred = [](CmpInst::Predicate P) {
4522     return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4523            P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4524   };
4525   if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4526     // Try commutating the operands.
4527     const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4528     if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4529       ShouldUseImm = true;
4530       std::swap(LHS, RHS);
4531     }
4532   }
4533   unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4534                               {AArch64::FCMPSri, AArch64::FCMPDri}};
4535   unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4536 
4537   // Partially build the compare. Decide if we need to add a use for the
4538   // third operand based off whether or not we're comparing against 0.0.
4539   auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4540   if (!ShouldUseImm)
4541     CmpMI.addUse(RHS);
4542   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4543   return &*CmpMI;
4544 }
4545 
4546 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4547     Optional<Register> Dst, Register Op1, Register Op2,
4548     MachineIRBuilder &MIRBuilder) const {
4549   // We implement a vector concat by:
4550   // 1. Using scalar_to_vector to insert the lower vector into the larger dest
4551   // 2. Inserting the upper vector into the destination's upper element
4552   // TODO: some of this code is common with G_BUILD_VECTOR handling.
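       // Roughly (registers illustrative): widen each 64-bit operand into the
       // low half of an FPR128, then
       //   %dst = INSvi64lane %wide1, 1, %wide2, 0
       // moves Op2's low 64 bits into the high half of the result.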
4553   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4554 
4555   const LLT Op1Ty = MRI.getType(Op1);
4556   const LLT Op2Ty = MRI.getType(Op2);
4557 
4558   if (Op1Ty != Op2Ty) {
4559     LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4560     return nullptr;
4561   }
4562   assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4563 
4564   if (Op1Ty.getSizeInBits() >= 128) {
4565     LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4566     return nullptr;
4567   }
4568 
4569   // At the moment we only support 64-bit vector concats.
4570   if (Op1Ty.getSizeInBits() != 64) {
4571     LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4572     return nullptr;
4573   }
4574 
4575   const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4576   const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4577   const TargetRegisterClass *DstRC =
4578       getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4579 
4580   MachineInstr *WidenedOp1 =
4581       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4582   MachineInstr *WidenedOp2 =
4583       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4584   if (!WidenedOp1 || !WidenedOp2) {
4585     LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4586     return nullptr;
4587   }
4588 
4589   // Now do the insert of the upper element.
4590   unsigned InsertOpc, InsSubRegIdx;
4591   std::tie(InsertOpc, InsSubRegIdx) =
4592       getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4593 
4594   if (!Dst)
4595     Dst = MRI.createVirtualRegister(DstRC);
4596   auto InsElt =
4597       MIRBuilder
4598           .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4599           .addImm(1) /* Lane index */
4600           .addUse(WidenedOp2->getOperand(0).getReg())
4601           .addImm(0);
4602   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4603   return &*InsElt;
4604 }
4605 
4606 MachineInstr *
4607 AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4608                                       Register Src2, AArch64CC::CondCode Pred,
4609                                       MachineIRBuilder &MIRBuilder) const {
4610   auto &MRI = *MIRBuilder.getMRI();
4611   const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4612   // If we used a register class, then this won't necessarily have an LLT.
4613   // Compute the size based off whether or not we have a class or bank.
4614   unsigned Size;
4615   if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4616     Size = TRI.getRegSizeInBits(*RC);
4617   else
4618     Size = MRI.getType(Dst).getSizeInBits();
4619   // Some opcodes use s1.
4620   assert(Size <= 64 && "Expected 64 bits or less only!");
4621   static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4622   unsigned Opc = OpcTable[Size == 64];
4623   auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4624   constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4625   return &*CSINC;
4626 }
4627 
4628 std::pair<MachineInstr *, AArch64CC::CondCode>
4629 AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4630                                            MachineOperand &LHS,
4631                                            MachineOperand &RHS,
4632                                            MachineIRBuilder &MIRBuilder) const {
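       // The returned condition code is the one that signals overflow for the
       // emitted flag-setting op: VS (V set) for the signed forms, HS (carry
       // set) for unsigned add, and LO (carry clear, i.e. borrow) for unsigned
       // subtract.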
4633   switch (Opcode) {
4634   default:
4635     llvm_unreachable("Unexpected opcode!");
4636   case TargetOpcode::G_SADDO:
4637     return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4638   case TargetOpcode::G_UADDO:
4639     return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4640   case TargetOpcode::G_SSUBO:
4641     return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4642   case TargetOpcode::G_USUBO:
4643     return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4644   }
4645 }
4646 
4647 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) {
4648   MachineRegisterInfo &MRI = *MIB.getMRI();
4649   // We want to recognize this pattern:
4650   //
4651   // $z = G_FCMP pred, $x, $y
4652   // ...
4653   // $w = G_SELECT $z, $a, $b
4654   //
4655   // Where the value of $z is *only* ever used by the G_SELECT (possibly with
4656   // some copies/truncs in between).
4657   //
4658   // If we see this, then we can emit something like this:
4659   //
4660   // fcmp $x, $y
4661   // fcsel $w, $a, $b, pred
4662   //
4663   // rather than emitting both of the long sequences that the standard
4664   // G_FCMP/G_SELECT selection paths would produce.
4665 
4666   // First, check if the condition is defined by a compare.
4667   MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
4668   while (CondDef) {
4669     // We can only fold if all of the defs have one use.
4670     Register CondDefReg = CondDef->getOperand(0).getReg();
4671     if (!MRI.hasOneNonDBGUse(CondDefReg)) {
4672       // Unless it's another select.
4673       for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
4674         if (CondDef == &UI)
4675           continue;
4676         if (UI.getOpcode() != TargetOpcode::G_SELECT)
4677           return false;
4678       }
4679     }
4680 
4681     // We can skip over G_TRUNC since the condition is 1-bit.
4682     // Truncating/extending can have no impact on the value.
4683     unsigned Opc = CondDef->getOpcode();
4684     if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
4685       break;
4686 
4687     // Can't see past copies from physregs.
4688     if (Opc == TargetOpcode::COPY &&
4689         Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
4690       return false;
4691 
4692     CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
4693   }
4694 
4695   // Is the condition defined by a compare?
4696   if (!CondDef)
4697     return false;
4698 
4699   unsigned CondOpc = CondDef->getOpcode();
4700   if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4701     return false;
4702 
4703   AArch64CC::CondCode CondCode;
4704   if (CondOpc == TargetOpcode::G_ICMP) {
4705     auto Pred =
4706         static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4707     CondCode = changeICMPPredToAArch64CC(Pred);
4708     emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4709                        CondDef->getOperand(1), MIB);
4710   } else {
4711     // Get the condition code for the select.
4712     auto Pred =
4713         static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate());
4714     AArch64CC::CondCode CondCode2;
4715     changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2);
4716 
4717     // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4718     // instructions to emit the comparison.
4719     // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4720     // unnecessary.
4721     if (CondCode2 != AArch64CC::AL)
4722       return false;
4723 
4724     if (!emitFPCompare(CondDef->getOperand(2).getReg(),
4725                        CondDef->getOperand(3).getReg(), MIB)) {
4726       LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4727       return false;
4728     }
4729   }
4730 
4731   // Emit the select.
4732   emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(),
4733              I.getOperand(3).getReg(), CondCode, MIB);
4734   I.eraseFromParent();
4735   return true;
4736 }
4737 
4738 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4739     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4740     MachineIRBuilder &MIRBuilder) const {
4741   assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4742          "Unexpected MachineOperand");
4743   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4744   // We want to find this sort of thing:
4745   // x = G_SUB 0, y
4746   // G_ICMP z, x
4747   //
4748   // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4749   // e.g:
4750   //
4751   // cmn z, y
4752 
4753   // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4754   MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4755   MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4756   auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate());
4757   // Given this:
4758   //
4759   // x = G_SUB 0, y
4760   // G_ICMP x, z
4761   //
4762   // Produce this:
4763   //
4764   // cmn y, z
4765   if (isCMN(LHSDef, P, MRI))
4766     return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4767 
4768   // Same idea here, but with the RHS of the compare instead:
4769   //
4770   // Given this:
4771   //
4772   // x = G_SUB 0, y
4773   // G_ICMP z, x
4774   //
4775   // Produce this:
4776   //
4777   // cmn z, y
4778   if (isCMN(RHSDef, P, MRI))
4779     return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4780 
4781   // Given this:
4782   //
4783   // z = G_AND x, y
4784   // G_ICMP z, 0
4785   //
4786   // Produce this if the compare is signed:
4787   //
4788   // tst x, y
4789   if (!CmpInst::isUnsigned(P) && LHSDef &&
4790       LHSDef->getOpcode() == TargetOpcode::G_AND) {
4791     // Make sure that the RHS is 0.
4792     auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4793     if (!ValAndVReg || ValAndVReg->Value != 0)
4794       return nullptr;
4795 
4796     return emitTST(LHSDef->getOperand(1),
4797                    LHSDef->getOperand(2), MIRBuilder);
4798   }
4799 
4800   return nullptr;
4801 }
4802 
4803 bool AArch64InstructionSelector::selectShuffleVector(
4804     MachineInstr &I, MachineRegisterInfo &MRI) {
4805   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4806   Register Src1Reg = I.getOperand(1).getReg();
4807   const LLT Src1Ty = MRI.getType(Src1Reg);
4808   Register Src2Reg = I.getOperand(2).getReg();
4809   const LLT Src2Ty = MRI.getType(Src2Reg);
4810   ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4811 
4812   MachineBasicBlock &MBB = *I.getParent();
4813   MachineFunction &MF = *MBB.getParent();
4814   LLVMContext &Ctx = MF.getFunction().getContext();
4815 
4816   // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4817   // it originated from a <1 x T> type. Those should have been lowered into
4818   // G_BUILD_VECTOR earlier.
4819   if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4820     LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4821     return false;
4822   }
4823 
4824   unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4825 
4826   SmallVector<Constant *, 64> CstIdxs;
4827   for (int Val : Mask) {
4828     // For now, we just assume any undef index is 0. This should be
4829     // optimized in the future, e.g. to select DUP etc.
4830     Val = Val < 0 ? 0 : Val;
4831     for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4832       unsigned Offset = Byte + Val * BytesPerElt;
4833       CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4834     }
4835   }
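       // For example (illustrative), a <4 x s16> shuffle with mask [0, 4, 1, 5]
       // expands to the byte indices [0,1, 8,9, 2,3, 10,11] above, where bytes
       // 8..15 refer to the second (concatenated) source vector.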
4836 
4837   // Use a constant pool to load the index vector for TBL.
4838   Constant *CPVal = ConstantVector::get(CstIdxs);
4839   MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB);
4840   if (!IndexLoad) {
4841     LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
4842     return false;
4843   }
4844 
4845   if (DstTy.getSizeInBits() != 128) {
4846     assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4847     // This case can be done with TBL1.
4848     MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB);
4849     if (!Concat) {
4850       LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1");
4851       return false;
4852     }
4853 
4854     // The constant pool load will be 64 bits, so need to convert to FPR128 reg.
4855     IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass,
4856                                    IndexLoad->getOperand(0).getReg(), MIB);
4857 
4858     auto TBL1 = MIB.buildInstr(
4859         AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4860         {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4861     constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4862 
4863     auto Copy =
4864         MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4865             .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4866     RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4867     I.eraseFromParent();
4868     return true;
4869   }
4870 
4871   // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4872   // Q registers for regalloc.
4873   SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg};
4874   auto RegSeq = createQTuple(Regs, MIB);
4875   auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4876                              {RegSeq, IndexLoad->getOperand(0)});
4877   constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4878   I.eraseFromParent();
4879   return true;
4880 }
4881 
4882 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4883     Optional<Register> DstReg, Register SrcReg, Register EltReg,
4884     unsigned LaneIdx, const RegisterBank &RB,
4885     MachineIRBuilder &MIRBuilder) const {
4886   MachineInstr *InsElt = nullptr;
4887   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4888   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4889 
4890   // Create a register to define with the insert if one wasn't passed in.
4891   if (!DstReg)
4892     DstReg = MRI.createVirtualRegister(DstRC);
4893 
4894   unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4895   unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4896 
4897   if (RB.getID() == AArch64::FPRRegBankID) {
4898     auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4899     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4900                  .addImm(LaneIdx)
4901                  .addUse(InsSub->getOperand(0).getReg())
4902                  .addImm(0);
4903   } else {
4904     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4905                  .addImm(LaneIdx)
4906                  .addUse(EltReg);
4907   }
4908 
4909   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4910   return InsElt;
4911 }
4912 
4913 bool AArch64InstructionSelector::selectUSMovFromExtend(
4914     MachineInstr &MI, MachineRegisterInfo &MRI) {
4915   if (MI.getOpcode() != TargetOpcode::G_SEXT &&
4916       MI.getOpcode() != TargetOpcode::G_ZEXT &&
4917       MI.getOpcode() != TargetOpcode::G_ANYEXT)
4918     return false;
4919   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT;
4920   const Register DefReg = MI.getOperand(0).getReg();
4921   const LLT DstTy = MRI.getType(DefReg);
4922   unsigned DstSize = DstTy.getSizeInBits();
4923 
4924   if (DstSize != 32 && DstSize != 64)
4925     return false;
4926 
4927   MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT,
4928                                        MI.getOperand(1).getReg(), MRI);
4929   int64_t Lane;
4930   if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane)))
4931     return false;
4932   Register Src0 = Extract->getOperand(1).getReg();
4933 
4934   const LLT &VecTy = MRI.getType(Src0);
4935 
4936   if (VecTy.getSizeInBits() != 128) {
4937     const MachineInstr *ScalarToVector = emitScalarToVector(
4938         VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB);
4939     assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!");
4940     Src0 = ScalarToVector->getOperand(0).getReg();
4941   }
4942 
4943   unsigned Opcode;
4944   if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32)
4945     Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32;
4946   else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16)
4947     Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16;
4948   else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8)
4949     Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8;
4950   else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16)
4951     Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16;
4952   else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8)
4953     Opcode = IsSigned ? AArch64::SMOVvi8to32 : AArch64::UMOVvi8;
4954   else
4955     llvm_unreachable("Unexpected type combo for S/UMov!");
4956 
4957   // We may need to generate one of these, depending on the type and sign of the
4958   // input:
4959   //  DstReg = SMOV Src0, Lane;
4960   //  NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32;
4961   MachineInstr *ExtI = nullptr;
4962   if (DstSize == 64 && !IsSigned) {
4963     Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4964     MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane);
4965     ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
4966                .addImm(0)
4967                .addUse(NewReg)
4968                .addImm(AArch64::sub_32);
4969     RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
4970   } else
4971     ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane);
4972 
4973   constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
4974   MI.eraseFromParent();
4975   return true;
4976 }
4977 
4978 bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I,
4979                                                  MachineRegisterInfo &MRI) {
4980   assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4981 
4982   // Get information on the destination.
4983   Register DstReg = I.getOperand(0).getReg();
4984   const LLT DstTy = MRI.getType(DstReg);
4985   unsigned VecSize = DstTy.getSizeInBits();
4986 
4987   // Get information on the element we want to insert into the destination.
4988   Register EltReg = I.getOperand(2).getReg();
4989   const LLT EltTy = MRI.getType(EltReg);
4990   unsigned EltSize = EltTy.getSizeInBits();
4991   if (EltSize < 16 || EltSize > 64)
4992     return false; // Don't support all element types yet.
4993 
4994   // Find the definition of the index. Bail out if it's not defined by a
4995   // G_CONSTANT.
4996   Register IdxReg = I.getOperand(3).getReg();
4997   auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI);
4998   if (!VRegAndVal)
4999     return false;
5000   unsigned LaneIdx = VRegAndVal->Value.getSExtValue();
5001 
5002   // Perform the lane insert.
5003   Register SrcReg = I.getOperand(1).getReg();
5004   const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5005 
5006   if (VecSize < 128) {
5007     // If the vector we're inserting into is smaller than 128 bits, widen it
5008     // to 128 to do the insert.
5009     MachineInstr *ScalarToVec =
5010         emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB);
5011     if (!ScalarToVec)
5012       return false;
5013     SrcReg = ScalarToVec->getOperand(0).getReg();
5014   }
5015 
5016   // Create an insert into a new FPR128 register.
5017   // Note that if our vector is already 128 bits, we end up emitting an extra
5018   // register.
5019   MachineInstr *InsMI =
5020       emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB);
5021 
5022   if (VecSize < 128) {
5023     // If we had to widen to perform the insert, then we have to demote back to
5024     // the original size to get the result we want.
5025     Register DemoteVec = InsMI->getOperand(0).getReg();
5026     const TargetRegisterClass *RC =
5027         getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
5028     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5029       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5030       return false;
5031     }
5032     unsigned SubReg = 0;
5033     if (!getSubRegForClass(RC, TRI, SubReg))
5034       return false;
5035     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5036       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
5037                         << ")\n");
5038       return false;
5039     }
5040     MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {})
5041         .addReg(DemoteVec, 0, SubReg);
5042     RBI.constrainGenericRegister(DstReg, *RC, MRI);
5043   } else {
5044     // No widening needed.
5045     InsMI->getOperand(0).setReg(DstReg);
5046     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
5047   }
5048 
5049   I.eraseFromParent();
5050   return true;
5051 }
5052 
5053 MachineInstr *
5054 AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV,
5055                                                MachineIRBuilder &MIRBuilder,
5056                                                MachineRegisterInfo &MRI) {
5057   LLT DstTy = MRI.getType(Dst);
5058   unsigned DstSize = DstTy.getSizeInBits();
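       // An all-zeros vector of 64 or 128 bits can be materialized with a
       // single MOVI instead of a constant pool load, so handle that first.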
5059   if (CV->isNullValue()) {
5060     if (DstSize == 128) {
5061       auto Mov =
5062           MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0);
5063       constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI);
5064       return &*Mov;
5065     }
5066 
5067     if (DstSize == 64) {
5068       auto Mov =
5069           MIRBuilder
5070               .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {})
5071               .addImm(0);
5072       auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {})
5073                       .addReg(Mov.getReg(0), 0, AArch64::dsub);
5074       RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI);
5075       return &*Copy;
5076     }
5077   }
5078 
5079   auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder);
5080   if (!CPLoad) {
5081     LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!");
5082     return nullptr;
5083   }
5084 
5085   auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0));
5086   RBI.constrainGenericRegister(
5087       Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI);
5088   return &*Copy;
5089 }
5090 
5091 bool AArch64InstructionSelector::tryOptConstantBuildVec(
5092     MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) {
5093   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5094   unsigned DstSize = DstTy.getSizeInBits();
5095   assert(DstSize <= 128 && "Unexpected build_vec type!");
5096   if (DstSize < 32)
5097     return false;
5098   // Check if we're building a constant vector, in which case we want to
5099   // generate a constant pool load instead of a vector insert sequence.
5100   SmallVector<Constant *, 16> Csts;
5101   for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
5102     // Try to find G_CONSTANT or G_FCONSTANT
5103     auto *OpMI =
5104         getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
5105     if (OpMI)
5106       Csts.emplace_back(
5107           const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
5108     else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
5109                                   I.getOperand(Idx).getReg(), MRI)))
5110       Csts.emplace_back(
5111           const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
5112     else
5113       return false;
5114   }
5115   Constant *CV = ConstantVector::get(Csts);
5116   if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI))
5117     return false;
5118   I.eraseFromParent();
5119   return true;
5120 }
5121 
5122 bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg(
5123     MachineInstr &I, MachineRegisterInfo &MRI) {
5124   // Given:
5125   //  %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef
5126   //
5127   // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt.
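       // For example (a sketch), a <4 x s32> built from one FPR s32 plus undefs
       // is expected to become something like
       //   %vec:fpr128 = SUBREG_TO_REG 0, %elt:fpr32, %subreg.ssub
       // with no code emitted for the undef lanes.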
5128   Register Dst = I.getOperand(0).getReg();
5129   Register EltReg = I.getOperand(1).getReg();
5130   LLT EltTy = MRI.getType(EltReg);
5131   // If the destination isn't on the same bank as the element, then this
5132   // can't be a SUBREG_TO_REG.
5133   const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
5134   const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI);
5135   if (EltRB != DstRB)
5136     return false;
5137   if (any_of(make_range(I.operands_begin() + 2, I.operands_end()),
5138              [&MRI](const MachineOperand &Op) {
5139                return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(),
5140                                     MRI);
5141              }))
5142     return false;
5143   unsigned SubReg;
5144   const TargetRegisterClass *EltRC =
5145       getMinClassForRegBank(EltRB, EltTy.getSizeInBits());
5146   if (!EltRC)
5147     return false;
5148   const TargetRegisterClass *DstRC =
5149       getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits());
5150   if (!DstRC)
5151     return false;
5152   if (!getSubRegForClass(EltRC, TRI, SubReg))
5153     return false;
5154   auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {})
5155                          .addImm(0)
5156                          .addUse(EltReg)
5157                          .addImm(SubReg);
5158   I.eraseFromParent();
5159   constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI);
5160   return RBI.constrainGenericRegister(Dst, *DstRC, MRI);
5161 }
5162 
5163 bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I,
5164                                                    MachineRegisterInfo &MRI) {
5165   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
5166   // Until we port more of the optimized selections, for now just use a vector
5167   // insert sequence.
5168   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
5169   const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
5170   unsigned EltSize = EltTy.getSizeInBits();
5171 
5172   if (tryOptConstantBuildVec(I, DstTy, MRI))
5173     return true;
5174   if (tryOptBuildVecToSubregToReg(I, MRI))
5175     return true;
5176 
5177   if (EltSize < 16 || EltSize > 64)
5178     return false; // Don't support all element types yet.
5179   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
5180 
5181   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
5182   MachineInstr *ScalarToVec =
5183       emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
5184                          I.getOperand(1).getReg(), MIB);
5185   if (!ScalarToVec)
5186     return false;
5187 
5188   Register DstVec = ScalarToVec->getOperand(0).getReg();
5189   unsigned DstSize = DstTy.getSizeInBits();
5190 
5191   // Keep track of the last MI we inserted. Later on, we might be able to save
5192   // a copy using it.
5193   MachineInstr *PrevMI = nullptr;
5194   for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
5195     // Note that if we don't do a subregister copy, we can end up making an
5196     // extra register.
5197     PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
5198                               MIB);
5199     DstVec = PrevMI->getOperand(0).getReg();
5200   }
5201 
5202   // If DstTy's size in bits is less than 128, then emit a subregister copy
5203   // from DstVec into the original destination register.
5204   if (DstSize < 128) {
5205     // Force this to be FPR using the destination vector.
5206     const TargetRegisterClass *RC =
5207         getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
5208     if (!RC)
5209       return false;
5210     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
5211       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
5212       return false;
5213     }
5214 
5215     unsigned SubReg = 0;
5216     if (!getSubRegForClass(RC, TRI, SubReg))
5217       return false;
5218     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
5219       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
5220                         << ")\n");
5221       return false;
5222     }
5223 
5224     Register Reg = MRI.createVirtualRegister(RC);
5225     Register DstReg = I.getOperand(0).getReg();
5226 
5227     MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg);
5228     MachineOperand &RegOp = I.getOperand(1);
5229     RegOp.setReg(Reg);
5230     RBI.constrainGenericRegister(DstReg, *RC, MRI);
5231   } else {
5232     // We don't need a subregister copy. Save a copy by re-using the
5233     // destination register on the final insert.
5234     assert(PrevMI && "PrevMI was null?");
5235     PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
5236     constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
5237   }
5238 
5239   I.eraseFromParent();
5240   return true;
5241 }
5242 
5243 bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc,
5244                                                            unsigned NumVecs,
5245                                                            MachineInstr &I) {
5246   assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
5247   assert(Opc && "Expected an opcode?");
5248   assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors");
5249   auto &MRI = *MIB.getMRI();
5250   LLT Ty = MRI.getType(I.getOperand(0).getReg());
5251   unsigned Size = Ty.getSizeInBits();
5252   assert((Size == 64 || Size == 128) &&
5253          "Destination must be 64 bits or 128 bits?");
5254   unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0;
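       // The code below relies on the tuple subregister indices (dsub0..dsub3,
       // qsub0..qsub3) being consecutive so that SubReg + Idx selects the
       // Idx'th vector of the loaded tuple.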
5255   auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg();
5256   assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?");
5257   auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr});
5258   Load.cloneMemRefs(I);
5259   constrainSelectedInstRegOperands(*Load, TII, TRI, RBI);
5260   Register SelectedLoadDst = Load->getOperand(0).getReg();
5261   for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
5262     auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {})
5263                    .addReg(SelectedLoadDst, 0, SubReg + Idx);
5264     // Emit the subreg copies and immediately select them.
5265     // FIXME: We should refactor our copy code into an emitCopy helper and
5266     // clean up uses of this pattern elsewhere in the selector.
5267     selectCopy(*Vec, TII, MRI, TRI, RBI);
5268   }
5269   return true;
5270 }
5271 
5272 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
5273     MachineInstr &I, MachineRegisterInfo &MRI) {
5274   // Find the intrinsic ID.
5275   unsigned IntrinID = I.getIntrinsicID();
5276 
5277   const LLT S8 = LLT::scalar(8);
5278   const LLT S16 = LLT::scalar(16);
5279   const LLT S32 = LLT::scalar(32);
5280   const LLT S64 = LLT::scalar(64);
5281   const LLT P0 = LLT::pointer(0, 64);
5282   // Select the instruction.
5283   switch (IntrinID) {
5284   default:
5285     return false;
5286   case Intrinsic::aarch64_ldxp:
5287   case Intrinsic::aarch64_ldaxp: {
5288     auto NewI = MIB.buildInstr(
5289         IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX,
5290         {I.getOperand(0).getReg(), I.getOperand(1).getReg()},
5291         {I.getOperand(3)});
5292     NewI.cloneMemRefs(I);
5293     constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
5294     break;
5295   }
5296   case Intrinsic::trap:
5297     MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1);
5298     break;
5299   case Intrinsic::debugtrap:
5300     MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
5301     break;
5302   case Intrinsic::ubsantrap:
5303     MIB.buildInstr(AArch64::BRK, {}, {})
5304         .addImm(I.getOperand(1).getImm() | ('U' << 8));
5305     break;
5306   case Intrinsic::aarch64_neon_ld2: {
5307     LLT Ty = MRI.getType(I.getOperand(0).getReg());
5308     unsigned Opc = 0;
5309     if (Ty == LLT::fixed_vector(8, S8))
5310       Opc = AArch64::LD2Twov8b;
5311     else if (Ty == LLT::fixed_vector(16, S8))
5312       Opc = AArch64::LD2Twov16b;
5313     else if (Ty == LLT::fixed_vector(4, S16))
5314       Opc = AArch64::LD2Twov4h;
5315     else if (Ty == LLT::fixed_vector(8, S16))
5316       Opc = AArch64::LD2Twov8h;
5317     else if (Ty == LLT::fixed_vector(2, S32))
5318       Opc = AArch64::LD2Twov2s;
5319     else if (Ty == LLT::fixed_vector(4, S32))
5320       Opc = AArch64::LD2Twov4s;
5321     else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5322       Opc = AArch64::LD2Twov2d;
5323     else if (Ty == S64 || Ty == P0)
5324       Opc = AArch64::LD1Twov1d;
5325     else
5326       llvm_unreachable("Unexpected type for ld2!");
5327     selectVectorLoadIntrinsic(Opc, 2, I);
5328     break;
5329   }
5330   case Intrinsic::aarch64_neon_ld4: {
5331     LLT Ty = MRI.getType(I.getOperand(0).getReg());
5332     unsigned Opc = 0;
5333     if (Ty == LLT::fixed_vector(8, S8))
5334       Opc = AArch64::LD4Fourv8b;
5335     else if (Ty == LLT::fixed_vector(16, S8))
5336       Opc = AArch64::LD4Fourv16b;
5337     else if (Ty == LLT::fixed_vector(4, S16))
5338       Opc = AArch64::LD4Fourv4h;
5339     else if (Ty == LLT::fixed_vector(8, S16))
5340       Opc = AArch64::LD4Fourv8h;
5341     else if (Ty == LLT::fixed_vector(2, S32))
5342       Opc = AArch64::LD4Fourv2s;
5343     else if (Ty == LLT::fixed_vector(4, S32))
5344       Opc = AArch64::LD4Fourv4s;
5345     else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5346       Opc = AArch64::LD4Fourv2d;
5347     else if (Ty == S64 || Ty == P0)
5348       Opc = AArch64::LD1Fourv1d;
5349     else
5350       llvm_unreachable("Unexpected type for ld4!");
5351     selectVectorLoadIntrinsic(Opc, 4, I);
5352     break;
5353   }
5354   case Intrinsic::aarch64_neon_st2: {
5355     Register Src1 = I.getOperand(1).getReg();
5356     Register Src2 = I.getOperand(2).getReg();
5357     Register Ptr = I.getOperand(3).getReg();
5358     LLT Ty = MRI.getType(Src1);
5359     unsigned Opc;
5360     if (Ty == LLT::fixed_vector(8, S8))
5361       Opc = AArch64::ST2Twov8b;
5362     else if (Ty == LLT::fixed_vector(16, S8))
5363       Opc = AArch64::ST2Twov16b;
5364     else if (Ty == LLT::fixed_vector(4, S16))
5365       Opc = AArch64::ST2Twov4h;
5366     else if (Ty == LLT::fixed_vector(8, S16))
5367       Opc = AArch64::ST2Twov8h;
5368     else if (Ty == LLT::fixed_vector(2, S32))
5369       Opc = AArch64::ST2Twov2s;
5370     else if (Ty == LLT::fixed_vector(4, S32))
5371       Opc = AArch64::ST2Twov4s;
5372     else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0))
5373       Opc = AArch64::ST2Twov2d;
5374     else if (Ty == S64 || Ty == P0)
5375       Opc = AArch64::ST1Twov1d;
5376     else
5377       llvm_unreachable("Unexpected type for st2!");
5378     SmallVector<Register, 2> Regs = {Src1, Src2};
5379     Register Tuple = Ty.getSizeInBits() == 128 ? createQTuple(Regs, MIB)
5380                                                : createDTuple(Regs, MIB);
5381     auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5382     Store.cloneMemRefs(I);
5383     constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5384     break;
5385   }
5386   }
5387 
5388   I.eraseFromParent();
5389   return true;
5390 }
5391 
5392 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
5393                                                  MachineRegisterInfo &MRI) {
5394   unsigned IntrinID = I.getIntrinsicID();
5395 
5396   switch (IntrinID) {
5397   default:
5398     break;
5399   case Intrinsic::aarch64_crypto_sha1h: {
5400     Register DstReg = I.getOperand(0).getReg();
5401     Register SrcReg = I.getOperand(2).getReg();
5402 
5403     // FIXME: Should this be an assert?
5404     if (MRI.getType(DstReg).getSizeInBits() != 32 ||
5405         MRI.getType(SrcReg).getSizeInBits() != 32)
5406       return false;
5407 
5408     // The operation has to happen on FPRs. Set up some new FPR registers for
5409     // the source and destination if they are on GPRs.
5410     if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
5411       SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5412       MIB.buildCopy({SrcReg}, {I.getOperand(2)});
5413 
5414       // Make sure the copy ends up getting constrained properly.
5415       RBI.constrainGenericRegister(I.getOperand(2).getReg(),
5416                                    AArch64::GPR32RegClass, MRI);
5417     }
5418 
5419     if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
5420       DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5421 
5422     // Actually insert the instruction.
5423     auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
5424     constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
5425 
5426     // Did we create a new register for the destination?
5427     if (DstReg != I.getOperand(0).getReg()) {
5428       // Yep. Copy the result of the instruction back into the original
5429       // destination.
5430       MIB.buildCopy({I.getOperand(0)}, {DstReg});
5431       RBI.constrainGenericRegister(I.getOperand(0).getReg(),
5432                                    AArch64::GPR32RegClass, MRI);
5433     }
5434 
5435     I.eraseFromParent();
5436     return true;
5437   }
5438   case Intrinsic::ptrauth_sign: {
5439     Register DstReg = I.getOperand(0).getReg();
5440     Register ValReg = I.getOperand(2).getReg();
5441     uint64_t Key = I.getOperand(3).getImm();
5442     Register DiscReg = I.getOperand(4).getReg();
5443     auto DiscVal = getIConstantVRegVal(DiscReg, MRI);
5444     bool IsDiscZero = DiscVal.hasValue() && DiscVal->isNullValue();
5445 
5446     if (Key > 3)
5447       return false;
5448 
5449     unsigned Opcodes[][4] = {
5450         {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB},
5451         {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}};
5452     unsigned Opcode = Opcodes[IsDiscZero][Key];
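    // For example, key 0 (IA) with a non-zero discriminator selects PACIA,
    // while a discriminator known to be zero selects PACIZA and drops the
    // discriminator operand below.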
5453 
5454     auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg});
5455 
5456     if (!IsDiscZero) {
5457       PAC.addUse(DiscReg);
5458       RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI);
5459     }
5460 
5461     RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5462     I.eraseFromParent();
5463     return true;
5464   }
5465   case Intrinsic::frameaddress:
5466   case Intrinsic::returnaddress: {
5467     MachineFunction &MF = *I.getParent()->getParent();
5468     MachineFrameInfo &MFI = MF.getFrameInfo();
5469 
5470     unsigned Depth = I.getOperand(2).getImm();
5471     Register DstReg = I.getOperand(0).getReg();
5472     RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
5473 
5474     if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
5475       if (!MFReturnAddr) {
5476         // Insert the copy from LR/X30 into the entry block, before it can be
5477         // clobbered by anything.
5478         MFI.setReturnAddressIsTaken(true);
5479         MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR,
5480                                                 AArch64::GPR64RegClass);
5481       }
5482 
5483       if (STI.hasPAuth()) {
5484         MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr});
5485       } else {
5486         MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr});
5487         MIB.buildInstr(AArch64::XPACLRI);
5488         MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5489       }
5490 
5491       I.eraseFromParent();
5492       return true;
5493     }
5494 
5495     MFI.setFrameAddressIsTaken(true);
5496     Register FrameAddr(AArch64::FP);
5497     while (Depth--) {
5498       Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
5499       auto Ldr =
5500           MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0);
5501       constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
5502       FrameAddr = NextFrame;
5503     }
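    // The AArch64 frame record stores the caller's frame pointer at [fp, #0]
    // and the return address at [fp, #8]; LDRXui immediates are scaled by 8,
    // so the loads above and below use #0 and #1 respectively.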
5504 
5505     if (IntrinID == Intrinsic::frameaddress)
5506       MIB.buildCopy({DstReg}, {FrameAddr});
5507     else {
5508       MFI.setReturnAddressIsTaken(true);
5509 
5510       if (STI.hasPAuth()) {
5511         Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
5512         MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1);
5513         MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg});
5514       } else {
5515         MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr})
5516             .addImm(1);
5517         MIB.buildInstr(AArch64::XPACLRI);
5518         MIB.buildCopy({DstReg}, {Register(AArch64::LR)});
5519       }
5520     }
5521 
5522     I.eraseFromParent();
5523     return true;
5524   }
5525   case Intrinsic::swift_async_context_addr:
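    // With an extended frame record, the Swift async context is expected to
    // live just below the saved frame pointer, so its address is FP - 8.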
5526     auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()},
5527                               {Register(AArch64::FP)})
5528                    .addImm(8)
5529                    .addImm(0);
5530     constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI);
5531 
5532     MF->getFrameInfo().setFrameAddressIsTaken(true);
5533     MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5534     I.eraseFromParent();
5535     return true;
5536   }
5537   return false;
5538 }
5539 
5540 InstructionSelector::ComplexRendererFns
5541 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
5542   auto MaybeImmed = getImmedFromMO(Root);
5543   if (MaybeImmed == None || *MaybeImmed > 31)
5544     return None;
5545   uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
5546   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5547 }
5548 
5549 InstructionSelector::ComplexRendererFns
5550 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
5551   auto MaybeImmed = getImmedFromMO(Root);
5552   if (MaybeImmed == None || *MaybeImmed > 31)
5553     return None;
5554   uint64_t Enc = 31 - *MaybeImmed;
5555   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5556 }
5557 
5558 InstructionSelector::ComplexRendererFns
5559 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
5560   auto MaybeImmed = getImmedFromMO(Root);
5561   if (MaybeImmed == None || *MaybeImmed > 63)
5562     return None;
5563   uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
5564   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5565 }
5566 
5567 InstructionSelector::ComplexRendererFns
5568 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
5569   auto MaybeImmed = getImmedFromMO(Root);
5570   if (MaybeImmed == None || *MaybeImmed > 63)
5571     return None;
5572   uint64_t Enc = 63 - *MaybeImmed;
5573   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
5574 }
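// As a worked example of the helpers above: for an immediate of 5, the "A"
// forms encode (32 - 5) & 0x1f == 27 and (64 - 5) & 0x3f == 59, while the "B"
// forms encode 31 - 5 == 26 and 63 - 5 == 58.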
5575 
5576 /// Helper to select an immediate value that can be represented as a 12-bit
5577 /// value shifted left by either 0 or 12. If it is possible to do so, return
5578 /// the immediate and shift value. If not, return None.
5579 ///
5580 /// Used by selectArithImmed and selectNegArithImmed.
5581 InstructionSelector::ComplexRendererFns
5582 AArch64InstructionSelector::select12BitValueWithLeftShift(
5583     uint64_t Immed) const {
5584   unsigned ShiftAmt;
5585   if (Immed >> 12 == 0) {
5586     ShiftAmt = 0;
5587   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
5588     ShiftAmt = 12;
5589     Immed = Immed >> 12;
5590   } else
5591     return None;
5592 
5593   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
5594   return {{
5595       [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
5596       [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
5597   }};
5598 }
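// As an example of the helper above: 0xabc is returned as (0xabc, LSL 0) and
// 0xabc000 as (0xabc, LSL 12), while values such as 0xabc123 or 0x1abc000
// yield None because they cannot be expressed as a 12-bit value shifted by
// either 0 or 12.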
5599 
5600 /// SelectArithImmed - Select an immediate value that can be represented as
5601 /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
5602 /// Val set to the 12-bit value and Shift set to the shifter operand.
5603 InstructionSelector::ComplexRendererFns
5604 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
5605   // This function is called from the addsub_shifted_imm ComplexPattern,
5606   // which lists [imm] as the list of opcodes it's interested in. However,
5607   // we still need to check whether the operand is actually an immediate
5608   // here because the ComplexPattern opcode list is only used in
5609   // root-level opcode matching.
5610   auto MaybeImmed = getImmedFromMO(Root);
5611   if (MaybeImmed == None)
5612     return None;
5613   return select12BitValueWithLeftShift(*MaybeImmed);
5614 }
5615 
5616 /// SelectNegArithImmed - As above, but negates the value before trying to
5617 /// select it.
5618 InstructionSelector::ComplexRendererFns
5619 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
5620   // We need a register here, because we have to know whether the immediate is
5621   // 64 or 32 bits wide.
5622   if (!Root.isReg())
5623     return None;
5624   auto MaybeImmed = getImmedFromMO(Root);
5625   if (MaybeImmed == None)
5626     return None;
5627   uint64_t Immed = *MaybeImmed;
5628 
5629   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
5630   // have the opposite effect on the C flag, so this pattern mustn't match under
5631   // those circumstances.
5632   if (Immed == 0)
5633     return None;
5634 
5635   // Check whether we're dealing with a 32-bit or a 64-bit type on the
5636   // root.
5637   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5638   if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
5639     Immed = ~((uint32_t)Immed) + 1;
5640   else
5641     Immed = ~Immed + 1ULL;
5642 
5643   if (Immed & 0xFFFFFFFFFF000000ULL)
5644     return None;
5645 
5646   Immed &= 0xFFFFFFULL;
5647   return select12BitValueWithLeftShift(Immed);
5648 }
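// For example, when the root is a 32-bit constant of -4, the negation above
// yields 4, which fits the 12-bit form, so a pattern using the negated
// immediate (e.g. selecting a CMN rather than a CMP) can match.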
5649 
5650 /// Return true if it is worth folding MI into an extended register. That is,
5651 /// if it's safe to pull it into the addressing mode of a load or store as a
5652 /// shift.
5653 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
5654     MachineInstr &MI, const MachineRegisterInfo &MRI) const {
5655   // Always fold if there is one use, or if we're optimizing for size.
5656   Register DefReg = MI.getOperand(0).getReg();
5657   if (MRI.hasOneNonDBGUse(DefReg) ||
5658       MI.getParent()->getParent()->getFunction().hasOptSize())
5659     return true;
5660 
5661   // It's better to avoid folding and recomputing shifts when we don't have a
5662   // fastpath.
5663   if (!STI.hasLSLFast())
5664     return false;
5665 
5666   // We have a fastpath, so folding a shift in and potentially computing it
5667   // many times may be beneficial. Check if this is only used in memory ops.
5668   // If it is, then we should fold.
5669   return all_of(MRI.use_nodbg_instructions(DefReg),
5670                 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
5671 }
5672 
5673 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
5674   switch (Type) {
5675   case AArch64_AM::SXTB:
5676   case AArch64_AM::SXTH:
5677   case AArch64_AM::SXTW:
5678     return true;
5679   default:
5680     return false;
5681   }
5682 }
5683 
5684 InstructionSelector::ComplexRendererFns
5685 AArch64InstructionSelector::selectExtendedSHL(
5686     MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
5687     unsigned SizeInBytes, bool WantsExt) const {
5688   assert(Base.isReg() && "Expected base to be a register operand");
5689   assert(Offset.isReg() && "Expected offset to be a register operand");
5690 
5691   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5692   MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
5693   if (!OffsetInst)
5694     return None;
5695 
5696   unsigned OffsetOpc = OffsetInst->getOpcode();
5697   bool LookedThroughZExt = false;
5698   if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
5699     // Try to look through a ZEXT.
5700     if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
5701       return None;
5702 
5703     OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
5704     OffsetOpc = OffsetInst->getOpcode();
5705     LookedThroughZExt = true;
5706 
5707     if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
5708       return None;
5709   }
5710   // Make sure that the memory op is a valid size.
5711   int64_t LegalShiftVal = Log2_32(SizeInBytes);
5712   if (LegalShiftVal == 0)
5713     return None;
5714   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5715     return None;
5716 
5717   // Now, try to find the specific G_CONSTANT. Start by assuming that the
5718   // register we will offset is the LHS, and the register containing the
5719   // constant is the RHS.
5720   Register OffsetReg = OffsetInst->getOperand(1).getReg();
5721   Register ConstantReg = OffsetInst->getOperand(2).getReg();
5722   auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
5723   if (!ValAndVReg) {
5724     // We didn't get a constant on the RHS. If the opcode is a shift, then
5725     // we're done.
5726     if (OffsetOpc == TargetOpcode::G_SHL)
5727       return None;
5728 
5729     // If we have a G_MUL, we can use either register. Try looking at the RHS.
5730     std::swap(OffsetReg, ConstantReg);
5731     ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI);
5732     if (!ValAndVReg)
5733       return None;
5734   }
5735 
5736   // The value must fit into 3 bits, and must be positive. Make sure that is
5737   // true.
5738   int64_t ImmVal = ValAndVReg->Value.getSExtValue();
5739 
5740   // Since we're going to pull this into a shift, the constant value must be
5741   // a power of 2. If we got a multiply, then we need to check this.
5742   if (OffsetOpc == TargetOpcode::G_MUL) {
5743     if (!isPowerOf2_32(ImmVal))
5744       return None;
5745 
5746     // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
5747     ImmVal = Log2_32(ImmVal);
5748   }
5749 
5750   if ((ImmVal & 0x7) != ImmVal)
5751     return None;
5752 
5753   // We are only allowed to shift by LegalShiftVal. This shift value is built
5754   // into the instruction, so we can't just use whatever we want.
5755   if (ImmVal != LegalShiftVal)
5756     return None;
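  // For example, with an 8-byte access LegalShiftVal is 3: a G_SHL by 3 or a
  // G_MUL by 8 both reduce to ImmVal == 3 and can be folded as "lsl #3", while
  // a G_MUL by 16 (ImmVal == 4) is rejected above.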
5757 
5758   unsigned SignExtend = 0;
5759   if (WantsExt) {
5760     // Check if the offset is defined by an extend, unless we looked through a
5761     // G_ZEXT earlier.
5762     if (!LookedThroughZExt) {
5763       MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
5764       auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
5765       if (Ext == AArch64_AM::InvalidShiftExtend)
5766         return None;
5767 
5768       SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
5769       // We only support SXTW for signed extension here.
5770       if (SignExtend && Ext != AArch64_AM::SXTW)
5771         return None;
5772       OffsetReg = ExtInst->getOperand(1).getReg();
5773     }
5774 
5775     // Need a 32-bit wide register here.
5776     MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
5777     OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
5778   }
5779 
5780   // We can use the LHS of the GEP as the base, and the LHS of the shift as an
5781   // offset. Signify that we are shifting by setting the shift flag to 1.
5782   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
5783            [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
5784            [=](MachineInstrBuilder &MIB) {
5785              // Need to add both immediates here to make sure that they are both
5786              // added to the instruction.
5787              MIB.addImm(SignExtend);
5788              MIB.addImm(1);
5789            }}};
5790 }
5791 
5792 /// This is used for computing addresses like this:
5793 ///
5794 /// ldr x1, [x2, x3, lsl #3]
5795 ///
5796 /// Where x2 is the base register, and x3 is an offset register. The shift-left
5797 /// is a constant value specific to this load instruction. That is, we'll never
5798 /// see anything other than a 3 here (which corresponds to the size of the
5799 /// element being loaded.)
5800 InstructionSelector::ComplexRendererFns
5801 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
5802     MachineOperand &Root, unsigned SizeInBytes) const {
5803   if (!Root.isReg())
5804     return None;
5805   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5806 
5807   // We want to find something like this:
5808   //
5809   // val = G_CONSTANT LegalShiftVal
5810   // shift = G_SHL off_reg val
5811   // ptr = G_PTR_ADD base_reg shift
5812   // x = G_LOAD ptr
5813   //
5814   // And fold it into this addressing mode:
5815   //
5816   // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
5817 
5818   // Check if we can find the G_PTR_ADD.
5819   MachineInstr *PtrAdd =
5820       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5821   if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5822     return None;
5823 
5824   // Now, try to match an opcode which will match our specific offset.
5825   // We want a G_SHL or a G_MUL.
5826   MachineInstr *OffsetInst =
5827       getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
5828   return selectExtendedSHL(Root, PtrAdd->getOperand(1),
5829                            OffsetInst->getOperand(0), SizeInBytes,
5830                            /*WantsExt=*/false);
5831 }
5832 
5833 /// This is used for computing addresses like this:
5834 ///
5835 /// ldr x1, [x2, x3]
5836 ///
5837 /// Where x2 is the base register, and x3 is an offset register.
5838 ///
5839 /// If it is possible (or profitable) to fold a G_PTR_ADD into the address
5840 /// calculation, this will do so. Otherwise, it will return None.
5841 InstructionSelector::ComplexRendererFns
5842 AArch64InstructionSelector::selectAddrModeRegisterOffset(
5843     MachineOperand &Root) const {
5844   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5845 
5846   // We need a GEP.
5847   MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
5848   if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
5849     return None;
5850 
5851   // If this is used more than once, let's not bother folding.
5852   // TODO: Check if they are memory ops. If they are, then we can still fold
5853   // without having to recompute anything.
5854   if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5855     return None;
5856 
5857   // Base is the GEP's LHS, offset is its RHS.
5858   return {{[=](MachineInstrBuilder &MIB) {
5859              MIB.addUse(Gep->getOperand(1).getReg());
5860            },
5861            [=](MachineInstrBuilder &MIB) {
5862              MIB.addUse(Gep->getOperand(2).getReg());
5863            },
5864            [=](MachineInstrBuilder &MIB) {
5865              // Need to add both immediates here to make sure that they are both
5866              // added to the instruction.
5867              MIB.addImm(0);
5868              MIB.addImm(0);
5869            }}};
5870 }
5871 
5872 /// This is intended to be equivalent to selectAddrModeXRO in
5873 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
5874 InstructionSelector::ComplexRendererFns
5875 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5876                                               unsigned SizeInBytes) const {
5877   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5878   if (!Root.isReg())
5879     return None;
5880   MachineInstr *PtrAdd =
5881       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5882   if (!PtrAdd)
5883     return None;
5884 
5885   // Check for immediates which cannot be encoded in the [base + imm]
5886   // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
5887   // end up with code like:
5888   //
5889   // mov x0, wide
5890   // add x1, base, x0
5891   // ldr x2, [x1, x0]
5892   //
5893   // In this situation, we can use the [base, xreg] addressing mode to save an
5894   // add/sub:
5895   //
5896   // mov x0, wide
5897   // ldr x2, [base, x0]
5898   auto ValAndVReg =
5899       getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
5900   if (ValAndVReg) {
5901     unsigned Scale = Log2_32(SizeInBytes);
5902     int64_t ImmOff = ValAndVReg->Value.getSExtValue();
5903 
5904     // Skip immediates that can be selected in the load/store addressing
5905     // mode.
5906     if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
5907         ImmOff < (0x1000 << Scale))
5908       return None;
5909 
5910     // Helper lambda to decide whether or not it is preferable to emit an add.
5911     auto isPreferredADD = [](int64_t ImmOff) {
5912       // Constants in [0x0, 0xfff] can be encoded in an add.
5913       if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
5914         return true;
5915 
5916       // Can it be encoded in an add lsl #12?
5917       if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
5918         return false;
5919 
5920       // It can be encoded in an add lsl #12, but we may not want to. If it is
5921       // possible to select this as a single movz, then prefer that. A single
5922       // movz is faster than an add with a shift.
5923       return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
5924              (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
5925     };
5926 
5927     // If the immediate can be encoded in a single add/sub, then bail out.
5928     if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
5929       return None;
5930   }
5931 
5932   // Try to fold shifts into the addressing mode.
5933   auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
5934   if (AddrModeFns)
5935     return AddrModeFns;
5936 
5937   // If that doesn't work, see if it's possible to fold in registers from
5938   // a GEP.
5939   return selectAddrModeRegisterOffset(Root);
5940 }
5941 
5942 /// This is used for computing addresses like this:
5943 ///
5944 /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
5945 ///
5946 /// Where we have a 64-bit base register, a 32-bit offset register, and an
5947 /// extend (which may or may not be signed).
5948 InstructionSelector::ComplexRendererFns
5949 AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
5950                                               unsigned SizeInBytes) const {
5951   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5952 
5953   MachineInstr *PtrAdd =
5954       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5955   if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5956     return None;
5957 
5958   MachineOperand &LHS = PtrAdd->getOperand(1);
5959   MachineOperand &RHS = PtrAdd->getOperand(2);
5960   MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
5961 
5962   // The first case is the same as selectAddrModeXRO, except we need an extend.
5963   // In this case, we try to find a shift and extend, and fold them into the
5964   // addressing mode.
5965   //
5966   // E.g.
5967   //
5968   // off_reg = G_Z/S/ANYEXT ext_reg
5969   // val = G_CONSTANT LegalShiftVal
5970   // shift = G_SHL off_reg val
5971   // ptr = G_PTR_ADD base_reg shift
5972   // x = G_LOAD ptr
5973   //
5974   // In this case we can get a load like this:
5975   //
5976   // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
5977   auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
5978                                        SizeInBytes, /*WantsExt=*/true);
5979   if (ExtendedShl)
5980     return ExtendedShl;
5981 
5982   // There was no shift. We can try to fold in a G_Z/S/ANYEXT alone though.
5983   //
5984   // e.g.
5985   // ldr something, [base_reg, ext_reg, sxtw]
5986   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5987     return None;
5988 
5989   // Check if this is an extend. We'll get an extend type if it is.
5990   AArch64_AM::ShiftExtendType Ext =
5991       getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
5992   if (Ext == AArch64_AM::InvalidShiftExtend)
5993     return None;
5994 
5995   // Need a 32-bit wide register.
5996   MachineIRBuilder MIB(*PtrAdd);
5997   Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
5998                                        AArch64::GPR32RegClass, MIB);
5999   unsigned SignExtend = Ext == AArch64_AM::SXTW;
6000 
6001   // Base is LHS, offset is ExtReg.
6002   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
6003            [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
6004            [=](MachineInstrBuilder &MIB) {
6005              MIB.addImm(SignExtend);
6006              MIB.addImm(0);
6007            }}};
6008 }
6009 
6010 /// Select a "register plus unscaled signed 9-bit immediate" address.  This
6011 /// should only match when there is an offset that is not valid for a scaled
6012 /// immediate addressing mode.  The "Size" argument is the size in bytes of the
6013 /// memory reference, which is needed here to know what is valid for a scaled
6014 /// immediate.
6015 InstructionSelector::ComplexRendererFns
6016 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
6017                                                    unsigned Size) const {
6018   MachineRegisterInfo &MRI =
6019       Root.getParent()->getParent()->getParent()->getRegInfo();
6020 
6021   if (!Root.isReg())
6022     return None;
6023 
6024   if (!isBaseWithConstantOffset(Root, MRI))
6025     return None;
6026 
6027   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
6028   if (!RootDef)
6029     return None;
6030 
6031   MachineOperand &OffImm = RootDef->getOperand(2);
6032   if (!OffImm.isReg())
6033     return None;
6034   MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
6035   if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
6036     return None;
6037   int64_t RHSC;
6038   MachineOperand &RHSOp1 = RHS->getOperand(1);
6039   if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
6040     return None;
6041   RHSC = RHSOp1.getCImm()->getSExtValue();
6042 
6043   // If the offset is valid as a scaled immediate, don't match here.
6044   if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
6045     return None;
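  // For example, with a 4-byte access an offset of 8 is rejected above (the
  // scaled [base, #imm] form handles it), while an offset of 3 is not a
  // multiple of 4 and so is handled by the unscaled 9-bit form below.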
6046   if (RHSC >= -256 && RHSC < 256) {
6047     MachineOperand &Base = RootDef->getOperand(1);
6048     return {{
6049         [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
6050         [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
6051     }};
6052   }
6053   return None;
6054 }
6055 
6056 InstructionSelector::ComplexRendererFns
6057 AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
6058                                                  unsigned Size,
6059                                                  MachineRegisterInfo &MRI) const {
6060   if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
6061     return None;
6062   MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
6063   if (Adrp.getOpcode() != AArch64::ADRP)
6064     return None;
6065 
6066   // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
6067   auto Offset = Adrp.getOperand(1).getOffset();
6068   if (Offset % Size != 0)
6069     return None;
6070 
6071   auto GV = Adrp.getOperand(1).getGlobal();
6072   if (GV->isThreadLocal())
6073     return None;
6074 
6075   auto &MF = *RootDef.getParent()->getParent();
6076   if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
6077     return None;
6078 
6079   unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
6080   MachineIRBuilder MIRBuilder(RootDef);
6081   Register AdrpReg = Adrp.getOperand(0).getReg();
6082   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
6083            [=](MachineInstrBuilder &MIB) {
6084              MIB.addGlobalAddress(GV, Offset,
6085                                   OpFlags | AArch64II::MO_PAGEOFF |
6086                                       AArch64II::MO_NC);
6087            }}};
6088 }
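// For example, assuming a global @g that was expanded into ADRP + G_ADD_LOW
// and is sufficiently aligned, the low 12 bits of the address can be folded
// into the memory access itself:
//
//   adrp x8, g
//   ldr  w0, [x8, :lo12:g]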
6089 
6090 /// Select a "register plus scaled unsigned 12-bit immediate" address.  The
6091 /// "Size" argument is the size in bytes of the memory reference, which
6092 /// determines the scale.
6093 InstructionSelector::ComplexRendererFns
6094 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
6095                                                   unsigned Size) const {
6096   MachineFunction &MF = *Root.getParent()->getParent()->getParent();
6097   MachineRegisterInfo &MRI = MF.getRegInfo();
6098 
6099   if (!Root.isReg())
6100     return None;
6101 
6102   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
6103   if (!RootDef)
6104     return None;
6105 
6106   if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
6107     return {{
6108         [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
6109         [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
6110     }};
6111   }
6112 
6113   CodeModel::Model CM = MF.getTarget().getCodeModel();
6114   // Check if we can fold in the ADD of small code model ADRP + ADD address.
6115   if (CM == CodeModel::Small) {
6116     auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
6117     if (OpFns)
6118       return OpFns;
6119   }
6120 
6121   if (isBaseWithConstantOffset(Root, MRI)) {
6122     MachineOperand &LHS = RootDef->getOperand(1);
6123     MachineOperand &RHS = RootDef->getOperand(2);
6124     MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
6125     MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
6126     if (LHSDef && RHSDef) {
6127       int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
6128       unsigned Scale = Log2_32(Size);
6129       if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
6130         if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
6131           return {{
6132               [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
6133               [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
6134           }};
6135 
6136         return {{
6137             [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
6138             [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
6139         }};
6140       }
6141     }
6142   }
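  // Note that the immediate rendered above is the byte offset divided by the
  // access size, e.g. a G_PTR_ADD of 40 feeding an 8-byte access renders as 5
  // (the scaled LDR/STR forms print this back as "#40").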
6143 
6144   // Before falling back to our general case, check if the unscaled
6145   // instructions can handle this. If so, that's preferable.
6146   if (selectAddrModeUnscaled(Root, Size).hasValue())
6147     return None;
6148 
6149   return {{
6150       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
6151       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
6152   }};
6153 }
6154 
6155 /// Given a shift instruction, return the correct shift type for that
6156 /// instruction.
6157 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
6158   switch (MI.getOpcode()) {
6159   default:
6160     return AArch64_AM::InvalidShiftExtend;
6161   case TargetOpcode::G_SHL:
6162     return AArch64_AM::LSL;
6163   case TargetOpcode::G_LSHR:
6164     return AArch64_AM::LSR;
6165   case TargetOpcode::G_ASHR:
6166     return AArch64_AM::ASR;
6167   case TargetOpcode::G_ROTR:
6168     return AArch64_AM::ROR;
6169   }
6170 }
6171 
6172 /// Select a "shifted register" operand. If the value is not shifted, set the
6173 /// shift operand to a default value of "lsl 0".
6174 InstructionSelector::ComplexRendererFns
6175 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
6176                                                   bool AllowROR) const {
6177   if (!Root.isReg())
6178     return None;
6179   MachineRegisterInfo &MRI =
6180       Root.getParent()->getParent()->getParent()->getRegInfo();
6181 
6182   // Check if the operand is defined by an instruction which corresponds to
6183   // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
6184   MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
6185   if (!ShiftInst)
6186     return None;
6187   AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
6188   if (ShType == AArch64_AM::InvalidShiftExtend)
6189     return None;
6190   if (ShType == AArch64_AM::ROR && !AllowROR)
6191     return None;
6192   if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
6193     return None;
6194 
6195   // Need an immediate on the RHS.
6196   MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
6197   auto Immed = getImmedFromMO(ShiftRHS);
6198   if (!Immed)
6199     return None;
6200 
6201   // We have something that we can fold. Fold in the shift's LHS and RHS into
6202   // the instruction.
6203   MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
6204   Register ShiftReg = ShiftLHS.getReg();
6205 
6206   unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
6207   unsigned Val = *Immed & (NumBits - 1);
6208   unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
6209 
6210   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
6211            [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
6212 }
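// For example, a 64-bit G_ASHR by the constant 7 feeding a G_ADD can be folded
// here into the add itself, e.g. "add x0, x1, x2, asr #7", instead of emitting
// a separate shift instruction.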
6213 
6214 AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
6215     MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
6216   unsigned Opc = MI.getOpcode();
6217 
6218   // Handle explicit extend instructions first.
6219   if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
6220     unsigned Size;
6221     if (Opc == TargetOpcode::G_SEXT)
6222       Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6223     else
6224       Size = MI.getOperand(2).getImm();
6225     assert(Size != 64 && "Extend from 64 bits?");
6226     switch (Size) {
6227     case 8:
6228       return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
6229     case 16:
6230       return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
6231     case 32:
6232       return AArch64_AM::SXTW;
6233     default:
6234       return AArch64_AM::InvalidShiftExtend;
6235     }
6236   }
6237 
6238   if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
6239     unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
6240     assert(Size != 64 && "Extend from 64 bits?");
6241     switch (Size) {
6242     case 8:
6243       return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
6244     case 16:
6245       return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
6246     case 32:
6247       return AArch64_AM::UXTW;
6248     default:
6249       return AArch64_AM::InvalidShiftExtend;
6250     }
6251   }
6252 
6253   // Don't have an explicit extend. Try to handle a G_AND with a constant mask
6254   // on the RHS.
6255   if (Opc != TargetOpcode::G_AND)
6256     return AArch64_AM::InvalidShiftExtend;
6257 
6258   Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
6259   if (!MaybeAndMask)
6260     return AArch64_AM::InvalidShiftExtend;
6261   uint64_t AndMask = *MaybeAndMask;
6262   switch (AndMask) {
6263   default:
6264     return AArch64_AM::InvalidShiftExtend;
6265   case 0xFF:
6266     return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
6267   case 0xFFFF:
6268     return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
6269   case 0xFFFFFFFF:
6270     return AArch64_AM::UXTW;
6271   }
6272 }
6273 
6274 Register AArch64InstructionSelector::moveScalarRegClass(
6275     Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
6276   MachineRegisterInfo &MRI = *MIB.getMRI();
6277   auto Ty = MRI.getType(Reg);
6278   assert(!Ty.isVector() && "Expected scalars only!");
6279   if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
6280     return Reg;
6281 
6282   // Create a copy and immediately select it.
6283   // FIXME: We should have an emitCopy function?
6284   auto Copy = MIB.buildCopy({&RC}, {Reg});
6285   selectCopy(*Copy, TII, MRI, TRI, RBI);
6286   return Copy.getReg(0);
6287 }
6288 
6289 /// Select an "extended register" operand. This operand folds in an extend
6290 /// followed by an optional left shift.
6291 InstructionSelector::ComplexRendererFns
6292 AArch64InstructionSelector::selectArithExtendedRegister(
6293     MachineOperand &Root) const {
6294   if (!Root.isReg())
6295     return None;
6296   MachineRegisterInfo &MRI =
6297       Root.getParent()->getParent()->getParent()->getRegInfo();
6298 
6299   uint64_t ShiftVal = 0;
6300   Register ExtReg;
6301   AArch64_AM::ShiftExtendType Ext;
6302   MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
6303   if (!RootDef)
6304     return None;
6305 
6306   if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
6307     return None;
6308 
6309   // Check if we can fold a shift and an extend.
6310   if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
6311     // Look for a constant on the RHS of the shift.
6312     MachineOperand &RHS = RootDef->getOperand(2);
6313     Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
6314     if (!MaybeShiftVal)
6315       return None;
6316     ShiftVal = *MaybeShiftVal;
6317     if (ShiftVal > 4)
6318       return None;
6319     // Look for a valid extend instruction on the LHS of the shift.
6320     MachineOperand &LHS = RootDef->getOperand(1);
6321     MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
6322     if (!ExtDef)
6323       return None;
6324     Ext = getExtendTypeForInst(*ExtDef, MRI);
6325     if (Ext == AArch64_AM::InvalidShiftExtend)
6326       return None;
6327     ExtReg = ExtDef->getOperand(1).getReg();
6328   } else {
6329     // Didn't get a shift. Try just folding an extend.
6330     Ext = getExtendTypeForInst(*RootDef, MRI);
6331     if (Ext == AArch64_AM::InvalidShiftExtend)
6332       return None;
6333     ExtReg = RootDef->getOperand(1).getReg();
6334 
6335     // If we have a 32 bit instruction which zeroes out the high half of a
6336     // register, we get an implicit zero extend for free. Check if we have one.
6337     // FIXME: We actually emit the extend right now even though we don't have
6338     // to.
6339     if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
6340       MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
6341       if (ExtInst && isDef32(*ExtInst))
6342         return None;
6343     }
6344   }
6345 
6346   // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
6347   // copy.
6348   MachineIRBuilder MIB(*RootDef);
6349   ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
6350 
6351   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
6352            [=](MachineInstrBuilder &MIB) {
6353              MIB.addImm(getArithExtendImm(Ext, ShiftVal));
6354            }}};
6355 }
6356 
6357 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
6358                                                 const MachineInstr &MI,
6359                                                 int OpIdx) const {
6360   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
6361   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6362          "Expected G_CONSTANT");
6363   Optional<int64_t> CstVal =
6364       getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
6365   assert(CstVal && "Expected constant value");
6366   MIB.addImm(CstVal.getValue());
6367 }
6368 
6369 void AArch64InstructionSelector::renderLogicalImm32(
6370   MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
6371   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6372          "Expected G_CONSTANT");
6373   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
6374   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
6375   MIB.addImm(Enc);
6376 }
6377 
6378 void AArch64InstructionSelector::renderLogicalImm64(
6379   MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
6380   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
6381          "Expected G_CONSTANT");
6382   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
6383   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
6384   MIB.addImm(Enc);
6385 }
6386 
6387 void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
6388                                                const MachineInstr &MI,
6389                                                int OpIdx) const {
6390   assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6391          "Expected G_FCONSTANT");
6392   MIB.addImm(
6393       AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
6394 }
6395 
6396 void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
6397                                                const MachineInstr &MI,
6398                                                int OpIdx) const {
6399   assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6400          "Expected G_FCONSTANT");
6401   MIB.addImm(
6402       AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
6403 }
6404 
6405 void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
6406                                                const MachineInstr &MI,
6407                                                int OpIdx) const {
6408   assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
6409          "Expected G_FCONSTANT");
6410   MIB.addImm(
6411       AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
6412 }
6413 
6414 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
6415     const MachineInstr &MI, unsigned NumBytes) const {
6416   if (!MI.mayLoadOrStore())
6417     return false;
6418   assert(MI.hasOneMemOperand() &&
6419          "Expected load/store to have only one mem op!");
6420   return (*MI.memoperands_begin())->getSize() == NumBytes;
6421 }
6422 
6423 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
6424   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
6425   if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
6426     return false;
6427 
6428   // Only return true if we know the operation will zero-out the high half of
6429   // the 64-bit register. Truncates can be subregister copies, which don't
6430   // zero out the high bits. Copies and other copy-like instructions can be
6431   // fed by truncates, or could be lowered as subregister copies.
6432   switch (MI.getOpcode()) {
6433   default:
6434     return true;
6435   case TargetOpcode::COPY:
6436   case TargetOpcode::G_BITCAST:
6437   case TargetOpcode::G_TRUNC:
6438   case TargetOpcode::G_PHI:
6439     return false;
6440   }
6441 }
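// The above relies on the fact that writing a W register implicitly zeroes the
// upper 32 bits of the corresponding X register, so e.g. a 32-bit G_ADD can
// feed a zero-extend without needing an explicit mask or UBFM.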
6442 
6443 
6444 // Perform fixups on the given PHI instruction's operands to force them all
6445 // to be the same as the destination regbank.
6446 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
6447                             const AArch64RegisterBankInfo &RBI) {
6448   assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
6449   Register DstReg = MI.getOperand(0).getReg();
6450   const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
6451   assert(DstRB && "Expected PHI dst to have regbank assigned");
6452   MachineIRBuilder MIB(MI);
6453 
6454   // Go through each operand and ensure it has the same regbank.
6455   for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
6456     MachineOperand &MO = MI.getOperand(OpIdx);
6457     if (!MO.isReg())
6458       continue;
6459     Register OpReg = MO.getReg();
6460     const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
6461     if (RB != DstRB) {
6462       // Insert a cross-bank copy.
6463       auto *OpDef = MRI.getVRegDef(OpReg);
6464       const LLT &Ty = MRI.getType(OpReg);
6465       MachineBasicBlock &OpDefBB = *OpDef->getParent();
6466 
6467       // Any instruction we insert must appear after all PHIs in the block
6468       // for the block to be valid MIR.
6469       MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
6470       if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
6471         InsertPt = OpDefBB.getFirstNonPHI();
6472       MIB.setInsertPt(*OpDef->getParent(), InsertPt);
6473       auto Copy = MIB.buildCopy(Ty, OpReg);
6474       MRI.setRegBank(Copy.getReg(0), *DstRB);
6475       MO.setReg(Copy.getReg(0));
6476     }
6477   }
6478 }
6479 
6480 void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
6481   // We're looking for PHIs; build a list so we don't invalidate iterators.
6482   MachineRegisterInfo &MRI = MF.getRegInfo();
6483   SmallVector<MachineInstr *, 32> Phis;
6484   for (auto &BB : MF) {
6485     for (auto &MI : BB) {
6486       if (MI.getOpcode() == TargetOpcode::G_PHI)
6487         Phis.emplace_back(&MI);
6488     }
6489   }
6490 
6491   for (auto *MI : Phis) {
6492     // We need to do some work here if the operand types are < 16 bits and they
6493     // are split across fpr/gpr banks. Since all types <32b on gpr
6494     // end up being assigned gpr32 regclasses, we can end up with PHIs here
6495     // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
6496     // be selecting heterogeneous regbanks for operands if possible, but we
6497     // still need to be able to deal with it here.
6498     //
6499     // To fix this, if we have a gpr-bank operand < 32b in size and at least
6500     // one other operand is on the fpr bank, then we add cross-bank copies
6501     // to homogenize the operand banks. For simplicity the bank that we choose
6502     // to settle on is whatever bank the def operand has. For example:
6503     //
6504     // %endbb:
6505     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
6506     //  =>
6507     // %bb2:
6508     //   ...
6509     //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
6510     //   ...
6511     // %endbb:
6512     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
6513     bool HasGPROp = false, HasFPROp = false;
6514     for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
6515       const auto &MO = MI->getOperand(OpIdx);
6516       if (!MO.isReg())
6517         continue;
6518       const LLT &Ty = MRI.getType(MO.getReg());
6519       if (!Ty.isValid() || !Ty.isScalar())
6520         break;
6521       if (Ty.getSizeInBits() >= 32)
6522         break;
6523       const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
6524       // If for some reason we don't have a regbank yet, don't try anything.
6525       if (!RB)
6526         break;
6527 
6528       if (RB->getID() == AArch64::GPRRegBankID)
6529         HasGPROp = true;
6530       else
6531         HasFPROp = true;
6532     }
6533     // We have heterogeneous regbanks, so we need to fix them up.
6534     if (HasGPROp && HasFPROp)
6535       fixupPHIOpBanks(*MI, MRI, RBI);
6536   }
6537 }
6538 
6539 namespace llvm {
6540 InstructionSelector *
6541 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
6542                                  AArch64Subtarget &Subtarget,
6543                                  AArch64RegisterBankInfo &RBI) {
6544   return new AArch64InstructionSelector(TM, Subtarget, RBI);
6545 }
6546 }
6547