1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 /// This file implements the targeting of the InstructionSelector class for
10 /// AArch64.
11 /// \todo This should be generated by TableGen.
12 //===----------------------------------------------------------------------===//
13 
14 #include "AArch64InstrInfo.h"
15 #include "AArch64MachineFunctionInfo.h"
16 #include "AArch64RegisterBankInfo.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "AArch64TargetMachine.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "llvm/ADT/Optional.h"
22 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
23 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
24 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
26 #include "llvm/CodeGen/GlobalISel/Utils.h"
27 #include "llvm/CodeGen/MachineBasicBlock.h"
28 #include "llvm/CodeGen/MachineConstantPool.h"
29 #include "llvm/CodeGen/MachineFunction.h"
30 #include "llvm/CodeGen/MachineInstr.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineOperand.h"
33 #include "llvm/CodeGen/MachineRegisterInfo.h"
34 #include "llvm/CodeGen/TargetOpcodes.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/Type.h"
37 #include "llvm/IR/IntrinsicsAArch64.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/raw_ostream.h"
40 
41 #define DEBUG_TYPE "aarch64-isel"
42 
43 using namespace llvm;
44 
45 namespace {
46 
47 #define GET_GLOBALISEL_PREDICATE_BITSET
48 #include "AArch64GenGlobalISel.inc"
49 #undef GET_GLOBALISEL_PREDICATE_BITSET
50 
51 class AArch64InstructionSelector : public InstructionSelector {
52 public:
53   AArch64InstructionSelector(const AArch64TargetMachine &TM,
54                              const AArch64Subtarget &STI,
55                              const AArch64RegisterBankInfo &RBI);
56 
57   bool select(MachineInstr &I) override;
58   static const char *getName() { return DEBUG_TYPE; }
59 
60   void setupMF(MachineFunction &MF, GISelKnownBits &KB,
61                CodeGenCoverage &CoverageInfo) override {
62     InstructionSelector::setupMF(MF, KB, CoverageInfo);
63 
64     // hasFnAttribute() is expensive to call on every BRCOND selection, so
65     // cache it here for each run of the selector.
66     ProduceNonFlagSettingCondBr =
67         !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
68     MFReturnAddr = Register();
69 
70     processPHIs(MF);
71   }
72 
73 private:
74   /// tblgen-erated 'select' implementation, used as the initial selector for
75   /// the patterns that don't require complex C++.
76   bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;
77 
78   // A lowering phase that runs before any selection attempts.
79   // Returns true if the instruction was modified.
80   bool preISelLower(MachineInstr &I);
81 
82   // An early selection function that runs before the selectImpl() call.
83   bool earlySelect(MachineInstr &I) const;
84 
85   // Do some preprocessing of G_PHIs before we begin selection.
86   void processPHIs(MachineFunction &MF);
87 
88   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
89 
90   /// Eliminate same-sized cross-bank copies into stores before selectImpl().
91   bool contractCrossBankCopyIntoStore(MachineInstr &I,
92                                       MachineRegisterInfo &MRI);
93 
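  /// Rewrite a G_PTR_ADD as an equivalent integer G_ADD (the pointer operand
  /// is cast to an integer) so that the imported integer patterns can select
  /// it. Returns true if the instruction was changed.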
94   bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);
95 
96   bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
97                           MachineRegisterInfo &MRI) const;
98   bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
99                            MachineRegisterInfo &MRI) const;
100 
101   bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
102                                   int64_t CmpConstant,
103                                   const CmpInst::Predicate &Pred,
104                                   MachineBasicBlock *DstMBB,
105                                   MachineIRBuilder &MIB) const;
106   bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
107                            MachineRegisterInfo &MRI) const;
108 
109   bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
110   bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
111 
112   // Helper to generate an equivalent of scalar_to_vector into a new register;
113   // returns the instruction that defines the new vector register.
114   MachineInstr *emitScalarToVector(unsigned EltSize,
115                                    const TargetRegisterClass *DstRC,
116                                    Register Scalar,
117                                    MachineIRBuilder &MIRBuilder) const;
118 
119   /// Emit a lane insert into \p DstReg, or a new vector register if None is
120   /// provided.
121   ///
122   /// The lane inserted into is defined by \p LaneIdx. The vector source
123   /// register is given by \p SrcReg. The register containing the element is
124   /// given by \p EltReg.
125   MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
126                                Register EltReg, unsigned LaneIdx,
127                                const RegisterBank &RB,
128                                MachineIRBuilder &MIRBuilder) const;
129   bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
130   bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
131                               MachineRegisterInfo &MRI) const;
132   bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
133   bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
134   bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
135 
136   bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
137   bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
138   bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
139   bool selectSplitVectorUnmerge(MachineInstr &I,
140                                 MachineRegisterInfo &MRI) const;
141   bool selectIntrinsicWithSideEffects(MachineInstr &I,
142                                       MachineRegisterInfo &MRI) const;
143   bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
144   bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
145   bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
146   bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
147   bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
148   bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
149   bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
150 
151   unsigned emitConstantPoolEntry(const Constant *CPVal,
152                                  MachineFunction &MF) const;
153   MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
154                                          MachineIRBuilder &MIRBuilder) const;
155 
156   // Emit a vector concat operation.
157   MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
158                                  Register Op2,
159                                  MachineIRBuilder &MIRBuilder) const;
160 
161   // Emit an integer compare between LHS and RHS, which checks for Predicate.
162   //
163   // This returns the produced compare instruction, and the predicate which
164   // was ultimately used in the compare. The predicate may differ from what
165   // is passed in \p Predicate due to optimization.
166   std::pair<MachineInstr *, CmpInst::Predicate>
167   emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
168                      MachineOperand &Predicate,
169                      MachineIRBuilder &MIRBuilder) const;
170   MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
171                         MachineIRBuilder &MIRBuilder) const;
172   MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
173                         MachineIRBuilder &MIRBuilder) const;
174   MachineInstr *emitTST(const Register &LHS, const Register &RHS,
175                         MachineIRBuilder &MIRBuilder) const;
176   MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
177                                      const RegisterBank &DstRB, LLT ScalarTy,
178                                      Register VecReg, unsigned LaneIdx,
179                                      MachineIRBuilder &MIRBuilder) const;
180 
181   /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
182   /// materialized using an FMOV instruction, then update MI and return it.
183   /// Otherwise, do nothing and return nullptr.
184   MachineInstr *emitFMovForFConstant(MachineInstr &MI,
185                                      MachineRegisterInfo &MRI) const;
186 
187   /// Emit a CSet for a compare.
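  /// (The CSet is materialized as a CSINC of WZR with the inverted condition,
  ///  which is what the CSET alias expands to.)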
188   MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
189                                 MachineIRBuilder &MIRBuilder) const;
190 
191   /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
192   /// \p IsNegative is true if the test should be "not zero".
193   /// This will also optimize the test bit instruction when possible.
194   MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
195                             MachineBasicBlock *DstMBB,
196                             MachineIRBuilder &MIB) const;
197 
198   // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
199   // We use these manually instead of using the importer since it doesn't
200   // support SDNodeXForm.
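  // (For a shift amount 'imm', the A variants produce (BitWidth - imm) % BitWidth
  //  and the B variants produce BitWidth - 1 - imm, i.e. the immr/imms operands
  //  of the equivalent UBFM.)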
201   ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
202   ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
203   ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
204   ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;
205 
206   ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
207   ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
208   ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;
209 
210   ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
211                                             unsigned Size) const;
212 
213   ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
214     return selectAddrModeUnscaled(Root, 1);
215   }
216   ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
217     return selectAddrModeUnscaled(Root, 2);
218   }
219   ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
220     return selectAddrModeUnscaled(Root, 4);
221   }
222   ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
223     return selectAddrModeUnscaled(Root, 8);
224   }
225   ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
226     return selectAddrModeUnscaled(Root, 16);
227   }
228 
229   /// Helper to try to fold a GISEL_ADD_LOW into an immediate, to be used
230   /// from complex pattern matchers like selectAddrModeIndexed().
231   ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
232                                           MachineRegisterInfo &MRI) const;
233 
234   ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
235                                            unsigned Size) const;
236   template <int Width>
237   ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
238     return selectAddrModeIndexed(Root, Width / 8);
239   }
240 
241   bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
242                                      const MachineRegisterInfo &MRI) const;
243   ComplexRendererFns
244   selectAddrModeShiftedExtendXReg(MachineOperand &Root,
245                                   unsigned SizeInBytes) const;
246 
247   /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
248   /// or not a shift + extend should be folded into an addressing mode. Returns
249   /// None when this is not profitable or possible.
250   ComplexRendererFns
251   selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
252                     MachineOperand &Offset, unsigned SizeInBytes,
253                     bool WantsExt) const;
254   ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
255   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
256                                        unsigned SizeInBytes) const;
257   template <int Width>
258   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
259     return selectAddrModeXRO(Root, Width / 8);
260   }
261 
262   ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
263                                        unsigned SizeInBytes) const;
264   template <int Width>
265   ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
266     return selectAddrModeWRO(Root, Width / 8);
267   }
268 
269   ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
270 
271   ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
272     return selectShiftedRegister(Root);
273   }
274 
275   ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
276     // TODO: selectShiftedRegister should allow for rotates on logical shifts.
277     // For now, make them the same. The only difference between the two is that
278     // logical shifts are allowed to fold in rotates. Otherwise, these are
279     // functionally the same.
280     return selectShiftedRegister(Root);
281   }
282 
283   /// Given an extend instruction, determine the correct shift-extend type for
284   /// that instruction.
285   ///
286   /// If the instruction is going to be used in a load or store, pass
287   /// \p IsLoadStore = true.
288   AArch64_AM::ShiftExtendType
289   getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
290                        bool IsLoadStore = false) const;
291 
292   /// Move \p Reg to \p RC if \p Reg is not already in \p RC.
293   ///
294   /// \returns Either \p Reg if no change was necessary, or the new register
295   /// created by moving \p Reg.
296   ///
297   /// Note: This uses emitCopy right now.
298   Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
299                               MachineIRBuilder &MIB) const;
300 
301   ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
302 
303   void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
304                       int OpIdx = -1) const;
305   void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
306                           int OpIdx = -1) const;
307   void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
308                           int OpIdx = -1) const;
309 
310   // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
311   void materializeLargeCMVal(MachineInstr &I, const Value *V,
312                              unsigned OpFlags) const;
313 
314   // Optimization methods.
315   bool tryOptSelect(MachineInstr &MI) const;
316   MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
317                                       MachineOperand &Predicate,
318                                       MachineIRBuilder &MIRBuilder) const;
319   MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
320                                                MachineOperand &RHS,
321                                                CmpInst::Predicate &Predicate,
322                                                MachineIRBuilder &MIB) const;
323   MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS,
324                                           MachineOperand &RHS,
325                                           MachineIRBuilder &MIB) const;
326 
327   /// Return true if \p MI is a load or store of \p NumBytes bytes.
328   bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;
329 
330   /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
331   /// register zeroed out. In other words, the result of MI has been explicitly
332   /// zero extended.
333   bool isDef32(const MachineInstr &MI) const;
334 
335   const AArch64TargetMachine &TM;
336   const AArch64Subtarget &STI;
337   const AArch64InstrInfo &TII;
338   const AArch64RegisterInfo &TRI;
339   const AArch64RegisterBankInfo &RBI;
340 
341   bool ProduceNonFlagSettingCondBr = false;
342 
343   // Some cached values used during selection.
344   // We use LR as a live-in register, and we keep track of it here as it can be
345   // clobbered by calls.
346   Register MFReturnAddr;
347 
348 #define GET_GLOBALISEL_PREDICATES_DECL
349 #include "AArch64GenGlobalISel.inc"
350 #undef GET_GLOBALISEL_PREDICATES_DECL
351 
352 // We declare the temporaries used by selectImpl() in the class to minimize the
353 // cost of constructing placeholder values.
354 #define GET_GLOBALISEL_TEMPORARIES_DECL
355 #include "AArch64GenGlobalISel.inc"
356 #undef GET_GLOBALISEL_TEMPORARIES_DECL
357 };
358 
359 } // end anonymous namespace
360 
361 #define GET_GLOBALISEL_IMPL
362 #include "AArch64GenGlobalISel.inc"
363 #undef GET_GLOBALISEL_IMPL
364 
365 AArch64InstructionSelector::AArch64InstructionSelector(
366     const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
367     const AArch64RegisterBankInfo &RBI)
368     : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
369       TRI(*STI.getRegisterInfo()), RBI(RBI),
370 #define GET_GLOBALISEL_PREDICATES_INIT
371 #include "AArch64GenGlobalISel.inc"
372 #undef GET_GLOBALISEL_PREDICATES_INIT
373 #define GET_GLOBALISEL_TEMPORARIES_INIT
374 #include "AArch64GenGlobalISel.inc"
375 #undef GET_GLOBALISEL_TEMPORARIES_INIT
376 {
377 }
378 
379 // FIXME: This should be target-independent, inferred from the types declared
380 // for each class in the bank.
381 static const TargetRegisterClass *
382 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
383                          const RegisterBankInfo &RBI,
384                          bool GetAllRegSet = false) {
385   if (RB.getID() == AArch64::GPRRegBankID) {
386     if (Ty.getSizeInBits() <= 32)
387       return GetAllRegSet ? &AArch64::GPR32allRegClass
388                           : &AArch64::GPR32RegClass;
389     if (Ty.getSizeInBits() == 64)
390       return GetAllRegSet ? &AArch64::GPR64allRegClass
391                           : &AArch64::GPR64RegClass;
392     return nullptr;
393   }
394 
395   if (RB.getID() == AArch64::FPRRegBankID) {
396     if (Ty.getSizeInBits() <= 16)
397       return &AArch64::FPR16RegClass;
398     if (Ty.getSizeInBits() == 32)
399       return &AArch64::FPR32RegClass;
400     if (Ty.getSizeInBits() == 64)
401       return &AArch64::FPR64RegClass;
402     if (Ty.getSizeInBits() == 128)
403       return &AArch64::FPR128RegClass;
404     return nullptr;
405   }
406 
407   return nullptr;
408 }
409 
410 /// Given a register bank, and size in bits, return the smallest register class
411 /// that can represent that combination.
412 static const TargetRegisterClass *
413 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
414                       bool GetAllRegSet = false) {
415   unsigned RegBankID = RB.getID();
416 
417   if (RegBankID == AArch64::GPRRegBankID) {
418     if (SizeInBits <= 32)
419       return GetAllRegSet ? &AArch64::GPR32allRegClass
420                           : &AArch64::GPR32RegClass;
421     if (SizeInBits == 64)
422       return GetAllRegSet ? &AArch64::GPR64allRegClass
423                           : &AArch64::GPR64RegClass;
424   }
425 
426   if (RegBankID == AArch64::FPRRegBankID) {
427     switch (SizeInBits) {
428     default:
429       return nullptr;
430     case 8:
431       return &AArch64::FPR8RegClass;
432     case 16:
433       return &AArch64::FPR16RegClass;
434     case 32:
435       return &AArch64::FPR32RegClass;
436     case 64:
437       return &AArch64::FPR64RegClass;
438     case 128:
439       return &AArch64::FPR128RegClass;
440     }
441   }
442 
443   return nullptr;
444 }
445 
446 /// Sets \p SubReg to the subregister index to use for \p RC; returns false if none is known.
447 static bool getSubRegForClass(const TargetRegisterClass *RC,
448                               const TargetRegisterInfo &TRI, unsigned &SubReg) {
449   switch (TRI.getRegSizeInBits(*RC)) {
450   case 8:
451     SubReg = AArch64::bsub;
452     break;
453   case 16:
454     SubReg = AArch64::hsub;
455     break;
456   case 32:
457     if (RC != &AArch64::FPR32RegClass)
458       SubReg = AArch64::sub_32;
459     else
460       SubReg = AArch64::ssub;
461     break;
462   case 64:
463     SubReg = AArch64::dsub;
464     break;
465   default:
466     LLVM_DEBUG(
467         dbgs() << "Couldn't find appropriate subregister for register class.");
468     return false;
469   }
470 
471   return true;
472 }
473 
474 /// Returns the minimum size the given register bank can hold.
475 static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
476   switch (RB.getID()) {
477   case AArch64::GPRRegBankID:
478     return 32;
479   case AArch64::FPRRegBankID:
480     return 8;
481   default:
482     llvm_unreachable("Tried to get minimum size for unknown register bank.");
483   }
484 }
485 
486 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
487   auto &MI = *Root.getParent();
488   auto &MBB = *MI.getParent();
489   auto &MF = *MBB.getParent();
490   auto &MRI = MF.getRegInfo();
491   uint64_t Immed;
492   if (Root.isImm())
493     Immed = Root.getImm();
494   else if (Root.isCImm())
495     Immed = Root.getCImm()->getZExtValue();
496   else if (Root.isReg()) {
497     auto ValAndVReg =
498         getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
499     if (!ValAndVReg)
500       return None;
501     Immed = ValAndVReg->Value;
502   } else
503     return None;
504   return Immed;
505 }
506 
507 /// Check whether \p I is a currently unsupported binary operation:
508 /// - it has an unsized type
509 /// - an operand is not a vreg
510 /// - not all of its operands are in the same register bank
511 /// These are checks that should someday live in the verifier, but right now,
512 /// these are mostly limitations of the AArch64 selector.
513 static bool unsupportedBinOp(const MachineInstr &I,
514                              const AArch64RegisterBankInfo &RBI,
515                              const MachineRegisterInfo &MRI,
516                              const AArch64RegisterInfo &TRI) {
517   LLT Ty = MRI.getType(I.getOperand(0).getReg());
518   if (!Ty.isValid()) {
519     LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
520     return true;
521   }
522 
523   const RegisterBank *PrevOpBank = nullptr;
524   for (auto &MO : I.operands()) {
525     // FIXME: Support non-register operands.
526     if (!MO.isReg()) {
527       LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
528       return true;
529     }
530 
531     // FIXME: Can generic operations have physical register operands? If
532     // so, this will need to be taught about that, and we'll need to get the
533     // bank out of the minimal class for the register.
534     // Either way, this needs to be documented (and possibly verified).
535     if (!Register::isVirtualRegister(MO.getReg())) {
536       LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
537       return true;
538     }
539 
540     const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
541     if (!OpBank) {
542       LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
543       return true;
544     }
545 
546     if (PrevOpBank && OpBank != PrevOpBank) {
547       LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
548       return true;
549     }
550     PrevOpBank = OpBank;
551   }
552   return false;
553 }
554 
555 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
556 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
557 /// and of size \p OpSize.
558 /// \returns \p GenericOpc if the combination is unsupported.
559 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
560                                unsigned OpSize) {
561   switch (RegBankID) {
562   case AArch64::GPRRegBankID:
563     if (OpSize == 32) {
564       switch (GenericOpc) {
565       case TargetOpcode::G_SHL:
566         return AArch64::LSLVWr;
567       case TargetOpcode::G_LSHR:
568         return AArch64::LSRVWr;
569       case TargetOpcode::G_ASHR:
570         return AArch64::ASRVWr;
571       default:
572         return GenericOpc;
573       }
574     } else if (OpSize == 64) {
575       switch (GenericOpc) {
576       case TargetOpcode::G_PTR_ADD:
577         return AArch64::ADDXrr;
578       case TargetOpcode::G_SHL:
579         return AArch64::LSLVXr;
580       case TargetOpcode::G_LSHR:
581         return AArch64::LSRVXr;
582       case TargetOpcode::G_ASHR:
583         return AArch64::ASRVXr;
584       default:
585         return GenericOpc;
586       }
587     }
588     break;
589   case AArch64::FPRRegBankID:
590     switch (OpSize) {
591     case 32:
592       switch (GenericOpc) {
593       case TargetOpcode::G_FADD:
594         return AArch64::FADDSrr;
595       case TargetOpcode::G_FSUB:
596         return AArch64::FSUBSrr;
597       case TargetOpcode::G_FMUL:
598         return AArch64::FMULSrr;
599       case TargetOpcode::G_FDIV:
600         return AArch64::FDIVSrr;
601       default:
602         return GenericOpc;
603       }
604     case 64:
605       switch (GenericOpc) {
606       case TargetOpcode::G_FADD:
607         return AArch64::FADDDrr;
608       case TargetOpcode::G_FSUB:
609         return AArch64::FSUBDrr;
610       case TargetOpcode::G_FMUL:
611         return AArch64::FMULDrr;
612       case TargetOpcode::G_FDIV:
613         return AArch64::FDIVDrr;
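      // A 64-bit G_OR on the FPR bank is implemented with the vector ORR (.8b
      // form); ORR is purely bitwise, so the lane layout is irrelevant.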
614       case TargetOpcode::G_OR:
615         return AArch64::ORRv8i8;
616       default:
617         return GenericOpc;
618       }
619     }
620     break;
621   }
622   return GenericOpc;
623 }
624 
625 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
626 /// appropriate for the (value) register bank \p RegBankID and of memory access
627 /// size \p OpSize.  This returns the variant with the base+unsigned-immediate
628 /// addressing mode (e.g., LDRXui).
629 /// \returns \p GenericOpc if the combination is unsupported.
630 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
631                                     unsigned OpSize) {
632   const bool isStore = GenericOpc == TargetOpcode::G_STORE;
633   switch (RegBankID) {
634   case AArch64::GPRRegBankID:
635     switch (OpSize) {
636     case 8:
637       return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
638     case 16:
639       return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
640     case 32:
641       return isStore ? AArch64::STRWui : AArch64::LDRWui;
642     case 64:
643       return isStore ? AArch64::STRXui : AArch64::LDRXui;
644     }
645     break;
646   case AArch64::FPRRegBankID:
647     switch (OpSize) {
648     case 8:
649       return isStore ? AArch64::STRBui : AArch64::LDRBui;
650     case 16:
651       return isStore ? AArch64::STRHui : AArch64::LDRHui;
652     case 32:
653       return isStore ? AArch64::STRSui : AArch64::LDRSui;
654     case 64:
655       return isStore ? AArch64::STRDui : AArch64::LDRDui;
656     }
657     break;
658   }
659   return GenericOpc;
660 }
661 
662 #ifndef NDEBUG
663 /// Helper function that verifies that we have a valid copy at the end of
664 /// selectCopy. Verifies that the source and dest have the expected sizes and
665 /// then returns true.
666 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
667                         const MachineRegisterInfo &MRI,
668                         const TargetRegisterInfo &TRI,
669                         const RegisterBankInfo &RBI) {
670   const Register DstReg = I.getOperand(0).getReg();
671   const Register SrcReg = I.getOperand(1).getReg();
672   const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
673   const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
674 
675   // Make sure the size of the source and dest line up.
676   assert(
677       (DstSize == SrcSize ||
678        // Copies are a means to set up initial types; the number of
679        // bits may not exactly match.
680        (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
681        // Copies are a means to copy bits around; as long as we stay
682        // in the same register class, that's fine. Otherwise, we need
683        // a SUBREG_TO_REG, an AND, or similar.
684        (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
685       "Copy with different width?!");
686 
687   // Check the size of the destination.
688   assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
689          "GPRs cannot get more than 64-bit width values");
690 
691   return true;
692 }
693 #endif
694 
695 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
696 /// to \p *To.
697 ///
698 /// E.g "To = COPY SrcReg:SubReg"
699 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
700                        const RegisterBankInfo &RBI, Register SrcReg,
701                        const TargetRegisterClass *To, unsigned SubReg) {
702   assert(SrcReg.isValid() && "Expected a valid source register?");
703   assert(To && "Destination register class cannot be null");
704   assert(SubReg && "Expected a valid subregister");
705 
706   MachineIRBuilder MIB(I);
707   auto SubRegCopy =
708       MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
709   MachineOperand &RegOp = I.getOperand(1);
710   RegOp.setReg(SubRegCopy.getReg(0));
711 
712   // It's possible that the destination register won't be constrained. Make
713   // sure that happens.
714   if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
715     RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
716 
717   return true;
718 }
719 
720 /// Helper function to get the source and destination register classes for a
721 /// copy. Returns a std::pair containing the source register class for the
722 /// copy, and the destination register class for the copy. If a register class
723 /// cannot be determined, then it will be nullptr.
724 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
725 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
726                      MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
727                      const RegisterBankInfo &RBI) {
728   Register DstReg = I.getOperand(0).getReg();
729   Register SrcReg = I.getOperand(1).getReg();
730   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
731   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
732   unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
733   unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
734 
735   // Special casing for cross-bank copies of s1s. We can technically represent
736   // a 1-bit value with any size of register. The minimum size for a GPR is 32
737   // bits. So, we need to put the FPR on 32 bits as well.
738   //
739   // FIXME: I'm not sure if this case holds true outside of copies. If it does,
740   // then we can pull it into the helpers that get the appropriate class for a
741   // register bank. Or make a new helper that carries along some constraint
742   // information.
743   if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
744     SrcSize = DstSize = 32;
745 
746   return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
747           getMinClassForRegBank(DstRegBank, DstSize, true)};
748 }
749 
750 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
751                        MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
752                        const RegisterBankInfo &RBI) {
753   Register DstReg = I.getOperand(0).getReg();
754   Register SrcReg = I.getOperand(1).getReg();
755   const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
756   const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
757 
758   // Find the correct register classes for the source and destination registers.
759   const TargetRegisterClass *SrcRC;
760   const TargetRegisterClass *DstRC;
761   std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);
762 
763   if (!DstRC) {
764     LLVM_DEBUG(dbgs() << "Unexpected dest size "
765                       << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
766     return false;
767   }
768 
769   // A couple of helpers below for making sure that the copy we produce is valid.
770 
771   // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
772   // to verify that the src and dst are the same size, since that's handled by
773   // the SUBREG_TO_REG.
774   bool KnownValid = false;
775 
776   // Returns true, or asserts if something we don't expect happens. Instead of
777   // returning true, we return isValidCopy() to ensure that we verify the
778   // result.
779   auto CheckCopy = [&]() {
780     // If we have a bitcast or something, we can't have physical registers.
781     assert((I.isCopy() ||
782             (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
783              !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
784            "No phys reg on generic operator!");
785     bool ValidCopy = true;
786 #ifndef NDEBUG
787     ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
788     assert(ValidCopy && "Invalid copy.");
789 #endif
790     return ValidCopy;
791   };
792 
793   // Is this a copy? If so, then we may need to insert a subregister copy.
794   if (I.isCopy()) {
795     // Yes. Check if there's anything to fix up.
796     if (!SrcRC) {
797       LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
798       return false;
799     }
800 
801     unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
802     unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
803     unsigned SubReg;
804 
805     // If the source bank doesn't support a subregister copy small enough,
806     // then we first need to copy to the destination bank.
807     if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
808       const TargetRegisterClass *DstTempRC =
809           getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
810       getSubRegForClass(DstRC, TRI, SubReg);
811 
812       MachineIRBuilder MIB(I);
813       auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
814       copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
815     } else if (SrcSize > DstSize) {
816       // If the source register is bigger than the destination we need to
817       // perform a subregister copy.
818       const TargetRegisterClass *SubRegRC =
819           getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
820       getSubRegForClass(SubRegRC, TRI, SubReg);
821       copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
822     } else if (DstSize > SrcSize) {
823       // If the destination register is bigger than the source we need to do
824       // a promotion using SUBREG_TO_REG.
825       const TargetRegisterClass *PromotionRC =
826           getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
827       getSubRegForClass(SrcRC, TRI, SubReg);
828 
829       Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
830       BuildMI(*I.getParent(), I, I.getDebugLoc(),
831               TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
832           .addImm(0)
833           .addUse(SrcReg)
834           .addImm(SubReg);
835       MachineOperand &RegOp = I.getOperand(1);
836       RegOp.setReg(PromoteReg);
837 
838       // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
839       KnownValid = true;
840     }
841 
842     // If the destination is a physical register, then there's nothing to
843     // change, so we're done.
844     if (Register::isPhysicalRegister(DstReg))
845       return CheckCopy();
846   }
847 
848   // No need to constrain SrcReg. It will get constrained when we hit another
849   // of its uses or defs. Copies do not have constraints.
850   if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
851     LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
852                       << " operand\n");
853     return false;
854   }
855   I.setDesc(TII.get(AArch64::COPY));
856   return CheckCopy();
857 }
858 
859 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
860   if (!DstTy.isScalar() || !SrcTy.isScalar())
861     return GenericOpc;
862 
863   const unsigned DstSize = DstTy.getSizeInBits();
864   const unsigned SrcSize = SrcTy.getSizeInBits();
865 
866   switch (DstSize) {
867   case 32:
868     switch (SrcSize) {
869     case 32:
870       switch (GenericOpc) {
871       case TargetOpcode::G_SITOFP:
872         return AArch64::SCVTFUWSri;
873       case TargetOpcode::G_UITOFP:
874         return AArch64::UCVTFUWSri;
875       case TargetOpcode::G_FPTOSI:
876         return AArch64::FCVTZSUWSr;
877       case TargetOpcode::G_FPTOUI:
878         return AArch64::FCVTZUUWSr;
879       default:
880         return GenericOpc;
881       }
882     case 64:
883       switch (GenericOpc) {
884       case TargetOpcode::G_SITOFP:
885         return AArch64::SCVTFUXSri;
886       case TargetOpcode::G_UITOFP:
887         return AArch64::UCVTFUXSri;
888       case TargetOpcode::G_FPTOSI:
889         return AArch64::FCVTZSUWDr;
890       case TargetOpcode::G_FPTOUI:
891         return AArch64::FCVTZUUWDr;
892       default:
893         return GenericOpc;
894       }
895     default:
896       return GenericOpc;
897     }
898   case 64:
899     switch (SrcSize) {
900     case 32:
901       switch (GenericOpc) {
902       case TargetOpcode::G_SITOFP:
903         return AArch64::SCVTFUWDri;
904       case TargetOpcode::G_UITOFP:
905         return AArch64::UCVTFUWDri;
906       case TargetOpcode::G_FPTOSI:
907         return AArch64::FCVTZSUXSr;
908       case TargetOpcode::G_FPTOUI:
909         return AArch64::FCVTZUUXSr;
910       default:
911         return GenericOpc;
912       }
913     case 64:
914       switch (GenericOpc) {
915       case TargetOpcode::G_SITOFP:
916         return AArch64::SCVTFUXDri;
917       case TargetOpcode::G_UITOFP:
918         return AArch64::UCVTFUXDri;
919       case TargetOpcode::G_FPTOSI:
920         return AArch64::FCVTZSUXDr;
921       case TargetOpcode::G_FPTOUI:
922         return AArch64::FCVTZUUXDr;
923       default:
924         return GenericOpc;
925       }
926     default:
927       return GenericOpc;
928     }
929   default:
930     return GenericOpc;
931   }
932   return GenericOpc;
933 }
934 
935 static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
936                                 const RegisterBankInfo &RBI) {
937   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
938   bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
939                AArch64::GPRRegBankID);
940   LLT Ty = MRI.getType(I.getOperand(0).getReg());
941   if (Ty == LLT::scalar(32))
942     return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
943   else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
944     return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
945   return 0;
946 }
947 
948 /// Helper function to select the opcode for a G_FCMP.
949 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
950   // If this is a compare against +0.0, then we don't have to explicitly
951   // materialize a constant.
952   const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
953   bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
954   unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
955   if (OpSize != 32 && OpSize != 64)
956     return 0;
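  // Indexed by [ShouldUseImm][OpSize == 64]; the immediate forms compare
  // against +0.0 without materializing a constant.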
957   unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
958                               {AArch64::FCMPSri, AArch64::FCMPDri}};
959   return CmpOpcTbl[ShouldUseImm][OpSize == 64];
960 }
961 
962 /// Returns true if \p P is an unsigned integer comparison predicate.
963 static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
964   switch (P) {
965   default:
966     return false;
967   case CmpInst::ICMP_UGT:
968   case CmpInst::ICMP_UGE:
969   case CmpInst::ICMP_ULT:
970   case CmpInst::ICMP_ULE:
971     return true;
972   }
973 }
974 
975 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
976   switch (P) {
977   default:
978     llvm_unreachable("Unknown condition code!");
979   case CmpInst::ICMP_NE:
980     return AArch64CC::NE;
981   case CmpInst::ICMP_EQ:
982     return AArch64CC::EQ;
983   case CmpInst::ICMP_SGT:
984     return AArch64CC::GT;
985   case CmpInst::ICMP_SGE:
986     return AArch64CC::GE;
987   case CmpInst::ICMP_SLT:
988     return AArch64CC::LT;
989   case CmpInst::ICMP_SLE:
990     return AArch64CC::LE;
991   case CmpInst::ICMP_UGT:
992     return AArch64CC::HI;
993   case CmpInst::ICMP_UGE:
994     return AArch64CC::HS;
995   case CmpInst::ICMP_ULT:
996     return AArch64CC::LO;
997   case CmpInst::ICMP_ULE:
998     return AArch64CC::LS;
999   }
1000 }
1001 
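/// Change a floating-point comparison predicate into the AArch64 condition
/// code(s) that test it after an FCMP. FCMP sets NZCV to 1000 (less than),
/// 0110 (equal), 0010 (greater than) or 0011 (unordered), which is why e.g.
/// FCMP_OLT maps to MI and FCMP_ORD to VC. Predicates that require a second
/// check (ONE, UEQ) return it in \p CondCode2.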
1002 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
1003                                       AArch64CC::CondCode &CondCode,
1004                                       AArch64CC::CondCode &CondCode2) {
1005   CondCode2 = AArch64CC::AL;
1006   switch (P) {
1007   default:
1008     llvm_unreachable("Unknown FP condition!");
1009   case CmpInst::FCMP_OEQ:
1010     CondCode = AArch64CC::EQ;
1011     break;
1012   case CmpInst::FCMP_OGT:
1013     CondCode = AArch64CC::GT;
1014     break;
1015   case CmpInst::FCMP_OGE:
1016     CondCode = AArch64CC::GE;
1017     break;
1018   case CmpInst::FCMP_OLT:
1019     CondCode = AArch64CC::MI;
1020     break;
1021   case CmpInst::FCMP_OLE:
1022     CondCode = AArch64CC::LS;
1023     break;
1024   case CmpInst::FCMP_ONE:
1025     CondCode = AArch64CC::MI;
1026     CondCode2 = AArch64CC::GT;
1027     break;
1028   case CmpInst::FCMP_ORD:
1029     CondCode = AArch64CC::VC;
1030     break;
1031   case CmpInst::FCMP_UNO:
1032     CondCode = AArch64CC::VS;
1033     break;
1034   case CmpInst::FCMP_UEQ:
1035     CondCode = AArch64CC::EQ;
1036     CondCode2 = AArch64CC::VS;
1037     break;
1038   case CmpInst::FCMP_UGT:
1039     CondCode = AArch64CC::HI;
1040     break;
1041   case CmpInst::FCMP_UGE:
1042     CondCode = AArch64CC::PL;
1043     break;
1044   case CmpInst::FCMP_ULT:
1045     CondCode = AArch64CC::LT;
1046     break;
1047   case CmpInst::FCMP_ULE:
1048     CondCode = AArch64CC::LE;
1049     break;
1050   case CmpInst::FCMP_UNE:
1051     CondCode = AArch64CC::NE;
1052     break;
1053   }
1054 }
1055 
1056 /// Return a register which can be used as a bit to test in a TB(N)Z.
1057 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
1058                               MachineRegisterInfo &MRI) {
1059   assert(Reg.isValid() && "Expected valid register!");
1060   while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
1061     unsigned Opc = MI->getOpcode();
1062 
1063     if (!MI->getOperand(0).isReg() ||
1064         !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1065       break;
1066 
1067     // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
1068     //
1069     // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
1070     // on the truncated x is the same as the bit number on x.
1071     if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
1072         Opc == TargetOpcode::G_TRUNC) {
1073       Register NextReg = MI->getOperand(1).getReg();
1074       // Did we find something worth folding?
1075       if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
1076         break;
1077 
1078       // NextReg is worth folding. Keep looking.
1079       Reg = NextReg;
1080       continue;
1081     }
1082 
1083     // Attempt to find a suitable operation with a constant on one side.
1084     Optional<uint64_t> C;
1085     Register TestReg;
1086     switch (Opc) {
1087     default:
1088       break;
1089     case TargetOpcode::G_AND:
1090     case TargetOpcode::G_XOR: {
1091       TestReg = MI->getOperand(1).getReg();
1092       Register ConstantReg = MI->getOperand(2).getReg();
1093       auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1094       if (!VRegAndVal) {
1095         // AND commutes, check the other side for a constant.
1096         // FIXME: Can we canonicalize the constant so that it's always on the
1097         // same side at some point earlier?
1098         std::swap(ConstantReg, TestReg);
1099         VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
1100       }
1101       if (VRegAndVal)
1102         C = VRegAndVal->Value;
1103       break;
1104     }
1105     case TargetOpcode::G_ASHR:
1106     case TargetOpcode::G_LSHR:
1107     case TargetOpcode::G_SHL: {
1108       TestReg = MI->getOperand(1).getReg();
1109       auto VRegAndVal =
1110           getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1111       if (VRegAndVal)
1112         C = VRegAndVal->Value;
1113       break;
1114     }
1115     }
1116 
1117     // Didn't find a constant or viable register. Bail out of the loop.
1118     if (!C || !TestReg.isValid())
1119       break;
1120 
1121     // We found a suitable instruction with a constant. Check to see if we can
1122     // walk through the instruction.
1123     Register NextReg;
1124     unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
1125     switch (Opc) {
1126     default:
1127       break;
1128     case TargetOpcode::G_AND:
1129       // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
1130       if ((*C >> Bit) & 1)
1131         NextReg = TestReg;
1132       break;
1133     case TargetOpcode::G_SHL:
1134       // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
1135       // the type of the register.
1136       if (*C <= Bit && (Bit - *C) < TestRegSize) {
1137         NextReg = TestReg;
1138         Bit = Bit - *C;
1139       }
1140       break;
1141     case TargetOpcode::G_ASHR:
1142       // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
1143       // in x
1144       NextReg = TestReg;
1145       Bit = Bit + *C;
1146       if (Bit >= TestRegSize)
1147         Bit = TestRegSize - 1;
1148       break;
1149     case TargetOpcode::G_LSHR:
1150       // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
1151       if ((Bit + *C) < TestRegSize) {
1152         NextReg = TestReg;
1153         Bit = Bit + *C;
1154       }
1155       break;
1156     case TargetOpcode::G_XOR:
1157       // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
1158       // appropriate.
1159       //
1160       // e.g. If x' = xor x, c, and the b-th bit is set in c then
1161       //
1162       // tbz x', b -> tbnz x, b
1163       //
1164       // Because x' only has the b-th bit set if x does not.
1165       if ((*C >> Bit) & 1)
1166         Invert = !Invert;
1167       NextReg = TestReg;
1168       break;
1169     }
1170 
1171     // Check if we found anything worth folding.
1172     if (!NextReg.isValid())
1173       return Reg;
1174     Reg = NextReg;
1175   }
1176 
1177   return Reg;
1178 }
1179 
1180 MachineInstr *AArch64InstructionSelector::emitTestBit(
1181     Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
1182     MachineIRBuilder &MIB) const {
1183   assert(TestReg.isValid());
1184   assert(ProduceNonFlagSettingCondBr &&
1185          "Cannot emit TB(N)Z with speculation tracking!");
1186   MachineRegisterInfo &MRI = *MIB.getMRI();
1187 
1188   // Attempt to optimize the test bit by walking over instructions.
1189   TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
1190   LLT Ty = MRI.getType(TestReg);
1191   unsigned Size = Ty.getSizeInBits();
1192   assert(!Ty.isVector() && "Expected a scalar!");
1193   assert(Bit < 64 && "Bit is too large!");
1194 
1195   // When the test register is a 64-bit register, we have to narrow to make
1196   // TBNZW work.
1197   bool UseWReg = Bit < 32;
1198   unsigned NecessarySize = UseWReg ? 32 : 64;
1199   if (Size != NecessarySize)
1200     TestReg = moveScalarRegClass(
1201         TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
1202         MIB);
1203 
1204   static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
1205                                           {AArch64::TBZW, AArch64::TBNZW}};
1206   unsigned Opc = OpcTable[UseWReg][IsNegative];
1207   auto TestBitMI =
1208       MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
1209   constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
1210   return &*TestBitMI;
1211 }
1212 
1213 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
1214     MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
1215     MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
1216   // Given something like this:
1217   //
1218   //  %x = ...Something...
1219   //  %one = G_CONSTANT i64 1
1220   //  %zero = G_CONSTANT i64 0
1221   //  %and = G_AND %x, %one
1222   //  %cmp = G_ICMP intpred(ne), %and, %zero
1223   //  %cmp_trunc = G_TRUNC %cmp
1224   //  G_BRCOND %cmp_trunc, %bb.3
1225   //
1226   // We want to try and fold the AND into the G_BRCOND and produce either a
1227   // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
1228   //
1229   // In this case, we'd get
1230   //
1231   // TBNZ %x %bb.3
1232   //
1233   if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
1234     return false;
1235 
1236   // Need to be comparing against 0 to fold.
1237   if (CmpConstant != 0)
1238     return false;
1239 
1240   MachineRegisterInfo &MRI = *MIB.getMRI();
1241 
1242   // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
1243   // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
1244   // so folding would be redundant.
1245   if (Pred != CmpInst::Predicate::ICMP_EQ &&
1246       Pred != CmpInst::Predicate::ICMP_NE)
1247     return false;
1248 
1249   // Check if the AND has a constant on its RHS which we can use as a mask.
1250   // If it's a power of 2, then it's the same as checking a specific bit.
1251   // (e.g., ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
1252   auto MaybeBit =
1253       getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
1254   if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
1255     return false;
1256 
1257   uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
1258   Register TestReg = AndInst->getOperand(1).getReg();
1259   bool Invert = Pred == CmpInst::Predicate::ICMP_NE;
1260 
1261   // Emit a TB(N)Z.
1262   emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
1263   return true;
1264 }
1265 
1266 bool AArch64InstructionSelector::selectCompareBranch(
1267     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1268 
1269   const Register CondReg = I.getOperand(0).getReg();
1270   MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1271   MachineInstr *CCMI = MRI.getVRegDef(CondReg);
1272   if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
1273     CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
1274   if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
1275     return false;
1276 
1277   Register LHS = CCMI->getOperand(2).getReg();
1278   Register RHS = CCMI->getOperand(3).getReg();
1279   auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1280   MachineIRBuilder MIB(I);
1281   CmpInst::Predicate Pred =
1282       (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
1283   MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
1284 
1285   // When we can emit a TB(N)Z, prefer that.
1286   //
1287   // Handle non-commutative condition codes first.
1288   // Note that we don't want to do this when we have a G_AND because it can
1289   // become a tst. The tst will make the test bit in the TB(N)Z redundant.
1290   if (VRegAndVal && LHSMI->getOpcode() != TargetOpcode::G_AND) {
1291     int64_t C = VRegAndVal->Value;
1292 
1293     // When we have a greater-than comparison, we can just test if the msb is
1294     // zero.
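    // (For a signed compare, x > -1 is equivalent to x >= 0, i.e. the sign bit
    // is clear.)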
1295     if (C == -1 && Pred == CmpInst::ICMP_SGT) {
1296       uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1297       emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
1298       I.eraseFromParent();
1299       return true;
1300     }
1301 
1302     // When we have a less than comparison, we can just test if the msb is not
1303     // zero.
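    // (For a signed compare, x < 0 holds exactly when the sign bit is set.)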
1304     if (C == 0 && Pred == CmpInst::ICMP_SLT) {
1305       uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
1306       emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
1307       I.eraseFromParent();
1308       return true;
1309     }
1310   }
1311 
1312   if (!VRegAndVal) {
1313     std::swap(RHS, LHS);
1314     VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
1315     LHSMI = getDefIgnoringCopies(LHS, MRI);
1316   }
1317 
1318   if (!VRegAndVal || VRegAndVal->Value != 0) {
1319     // If we can't select a CBZ then emit a cmp + Bcc.
1320     MachineInstr *Cmp;
1321     std::tie(Cmp, Pred) = emitIntegerCompare(
1322         CCMI->getOperand(2), CCMI->getOperand(3), CCMI->getOperand(1), MIB);
1323     if (!Cmp)
1324       return false;
1325     const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
1326     MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
1327     I.eraseFromParent();
1328     return true;
1329   }
1330 
1331   // Try to emit a TB(N)Z for an eq or ne condition.
1332   if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
1333                                  MIB)) {
1334     I.eraseFromParent();
1335     return true;
1336   }
1337 
1338   const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
1339   if (RB.getID() != AArch64::GPRRegBankID)
1340     return false;
1341   if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
1342     return false;
1343 
1344   const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
1345   unsigned CBOpc = 0;
1346   if (CmpWidth <= 32)
1347     CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
1348   else if (CmpWidth == 64)
1349     CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
1350   else
1351     return false;
1352 
1353   BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
1354       .addUse(LHS)
1355       .addMBB(DestMBB)
1356       .constrainAllUses(TII, TRI, RBI);
1357 
1358   I.eraseFromParent();
1359   return true;
1360 }
1361 
1362 /// Returns the element immediate value of a vector shift operand if found.
1363 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
1364 static Optional<int64_t> getVectorShiftImm(Register Reg,
1365                                            MachineRegisterInfo &MRI) {
1366   assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
1367   MachineInstr *OpMI = MRI.getVRegDef(Reg);
1368   assert(OpMI && "Expected to find a vreg def for vector shift operand");
1369   if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
1370     return None;
1371 
1372   // Check all operands are identical immediates.
1373   int64_t ImmVal = 0;
1374   for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
1375     auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
1376     if (!VRegAndVal)
1377       return None;
1378 
1379     if (Idx == 1)
1380       ImmVal = VRegAndVal->Value;
1381     if (ImmVal != VRegAndVal->Value)
1382       return None;
1383   }
1384 
1385   return ImmVal;
1386 }
1387 
1388 /// Matches and returns the shift immediate value for a SHL instruction given
1389 /// a shift operand.
1390 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) {
1391   Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
1392   if (!ShiftImm)
1393     return None;
1394   // Check the immediate is in range for a SHL.
1395   int64_t Imm = *ShiftImm;
1396   if (Imm < 0)
1397     return None;
1398   switch (SrcTy.getElementType().getSizeInBits()) {
1399   default:
1400     LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
1401     return None;
1402   case 8:
1403     if (Imm > 7)
1404       return None;
1405     break;
1406   case 16:
1407     if (Imm > 15)
1408       return None;
1409     break;
1410   case 32:
1411     if (Imm > 31)
1412       return None;
1413     break;
1414   case 64:
1415     if (Imm > 63)
1416       return None;
1417     break;
1418   }
1419   return Imm;
1420 }
1421 
1422 bool AArch64InstructionSelector::selectVectorSHL(
1423     MachineInstr &I, MachineRegisterInfo &MRI) const {
1424   assert(I.getOpcode() == TargetOpcode::G_SHL);
1425   Register DstReg = I.getOperand(0).getReg();
1426   const LLT Ty = MRI.getType(DstReg);
1427   Register Src1Reg = I.getOperand(1).getReg();
1428   Register Src2Reg = I.getOperand(2).getReg();
1429 
1430   if (!Ty.isVector())
1431     return false;
1432 
1433   // Check if we have a vector of constants on RHS that we can select as the
1434   // immediate form.
1435   Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);
1436 
1437   unsigned Opc = 0;
1438   if (Ty == LLT::vector(2, 64)) {
1439     Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
1440   } else if (Ty == LLT::vector(4, 32)) {
1441     Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
1442   } else if (Ty == LLT::vector(2, 32)) {
1443     Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
1444   } else {
1445     LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
1446     return false;
1447   }
1448 
1449   MachineIRBuilder MIB(I);
1450   auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
1451   if (ImmVal)
1452     Shl.addImm(*ImmVal);
1453   else
1454     Shl.addUse(Src2Reg);
1455   constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
1456   I.eraseFromParent();
1457   return true;
1458 }
1459 
1460 bool AArch64InstructionSelector::selectVectorASHR(
1461     MachineInstr &I, MachineRegisterInfo &MRI) const {
1462   assert(I.getOpcode() == TargetOpcode::G_ASHR);
1463   Register DstReg = I.getOperand(0).getReg();
1464   const LLT Ty = MRI.getType(DstReg);
1465   Register Src1Reg = I.getOperand(1).getReg();
1466   Register Src2Reg = I.getOperand(2).getReg();
1467 
1468   if (!Ty.isVector())
1469     return false;
1470 
1471   // There is no vector shift-right-by-register instruction, but the signed
1472   // shift-left-by-register instruction (SSHL) takes a signed shift amount,
1473   // where negative values specify a right shift.
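  // For example (rough sketch), a v4s32 G_ASHR %x, %amt is selected as:
  //   %neg = NEGv4i32 %amt
  //   %dst = SSHLv4i32 %x, %neg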
1474 
1475   unsigned Opc = 0;
1476   unsigned NegOpc = 0;
1477   const TargetRegisterClass *RC = nullptr;
1478   if (Ty == LLT::vector(2, 64)) {
1479     Opc = AArch64::SSHLv2i64;
1480     NegOpc = AArch64::NEGv2i64;
1481     RC = &AArch64::FPR128RegClass;
1482   } else if (Ty == LLT::vector(4, 32)) {
1483     Opc = AArch64::SSHLv4i32;
1484     NegOpc = AArch64::NEGv4i32;
1485     RC = &AArch64::FPR128RegClass;
1486   } else if (Ty == LLT::vector(2, 32)) {
1487     Opc = AArch64::SSHLv2i32;
1488     NegOpc = AArch64::NEGv2i32;
1489     RC = &AArch64::FPR64RegClass;
1490   } else {
1491     LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
1492     return false;
1493   }
1494 
1495   MachineIRBuilder MIB(I);
1496   auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
1497   constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
1498   auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
1499   constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
1500   I.eraseFromParent();
1501   return true;
1502 }
1503 
1504 bool AArch64InstructionSelector::selectVaStartAAPCS(
1505     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1506   return false;
1507 }
1508 
1509 bool AArch64InstructionSelector::selectVaStartDarwin(
1510     MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
1511   AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
1512   Register ListReg = I.getOperand(0).getReg();
1513 
1514   Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1515 
1516   auto MIB =
1517       BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
1518           .addDef(ArgsAddrReg)
1519           .addFrameIndex(FuncInfo->getVarArgsStackIndex())
1520           .addImm(0)
1521           .addImm(0);
1522 
1523   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1524 
1525   MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
1526             .addUse(ArgsAddrReg)
1527             .addUse(ListReg)
1528             .addImm(0)
1529             .addMemOperand(*I.memoperands_begin());
1530 
1531   constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
1532   I.eraseFromParent();
1533   return true;
1534 }
1535 
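// Materialize a large code-model global or block address via a MOVZ of the
// low 16 bits (MO_G0) followed by three MOVKs for the G1, G2 and G3 16-bit
// chunks, roughly (illustrative sketch):
//   %t0:gpr64 = MOVZXi @sym(g0, nc), 0
//   %t1:gpr64 = MOVKXi %t0, @sym(g1, nc), 16
//   %t2:gpr64 = MOVKXi %t1, @sym(g2, nc), 32
//   %dst      = MOVKXi %t2, @sym(g3), 48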
1536 void AArch64InstructionSelector::materializeLargeCMVal(
1537     MachineInstr &I, const Value *V, unsigned OpFlags) const {
1538   MachineBasicBlock &MBB = *I.getParent();
1539   MachineFunction &MF = *MBB.getParent();
1540   MachineRegisterInfo &MRI = MF.getRegInfo();
1541   MachineIRBuilder MIB(I);
1542 
1543   auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
1544   MovZ->addOperand(MF, I.getOperand(1));
1545   MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
1546                                      AArch64II::MO_NC);
1547   MovZ->addOperand(MF, MachineOperand::CreateImm(0));
1548   constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);
1549 
1550   auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
1551                        Register ForceDstReg) {
1552     Register DstReg = ForceDstReg
1553                           ? ForceDstReg
1554                           : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
1555     auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
1556     if (auto *GV = dyn_cast<GlobalValue>(V)) {
1557       MovI->addOperand(MF, MachineOperand::CreateGA(
1558                                GV, MovZ->getOperand(1).getOffset(), Flags));
1559     } else {
1560       MovI->addOperand(
1561           MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
1562                                        MovZ->getOperand(1).getOffset(), Flags));
1563     }
1564     MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
1565     constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
1566     return DstReg;
1567   };
1568   Register DstReg = BuildMovK(MovZ.getReg(0),
1569                               AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
1570   DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
1571   BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
1572   return;
1573 }
1574 
1575 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
1576   MachineBasicBlock &MBB = *I.getParent();
1577   MachineFunction &MF = *MBB.getParent();
1578   MachineRegisterInfo &MRI = MF.getRegInfo();
1579 
1580   switch (I.getOpcode()) {
1581   case TargetOpcode::G_SHL:
1582   case TargetOpcode::G_ASHR:
1583   case TargetOpcode::G_LSHR: {
1584     // These shifts are legalized to have 64 bit shift amounts because we want
1585     // to take advantage of the existing imported selection patterns that assume
1586     // the immediates are s64s. However, if the shifted type is 32 bits and for
1587     // some reason we receive input GMIR that has an s64 shift amount that's not
1588     // a G_CONSTANT, insert a truncate so that we can still select the s32
1589     // register-register variant.
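    // Rough sketch of the rewrite below (illustrative):
    //   %d:gpr(s32) = G_LSHR %x:gpr(s32), %amt:gpr(s64)
    // becomes
    //   %t:gpr(s32) = COPY %amt.sub_32
    //   %d:gpr(s32) = G_LSHR %x, %t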
1590     Register SrcReg = I.getOperand(1).getReg();
1591     Register ShiftReg = I.getOperand(2).getReg();
1592     const LLT ShiftTy = MRI.getType(ShiftReg);
1593     const LLT SrcTy = MRI.getType(SrcReg);
1594     if (SrcTy.isVector())
1595       return false;
1596     assert(!ShiftTy.isVector() && "unexpected vector shift ty");
1597     if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
1598       return false;
1599     auto *AmtMI = MRI.getVRegDef(ShiftReg);
1600     assert(AmtMI && "could not find a vreg definition for shift amount");
1601     if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
1602       // Insert a subregister copy to implement a 64->32 trunc
1603       MachineIRBuilder MIB(I);
1604       auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
1605                        .addReg(ShiftReg, 0, AArch64::sub_32);
1606       MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1607       I.getOperand(2).setReg(Trunc.getReg(0));
1608     }
1609     return true;
1610   }
1611   case TargetOpcode::G_STORE:
1612     return contractCrossBankCopyIntoStore(I, MRI);
1613   case TargetOpcode::G_PTR_ADD:
1614     return convertPtrAddToAdd(I, MRI);
1615   case TargetOpcode::G_LOAD: {
1616     // For scalar loads of pointers, we try to convert the dest type from p0
1617     // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
1618     // conversion, this should be ok because all users should have been
1619     // selected already, so the type doesn't matter for them.
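    // E.g. (illustrative): %p:gpr(p0) = G_LOAD %addr(p0) is retyped here to
    // %p:gpr(s64) = G_LOAD %addr(p0) before selection continues.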
1620     Register DstReg = I.getOperand(0).getReg();
1621     const LLT DstTy = MRI.getType(DstReg);
1622     if (!DstTy.isPointer())
1623       return false;
1624     MRI.setType(DstReg, LLT::scalar(64));
1625     return true;
1626   }
1627   default:
1628     return false;
1629   }
1630 }
1631 
1632 /// This lowering tries to look for G_PTR_ADD instructions and then converts
1633 /// them to a standard G_ADD with a COPY on the source.
1634 ///
1635 /// The motivation behind this is to expose the add semantics to the imported
1636 /// tablegen patterns. We shouldn't need to check for uses being loads/stores,
1637 /// because the selector works bottom up, uses before defs. By the time we
1638 /// end up trying to select a G_PTR_ADD, we should have already attempted to
1639 /// fold it into addressing modes; if we got here, that attempt failed.
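///
/// Rough sketch of the rewrite (illustrative, scalar case):
///   %dst:gpr(p0) = G_PTR_ADD %base(p0), %off(s64)
/// becomes
///   %intbase:gpr(s64) = G_PTRTOINT %base(p0)
///   %dst:gpr(s64) = G_ADD %intbase, %off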
1640 bool AArch64InstructionSelector::convertPtrAddToAdd(
1641     MachineInstr &I, MachineRegisterInfo &MRI) {
1642   assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1643   Register DstReg = I.getOperand(0).getReg();
1644   Register AddOp1Reg = I.getOperand(1).getReg();
1645   const LLT PtrTy = MRI.getType(DstReg);
1646   if (PtrTy.getAddressSpace() != 0)
1647     return false;
1648 
1649   MachineIRBuilder MIB(I);
1650   const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64);
1651   auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg);
1652   // Set regbanks on the registers.
1653   if (PtrTy.isVector())
1654     MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID));
1655   else
1656     MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
1657 
1658   // Now turn the %dst(p0) = G_PTR_ADD %base, off into:
1659   // %dst(intty) = G_ADD %intbase, off
1660   I.setDesc(TII.get(TargetOpcode::G_ADD));
1661   MRI.setType(DstReg, CastPtrTy);
1662   I.getOperand(1).setReg(PtrToInt.getReg(0));
1663   if (!select(*PtrToInt)) {
1664     LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd");
1665     return false;
1666   }
1667   return true;
1668 }
1669 
1670 bool AArch64InstructionSelector::earlySelectSHL(
1671     MachineInstr &I, MachineRegisterInfo &MRI) const {
1672   // We try to match the immediate variant of LSL, which is actually an alias
1673   // for a special case of UBFM. Otherwise, we fall back to the imported
1674   // selector which will match the register variant.
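  // E.g. (illustrative): a 64-bit G_SHL by a constant 3 is selected as
  // UBFMXri %src, 61, 60, the UBFM encoding of "lsl xd, xn, #3".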
1675   assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op");
1676   const auto &MO = I.getOperand(2);
1677   auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI);
1678   if (!VRegAndVal)
1679     return false;
1680 
1681   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
1682   if (DstTy.isVector())
1683     return false;
1684   bool Is64Bit = DstTy.getSizeInBits() == 64;
1685   auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO);
1686   auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO);
1687   MachineIRBuilder MIB(I);
1688 
1689   if (!Imm1Fn || !Imm2Fn)
1690     return false;
1691 
1692   auto NewI =
1693       MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri,
1694                      {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()});
1695 
1696   for (auto &RenderFn : *Imm1Fn)
1697     RenderFn(NewI);
1698   for (auto &RenderFn : *Imm2Fn)
1699     RenderFn(NewI);
1700 
1701   I.eraseFromParent();
1702   return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
1703 }
1704 
1705 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore(
1706     MachineInstr &I, MachineRegisterInfo &MRI) {
1707   assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE");
1708   // If we're storing a scalar, it doesn't matter what register bank that
1709   // scalar is on. All that matters is the size.
1710   //
1711   // So, if we see something like this (with a 32-bit scalar as an example):
1712   //
1713   // %x:gpr(s32) = ... something ...
1714   // %y:fpr(s32) = COPY %x:gpr(s32)
1715   // G_STORE %y:fpr(s32)
1716   //
1717   // We can fix this up into something like this:
1718   //
1719   // G_STORE %x:gpr(s32)
1720   //
1721   // And then continue the selection process normally.
1722   Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI);
1723   if (!DefDstReg.isValid())
1724     return false;
1725   LLT DefDstTy = MRI.getType(DefDstReg);
1726   Register StoreSrcReg = I.getOperand(0).getReg();
1727   LLT StoreSrcTy = MRI.getType(StoreSrcReg);
1728 
1729   // If we get something strange like a physical register, then we shouldn't
1730   // go any further.
1731   if (!DefDstTy.isValid())
1732     return false;
1733 
1734   // Are the source and dst types the same size?
1735   if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits())
1736     return false;
1737 
1738   if (RBI.getRegBank(StoreSrcReg, MRI, TRI) ==
1739       RBI.getRegBank(DefDstReg, MRI, TRI))
1740     return false;
1741 
1742   // We have a cross-bank copy, which is entering a store. Let's fold it.
1743   I.getOperand(0).setReg(DefDstReg);
1744   return true;
1745 }
1746 
1747 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
1748   assert(I.getParent() && "Instruction should be in a basic block!");
1749   assert(I.getParent()->getParent() && "Instruction should be in a function!");
1750 
1751   MachineBasicBlock &MBB = *I.getParent();
1752   MachineFunction &MF = *MBB.getParent();
1753   MachineRegisterInfo &MRI = MF.getRegInfo();
1754 
1755   switch (I.getOpcode()) {
1756   case TargetOpcode::G_SHL:
1757     return earlySelectSHL(I, MRI);
1758   case TargetOpcode::G_CONSTANT: {
1759     bool IsZero = false;
1760     if (I.getOperand(1).isCImm())
1761       IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0;
1762     else if (I.getOperand(1).isImm())
1763       IsZero = I.getOperand(1).getImm() == 0;
1764 
1765     if (!IsZero)
1766       return false;
1767 
1768     Register DefReg = I.getOperand(0).getReg();
1769     LLT Ty = MRI.getType(DefReg);
1770     if (Ty.getSizeInBits() == 64) {
1771       I.getOperand(1).ChangeToRegister(AArch64::XZR, false);
1772       RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI);
1773     } else if (Ty.getSizeInBits() == 32) {
1774       I.getOperand(1).ChangeToRegister(AArch64::WZR, false);
1775       RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI);
1776     } else
1777       return false;
1778 
1779     I.setDesc(TII.get(TargetOpcode::COPY));
1780     return true;
1781   }
1782   default:
1783     return false;
1784   }
1785 }
1786 
1787 bool AArch64InstructionSelector::select(MachineInstr &I) {
1788   assert(I.getParent() && "Instruction should be in a basic block!");
1789   assert(I.getParent()->getParent() && "Instruction should be in a function!");
1790 
1791   MachineBasicBlock &MBB = *I.getParent();
1792   MachineFunction &MF = *MBB.getParent();
1793   MachineRegisterInfo &MRI = MF.getRegInfo();
1794 
1795   const AArch64Subtarget *Subtarget =
1796       &static_cast<const AArch64Subtarget &>(MF.getSubtarget());
1797   if (Subtarget->requiresStrictAlign()) {
1798     // We don't support this feature yet.
1799     LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n");
1800     return false;
1801   }
1802 
1803   unsigned Opcode = I.getOpcode();
1804   // G_PHI requires the same handling as PHI.
1805   if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) {
1806     // Certain non-generic instructions also need some special handling.
1807 
1808     if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
1809       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1810 
1811     if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
1812       const Register DefReg = I.getOperand(0).getReg();
1813       const LLT DefTy = MRI.getType(DefReg);
1814 
1815       const RegClassOrRegBank &RegClassOrBank =
1816         MRI.getRegClassOrRegBank(DefReg);
1817 
1818       const TargetRegisterClass *DefRC
1819         = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
1820       if (!DefRC) {
1821         if (!DefTy.isValid()) {
1822           LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
1823           return false;
1824         }
1825         const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
1826         DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
1827         if (!DefRC) {
1828           LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
1829           return false;
1830         }
1831       }
1832 
1833       I.setDesc(TII.get(TargetOpcode::PHI));
1834 
1835       return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
1836     }
1837 
1838     if (I.isCopy())
1839       return selectCopy(I, TII, MRI, TRI, RBI);
1840 
1841     return true;
1842   }
1843 
1844 
1845   if (I.getNumOperands() != I.getNumExplicitOperands()) {
1846     LLVM_DEBUG(
1847         dbgs() << "Generic instruction has unexpected implicit operands\n");
1848     return false;
1849   }
1850 
1851   // Try to do some lowering before we start instruction selecting. These
1852   // lowerings are purely transformations on the input G_MIR and so selection
1853   // must continue after any modification of the instruction.
1854   if (preISelLower(I)) {
1855     Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
1856   }
1857 
1858   // There may be patterns where the importer can't deal with them optimally,
1859   // but does select it to a suboptimal sequence so our custom C++ selection
1860   // code later never has a chance to work on it. Therefore, we have an early
1861   // selection attempt here to give priority to certain selection routines
1862   // over the imported ones.
1863   if (earlySelect(I))
1864     return true;
1865 
1866   if (selectImpl(I, *CoverageInfo))
1867     return true;
1868 
1869   LLT Ty =
1870       I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
1871 
1872   MachineIRBuilder MIB(I);
1873 
1874   switch (Opcode) {
1875   case TargetOpcode::G_BRCOND: {
1876     if (Ty.getSizeInBits() > 32) {
1877       // We shouldn't need this on AArch64, but it would be implemented as an
1878       // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
1879       // bit being tested is < 32.
1880       LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty
1881                         << ", expected at most 32 bits\n");
1882       return false;
1883     }
1884 
1885     const Register CondReg = I.getOperand(0).getReg();
1886     MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
1887 
1888     // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
1889     // instructions will not be produced, as they are conditional branch
1890     // instructions that do not set flags.
1891     if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1892       return true;
1893 
1894     if (ProduceNonFlagSettingCondBr) {
1895       auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1896                      .addUse(CondReg)
1897                      .addImm(/*bit offset=*/0)
1898                      .addMBB(DestMBB);
1899 
1900       I.eraseFromParent();
1901       return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1902     } else {
1903       auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1904                      .addDef(AArch64::WZR)
1905                      .addUse(CondReg)
1906                      .addImm(1);
1907       constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1908       auto Bcc =
1909           BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1910               .addImm(AArch64CC::EQ)
1911               .addMBB(DestMBB);
1912 
1913       I.eraseFromParent();
1914       return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1915     }
1916   }
1917 
1918   case TargetOpcode::G_BRINDIRECT: {
1919     I.setDesc(TII.get(AArch64::BR));
1920     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1921   }
1922 
1923   case TargetOpcode::G_BRJT:
1924     return selectBrJT(I, MRI);
1925 
1926   case AArch64::G_ADD_LOW: {
1927     // This op may have been separated from its ADRP companion by the localizer
1928     // or some other code motion pass. Given that many CPUs will try to
1929     // macro fuse these operations anyway, select this into a MOVaddr pseudo
1930     // which will later be expanded into an ADRP+ADD pair after scheduling.
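    // Rough sketch (illustrative):
    //   %page = ADRP @sym ; %dst = G_ADD_LOW %page, @sym
    // becomes
    //   %dst = MOVaddr @sym, @sym
    // which is expanded back into an ADRP + ADD pair after scheduling.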
1931     MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
1932     if (BaseMI->getOpcode() != AArch64::ADRP) {
1933       I.setDesc(TII.get(AArch64::ADDXri));
1934       I.addOperand(MachineOperand::CreateImm(0));
1935       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1936     }
1937     assert(TM.getCodeModel() == CodeModel::Small &&
1938            "Expected small code model");
1939     MachineIRBuilder MIB(I);
1940     auto Op1 = BaseMI->getOperand(1);
1941     auto Op2 = I.getOperand(2);
1942     auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
1943                        .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
1944                                          Op1.getTargetFlags())
1945                        .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
1946                                          Op2.getTargetFlags());
1947     I.eraseFromParent();
1948     return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
1949   }
1950 
1951   case TargetOpcode::G_BSWAP: {
1952     // Handle vector types for G_BSWAP directly.
1953     Register DstReg = I.getOperand(0).getReg();
1954     LLT DstTy = MRI.getType(DstReg);
1955 
1956     // We should only get vector types here; everything else is handled by the
1957     // importer right now.
1958     if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1959       LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1960       return false;
1961     }
1962 
1963     // Only handle 4 and 2 element vectors for now.
1964     // TODO: 16-bit elements.
1965     unsigned NumElts = DstTy.getNumElements();
1966     if (NumElts != 4 && NumElts != 2) {
1967       LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1968       return false;
1969     }
1970 
1971     // Choose the correct opcode for the supported types. Right now, that's
1972     // v2s32, v4s32, and v2s64.
1973     unsigned Opc = 0;
1974     unsigned EltSize = DstTy.getElementType().getSizeInBits();
1975     if (EltSize == 32)
1976       Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1977                                           : AArch64::REV32v16i8;
1978     else if (EltSize == 64)
1979       Opc = AArch64::REV64v16i8;
1980 
1981     // We should always get something by the time we get here...
1982     assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?");
1983 
1984     I.setDesc(TII.get(Opc));
1985     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1986   }
1987 
1988   case TargetOpcode::G_FCONSTANT:
1989   case TargetOpcode::G_CONSTANT: {
1990     const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
1991 
1992     const LLT s8 = LLT::scalar(8);
1993     const LLT s16 = LLT::scalar(16);
1994     const LLT s32 = LLT::scalar(32);
1995     const LLT s64 = LLT::scalar(64);
1996     const LLT p0 = LLT::pointer(0, 64);
1997 
1998     const Register DefReg = I.getOperand(0).getReg();
1999     const LLT DefTy = MRI.getType(DefReg);
2000     const unsigned DefSize = DefTy.getSizeInBits();
2001     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2002 
2003     // FIXME: Redundant check, but even less readable when factored out.
2004     if (isFP) {
2005       if (Ty != s32 && Ty != s64) {
2006         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2007                           << " constant, expected: " << s32 << " or " << s64
2008                           << '\n');
2009         return false;
2010       }
2011 
2012       if (RB.getID() != AArch64::FPRRegBankID) {
2013         LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty
2014                           << " constant on bank: " << RB
2015                           << ", expected: FPR\n");
2016         return false;
2017       }
2018 
2019       // The case when we have 0.0 is covered by tablegen. Reject it here so we
2020       // can be sure tablegen works correctly and isn't rescued by this code.
2021       if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0))
2022         return false;
2023     } else {
2024       // s32 and s64 are covered by tablegen.
2025       if (Ty != p0 && Ty != s8 && Ty != s16) {
2026         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2027                           << " constant, expected: " << s8 << ", " << s16
2028                           << ", or " << p0 << '\n');
2029         return false;
2030       }
2031 
2032       if (RB.getID() != AArch64::GPRRegBankID) {
2033         LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty
2034                           << " constant on bank: " << RB
2035                           << ", expected: GPR\n");
2036         return false;
2037       }
2038     }
2039 
2040     // We allow G_CONSTANT of types < 32b.
2041     const unsigned MovOpc =
2042         DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm;
2043 
2044     if (isFP) {
2045       // Either emit an FMOV, or emit a copy so that we can use a normal mov.
2046       const TargetRegisterClass &GPRRC =
2047           DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
2048       const TargetRegisterClass &FPRRC =
2049           DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
2050 
2051       // Can we use a FMOV instruction to represent the immediate?
2052       if (emitFMovForFConstant(I, MRI))
2053         return true;
2054 
2055       // For 64b values, emit a constant pool load instead.
2056       if (DefSize == 64) {
2057         auto *FPImm = I.getOperand(1).getFPImm();
2058         MachineIRBuilder MIB(I);
2059         auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB);
2060         if (!LoadMI) {
2061           LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n");
2062           return false;
2063         }
2064         MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()});
2065         I.eraseFromParent();
2066         return RBI.constrainGenericRegister(DefReg, FPRRC, MRI);
2067       }
2068 
2069       // Nope. Emit a copy and use a normal mov instead.
2070       const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC);
2071       MachineOperand &RegOp = I.getOperand(0);
2072       RegOp.setReg(DefGPRReg);
2073       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2074       MIB.buildCopy({DefReg}, {DefGPRReg});
2075 
2076       if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
2077         LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
2078         return false;
2079       }
2080 
2081       MachineOperand &ImmOp = I.getOperand(1);
2082       // FIXME: Is going through int64_t always correct?
2083       ImmOp.ChangeToImmediate(
2084           ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
2085     } else if (I.getOperand(1).isCImm()) {
2086       uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
2087       I.getOperand(1).ChangeToImmediate(Val);
2088     } else if (I.getOperand(1).isImm()) {
2089       uint64_t Val = I.getOperand(1).getImm();
2090       I.getOperand(1).ChangeToImmediate(Val);
2091     }
2092 
2093     I.setDesc(TII.get(MovOpc));
2094     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2095     return true;
2096   }
2097   case TargetOpcode::G_EXTRACT: {
2098     Register DstReg = I.getOperand(0).getReg();
2099     Register SrcReg = I.getOperand(1).getReg();
2100     LLT SrcTy = MRI.getType(SrcReg);
2101     LLT DstTy = MRI.getType(DstReg);
2102     (void)DstTy;
2103     unsigned SrcSize = SrcTy.getSizeInBits();
2104 
2105     if (SrcTy.getSizeInBits() > 64) {
2106       // This should be an extract of an s128, which is like a vector extract.
2107       if (SrcTy.getSizeInBits() != 128)
2108         return false;
2109       // Only support extracting 64 bits from an s128 at the moment.
2110       if (DstTy.getSizeInBits() != 64)
2111         return false;
2112 
2113       const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2114       const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2115       // Check we have the right regbank always.
2116       assert(SrcRB.getID() == AArch64::FPRRegBankID &&
2117              DstRB.getID() == AArch64::FPRRegBankID &&
2118              "Wrong extract regbank!");
2119       (void)SrcRB;
2120 
2121       // Emit the same code as a vector extract.
2122       // Offset must be a multiple of 64.
2123       unsigned Offset = I.getOperand(2).getImm();
2124       if (Offset % 64 != 0)
2125         return false;
2126       unsigned LaneIdx = Offset / 64;
2127       MachineIRBuilder MIB(I);
2128       MachineInstr *Extract = emitExtractVectorElt(
2129           DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB);
2130       if (!Extract)
2131         return false;
2132       I.eraseFromParent();
2133       return true;
2134     }
2135 
2136     I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
2137     MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
2138                                       Ty.getSizeInBits() - 1);
2139 
2140     if (SrcSize < 64) {
2141       assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
2142              "unexpected G_EXTRACT types");
2143       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2144     }
2145 
2146     DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2147     MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2148     MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
2149         .addReg(DstReg, 0, AArch64::sub_32);
2150     RBI.constrainGenericRegister(I.getOperand(0).getReg(),
2151                                  AArch64::GPR32RegClass, MRI);
2152     I.getOperand(0).setReg(DstReg);
2153 
2154     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2155   }
2156 
2157   case TargetOpcode::G_INSERT: {
2158     LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
2159     LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2160     unsigned DstSize = DstTy.getSizeInBits();
2161     // Larger inserts are vectors, same-size ones should be something else by
2162     // now (split up or turned into COPYs).
2163     if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
2164       return false;
2165 
2166     I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
2167     unsigned LSB = I.getOperand(3).getImm();
2168     unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
2169     I.getOperand(3).setImm((DstSize - LSB) % DstSize);
2170     MachineInstrBuilder(MF, I).addImm(Width - 1);
2171 
2172     if (DstSize < 64) {
2173       assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
2174              "unexpected G_INSERT types");
2175       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2176     }
2177 
2178     Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
2179     BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
2180             TII.get(AArch64::SUBREG_TO_REG))
2181         .addDef(SrcReg)
2182         .addImm(0)
2183         .addUse(I.getOperand(2).getReg())
2184         .addImm(AArch64::sub_32);
2185     RBI.constrainGenericRegister(I.getOperand(2).getReg(),
2186                                  AArch64::GPR32RegClass, MRI);
2187     I.getOperand(2).setReg(SrcReg);
2188 
2189     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2190   }
2191   case TargetOpcode::G_FRAME_INDEX: {
2192     // allocas and G_FRAME_INDEX are only supported in addrspace(0).
2193     if (Ty != LLT::pointer(0, 64)) {
2194       LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
2195                         << ", expected: " << LLT::pointer(0, 64) << '\n');
2196       return false;
2197     }
2198     I.setDesc(TII.get(AArch64::ADDXri));
2199 
2200     // MOs for a #0 shifted immediate.
2201     I.addOperand(MachineOperand::CreateImm(0));
2202     I.addOperand(MachineOperand::CreateImm(0));
2203 
2204     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2205   }
2206 
2207   case TargetOpcode::G_GLOBAL_VALUE: {
2208     auto GV = I.getOperand(1).getGlobal();
2209     if (GV->isThreadLocal())
2210       return selectTLSGlobalValue(I, MRI);
2211 
2212     unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM);
2213     if (OpFlags & AArch64II::MO_GOT) {
2214       I.setDesc(TII.get(AArch64::LOADgot));
2215       I.getOperand(1).setTargetFlags(OpFlags);
2216     } else if (TM.getCodeModel() == CodeModel::Large) {
2217       // Materialize the global using movz/movk instructions.
2218       materializeLargeCMVal(I, GV, OpFlags);
2219       I.eraseFromParent();
2220       return true;
2221     } else if (TM.getCodeModel() == CodeModel::Tiny) {
2222       I.setDesc(TII.get(AArch64::ADR));
2223       I.getOperand(1).setTargetFlags(OpFlags);
2224     } else {
2225       I.setDesc(TII.get(AArch64::MOVaddr));
2226       I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
2227       MachineInstrBuilder MIB(MF, I);
2228       MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
2229                            OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
2230     }
2231     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2232   }
2233 
2234   case TargetOpcode::G_ZEXTLOAD:
2235   case TargetOpcode::G_LOAD:
2236   case TargetOpcode::G_STORE: {
2237     bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD;
2238     MachineIRBuilder MIB(I);
2239 
2240     LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
2241 
2242     if (PtrTy != LLT::pointer(0, 64)) {
2243       LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
2244                         << ", expected: " << LLT::pointer(0, 64) << '\n');
2245       return false;
2246     }
2247 
2248     auto &MemOp = **I.memoperands_begin();
2249     if (MemOp.isAtomic()) {
2250       // For now we just support s8 acquire loads to be able to compile stack
2251       // protector code.
2252       if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
2253           MemOp.getSize() == 1) {
2254         I.setDesc(TII.get(AArch64::LDARB));
2255         return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2256       }
2257       LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
2258       return false;
2259     }
2260     unsigned MemSizeInBits = MemOp.getSize() * 8;
2261 
2262     const Register PtrReg = I.getOperand(1).getReg();
2263 #ifndef NDEBUG
2264     const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
2265     // Sanity-check the pointer register.
2266     assert(PtrRB.getID() == AArch64::GPRRegBankID &&
2267            "Load/Store pointer operand isn't a GPR");
2268     assert(MRI.getType(PtrReg).isPointer() &&
2269            "Load/Store pointer operand isn't a pointer");
2270 #endif
2271 
2272     const Register ValReg = I.getOperand(0).getReg();
2273     const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
2274 
2275     const unsigned NewOpc =
2276         selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
2277     if (NewOpc == I.getOpcode())
2278       return false;
2279 
2280     I.setDesc(TII.get(NewOpc));
2281 
2282     uint64_t Offset = 0;
2283     auto *PtrMI = MRI.getVRegDef(PtrReg);
2284 
2285     // Try to fold a GEP into our unsigned immediate addressing mode.
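    // E.g. (illustrative): for an 8-byte load whose pointer is
    // G_PTR_ADD %base, 16, we can fold the offset and emit LDRXui %base, 2
    // (the immediate is scaled by the access size).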
2286     if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
2287       if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
2288         int64_t Imm = *COff;
2289         const unsigned Size = MemSizeInBits / 8;
2290         const unsigned Scale = Log2_32(Size);
2291         if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
2292           Register Ptr2Reg = PtrMI->getOperand(1).getReg();
2293           I.getOperand(1).setReg(Ptr2Reg);
2294           PtrMI = MRI.getVRegDef(Ptr2Reg);
2295           Offset = Imm / Size;
2296         }
2297       }
2298     }
2299 
2300     // If we haven't folded anything into our addressing mode yet, try to fold
2301     // a frame index into the base+offset.
2302     if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
2303       I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
2304 
2305     I.addOperand(MachineOperand::CreateImm(Offset));
2306 
2307     // If we're storing a 0, use WZR/XZR.
2308     if (auto CVal = getConstantVRegVal(ValReg, MRI)) {
2309       if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) {
2310         if (I.getOpcode() == AArch64::STRWui)
2311           I.getOperand(0).setReg(AArch64::WZR);
2312         else if (I.getOpcode() == AArch64::STRXui)
2313           I.getOperand(0).setReg(AArch64::XZR);
2314       }
2315     }
2316 
2317     if (IsZExtLoad) {
2318       // The zextload from a smaller type to i32 should be handled by the importer.
2319       if (MRI.getType(ValReg).getSizeInBits() != 64)
2320         return false;
2321       // If we have a ZEXTLOAD then change the load's type to be a narrower reg
2322       // and zero_extend with SUBREG_TO_REG.
2323       Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2324       Register DstReg = I.getOperand(0).getReg();
2325       I.getOperand(0).setReg(LdReg);
2326 
2327       MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
2328       MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
2329           .addImm(0)
2330           .addUse(LdReg)
2331           .addImm(AArch64::sub_32);
2332       constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2333       return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
2334                                           MRI);
2335     }
2336     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2337   }
2338 
2339   case TargetOpcode::G_SMULH:
2340   case TargetOpcode::G_UMULH: {
2341     // Reject the various things we don't support yet.
2342     if (unsupportedBinOp(I, RBI, MRI, TRI))
2343       return false;
2344 
2345     const Register DefReg = I.getOperand(0).getReg();
2346     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2347 
2348     if (RB.getID() != AArch64::GPRRegBankID) {
2349       LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n");
2350       return false;
2351     }
2352 
2353     if (Ty != LLT::scalar(64)) {
2354       LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty
2355                         << ", expected: " << LLT::scalar(64) << '\n');
2356       return false;
2357     }
2358 
2359     unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr
2360                                                              : AArch64::UMULHrr;
2361     I.setDesc(TII.get(NewOpc));
2362 
2363     // Now that we selected an opcode, we need to constrain the register
2364     // operands to use appropriate classes.
2365     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2366   }
2367   case TargetOpcode::G_FADD:
2368   case TargetOpcode::G_FSUB:
2369   case TargetOpcode::G_FMUL:
2370   case TargetOpcode::G_FDIV:
2371 
2372   case TargetOpcode::G_ASHR:
2373     if (MRI.getType(I.getOperand(0).getReg()).isVector())
2374       return selectVectorASHR(I, MRI);
2375     LLVM_FALLTHROUGH;
2376   case TargetOpcode::G_SHL:
2377     if (Opcode == TargetOpcode::G_SHL &&
2378         MRI.getType(I.getOperand(0).getReg()).isVector())
2379       return selectVectorSHL(I, MRI);
2380     LLVM_FALLTHROUGH;
2381   case TargetOpcode::G_OR:
2382   case TargetOpcode::G_LSHR: {
2383     // Reject the various things we don't support yet.
2384     if (unsupportedBinOp(I, RBI, MRI, TRI))
2385       return false;
2386 
2387     const unsigned OpSize = Ty.getSizeInBits();
2388 
2389     const Register DefReg = I.getOperand(0).getReg();
2390     const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
2391 
2392     const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
2393     if (NewOpc == I.getOpcode())
2394       return false;
2395 
2396     I.setDesc(TII.get(NewOpc));
2397     // FIXME: Should the type be always reset in setDesc?
2398 
2399     // Now that we selected an opcode, we need to constrain the register
2400     // operands to use appropriate classes.
2401     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2402   }
2403 
2404   case TargetOpcode::G_PTR_ADD: {
2405     MachineIRBuilder MIRBuilder(I);
2406     emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2),
2407             MIRBuilder);
2408     I.eraseFromParent();
2409     return true;
2410   }
2411   case TargetOpcode::G_UADDO: {
2412     // TODO: Support other types.
2413     unsigned OpSize = Ty.getSizeInBits();
2414     if (OpSize != 32 && OpSize != 64) {
2415       LLVM_DEBUG(
2416           dbgs()
2417           << "G_UADDO currently only supported for 32 and 64 bit types.\n");
2418       return false;
2419     }
2420 
2421     // TODO: Support vectors.
2422     if (Ty.isVector()) {
2423       LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n");
2424       return false;
2425     }
2426 
2427     // Add and set the condition flags.
2428     unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr;
2429     MachineIRBuilder MIRBuilder(I);
2430     auto AddsMI = MIRBuilder.buildInstr(AddsOpc, {I.getOperand(0)},
2431                                         {I.getOperand(2), I.getOperand(3)});
2432     constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI);
2433 
2434     // Now, put the overflow result in the carry-out def (operand 1) of the
2435     // G_UADDO. CSINC increments the result when its predicate is false, so to
2436     // get the increment when it's true, use the inverted condition. In this
2437     // case, we want to increment when carry is set.
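    // E.g. (rough sketch, 32-bit case):
    //   %res:gpr32, %carry:gpr32 = G_UADDO %a, %b
    // becomes
    //   %res = ADDSWrr %a, %b            (sets NZCV)
    //   %carry = CSINCWr wzr, wzr, lo    (i.e. "cset %carry, hs")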
2438     auto CsetMI = MIRBuilder
2439                       .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()},
2440                                   {Register(AArch64::WZR), Register(AArch64::WZR)})
2441                       .addImm(getInvertedCondCode(AArch64CC::HS));
2442     constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI);
2443     I.eraseFromParent();
2444     return true;
2445   }
2446 
2447   case TargetOpcode::G_PTRMASK: {
2448     Register MaskReg = I.getOperand(2).getReg();
2449     Optional<int64_t> MaskVal = getConstantVRegVal(MaskReg, MRI);
2450     // TODO: Implement arbitrary cases
2451     if (!MaskVal || !isShiftedMask_64(*MaskVal))
2452       return false;
2453 
2454     uint64_t Mask = *MaskVal;
2455     I.setDesc(TII.get(AArch64::ANDXri));
2456     I.getOperand(2).ChangeToImmediate(
2457         AArch64_AM::encodeLogicalImmediate(Mask, 64));
2458 
2459     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2460   }
2461   case TargetOpcode::G_PTRTOINT:
2462   case TargetOpcode::G_TRUNC: {
2463     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2464     const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
2465 
2466     const Register DstReg = I.getOperand(0).getReg();
2467     const Register SrcReg = I.getOperand(1).getReg();
2468 
2469     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2470     const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
2471 
2472     if (DstRB.getID() != SrcRB.getID()) {
2473       LLVM_DEBUG(
2474           dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n");
2475       return false;
2476     }
2477 
2478     if (DstRB.getID() == AArch64::GPRRegBankID) {
2479       const TargetRegisterClass *DstRC =
2480           getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2481       if (!DstRC)
2482         return false;
2483 
2484       const TargetRegisterClass *SrcRC =
2485           getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
2486       if (!SrcRC)
2487         return false;
2488 
2489       if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
2490           !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
2491         LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n");
2492         return false;
2493       }
2494 
2495       if (DstRC == SrcRC) {
2496         // Nothing to be done
2497       } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) &&
2498                  SrcTy == LLT::scalar(64)) {
2499         llvm_unreachable("TableGen can import this case");
2500         return false;
2501       } else if (DstRC == &AArch64::GPR32RegClass &&
2502                  SrcRC == &AArch64::GPR64RegClass) {
2503         I.getOperand(1).setSubReg(AArch64::sub_32);
2504       } else {
2505         LLVM_DEBUG(
2506             dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n");
2507         return false;
2508       }
2509 
2510       I.setDesc(TII.get(TargetOpcode::COPY));
2511       return true;
2512     } else if (DstRB.getID() == AArch64::FPRRegBankID) {
2513       if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
2514         I.setDesc(TII.get(AArch64::XTNv4i16));
2515         constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2516         return true;
2517       }
2518 
2519       if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) {
2520         MachineIRBuilder MIB(I);
2521         MachineInstr *Extract = emitExtractVectorElt(
2522             DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB);
2523         if (!Extract)
2524           return false;
2525         I.eraseFromParent();
2526         return true;
2527       }
2528 
2529       // We might have a vector G_PTRTOINT, in which case just emit a COPY.
2530       if (Opcode == TargetOpcode::G_PTRTOINT) {
2531         assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector");
2532         I.setDesc(TII.get(TargetOpcode::COPY));
2533         return true;
2534       }
2535     }
2536 
2537     return false;
2538   }
2539 
2540   case TargetOpcode::G_ANYEXT: {
2541     const Register DstReg = I.getOperand(0).getReg();
2542     const Register SrcReg = I.getOperand(1).getReg();
2543 
2544     const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
2545     if (RBDst.getID() != AArch64::GPRRegBankID) {
2546       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst
2547                         << ", expected: GPR\n");
2548       return false;
2549     }
2550 
2551     const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
2552     if (RBSrc.getID() != AArch64::GPRRegBankID) {
2553       LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc
2554                         << ", expected: GPR\n");
2555       return false;
2556     }
2557 
2558     const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
2559 
2560     if (DstSize == 0) {
2561       LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
2562       return false;
2563     }
2564 
2565     if (DstSize != 64 && DstSize > 32) {
2566       LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
2567                         << ", expected: 32 or 64\n");
2568       return false;
2569     }
2570     // At this point G_ANYEXT is just like a plain COPY, but for a 64-bit
2571     // destination we need to explicitly form the 64-bit value first.
2572     if (DstSize > 32) {
2573       Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
2574       BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
2575           .addDef(ExtSrc)
2576           .addImm(0)
2577           .addUse(SrcReg)
2578           .addImm(AArch64::sub_32);
2579       I.getOperand(1).setReg(ExtSrc);
2580     }
2581     return selectCopy(I, TII, MRI, TRI, RBI);
2582   }
2583 
2584   case TargetOpcode::G_ZEXT:
2585   case TargetOpcode::G_SEXT_INREG:
2586   case TargetOpcode::G_SEXT: {
2587     unsigned Opcode = I.getOpcode();
2588     const bool IsSigned = Opcode != TargetOpcode::G_ZEXT;
2589     const Register DefReg = I.getOperand(0).getReg();
2590     Register SrcReg = I.getOperand(1).getReg();
2591     const LLT DstTy = MRI.getType(DefReg);
2592     const LLT SrcTy = MRI.getType(SrcReg);
2593     unsigned DstSize = DstTy.getSizeInBits();
2594     unsigned SrcSize = SrcTy.getSizeInBits();
2595 
2596     // SEXT_INREG's source register is the same size as its destination; the
2597     // size of the value to be extended is encoded in the immediate.
2598     if (Opcode == TargetOpcode::G_SEXT_INREG)
2599       SrcSize = I.getOperand(2).getImm();
2600 
2601     if (DstTy.isVector())
2602       return false; // Should be handled by imported patterns.
2603 
2604     assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() ==
2605                AArch64::GPRRegBankID &&
2606            "Unexpected ext regbank");
2607 
2608     MachineIRBuilder MIB(I);
2609     MachineInstr *ExtI;
2610 
2611     // First check if we're extending the result of a load with a dest type
2612     // smaller than 32 bits; if so, this zext is redundant. GPR32 is the
2613     // smallest GPR register class on AArch64, and all narrower loads
2614     // automatically zero-extend the upper bits. E.g.
2615     // %v(s8) = G_LOAD %p, :: (load 1)
2616     // %v2(s32) = G_ZEXT %v(s8)
2617     if (!IsSigned) {
2618       auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
2619       bool IsGPR =
2620           RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID;
2621       if (LoadMI && IsGPR) {
2622         const MachineMemOperand *MemOp = *LoadMI->memoperands_begin();
2623         unsigned BytesLoaded = MemOp->getSize();
2624         if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded)
2625           return selectCopy(I, TII, MRI, TRI, RBI);
2626       }
2627 
2628       // If we are zero extending from 32 bits to 64 bits, it's possible that
2629       // the instruction implicitly does the zero extend for us. In that case,
2630       // we can just emit a SUBREG_TO_REG.
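      // E.g. (illustrative): %v:gpr(s32) = G_ADD ...; %z:gpr(s64) = G_ZEXT %v
      // becomes %z = SUBREG_TO_REG 0, %v, %subreg.sub_32, because the 32-bit
      // op has already zeroed the top 32 bits of the X register.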
2631       if (IsGPR && SrcSize == 32 && DstSize == 64) {
2632         // Unlike with the G_LOAD case, we don't want to look through copies
2633         // here.
2634         MachineInstr *Def = MRI.getVRegDef(SrcReg);
2635         if (Def && isDef32(*Def)) {
2636           MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {})
2637               .addImm(0)
2638               .addUse(SrcReg)
2639               .addImm(AArch64::sub_32);
2640 
2641           if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass,
2642                                             MRI)) {
2643             LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n");
2644             return false;
2645           }
2646 
2647           if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2648                                             MRI)) {
2649             LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n");
2650             return false;
2651           }
2652 
2653           I.eraseFromParent();
2654           return true;
2655         }
2656       }
2657     }
2658 
2659     if (DstSize == 64) {
2660       if (Opcode != TargetOpcode::G_SEXT_INREG) {
2661         // FIXME: Can we avoid manually doing this?
2662         if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass,
2663                                           MRI)) {
2664           LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
2665                             << " operand\n");
2666           return false;
2667         }
2668         SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG,
2669                                 {&AArch64::GPR64RegClass}, {})
2670                      .addImm(0)
2671                      .addUse(SrcReg)
2672                      .addImm(AArch64::sub_32)
2673                      .getReg(0);
2674       }
2675 
2676       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri,
2677                              {DefReg}, {SrcReg})
2678                   .addImm(0)
2679                   .addImm(SrcSize - 1);
2680     } else if (DstSize <= 32) {
2681       ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri,
2682                              {DefReg}, {SrcReg})
2683                   .addImm(0)
2684                   .addImm(SrcSize - 1);
2685     } else {
2686       return false;
2687     }
2688 
2689     constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
2690     I.eraseFromParent();
2691     return true;
2692   }
2693 
2694   case TargetOpcode::G_SITOFP:
2695   case TargetOpcode::G_UITOFP:
2696   case TargetOpcode::G_FPTOSI:
2697   case TargetOpcode::G_FPTOUI: {
2698     const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
2699               SrcTy = MRI.getType(I.getOperand(1).getReg());
2700     const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
2701     if (NewOpc == Opcode)
2702       return false;
2703 
2704     I.setDesc(TII.get(NewOpc));
2705     constrainSelectedInstRegOperands(I, TII, TRI, RBI);
2706 
2707     return true;
2708   }
2709 
2710   case TargetOpcode::G_FREEZE:
2711     return selectCopy(I, TII, MRI, TRI, RBI);
2712 
2713   case TargetOpcode::G_INTTOPTR:
2714     // The importer is currently unable to import pointer types since they
2715     // didn't exist in SelectionDAG.
2716     return selectCopy(I, TII, MRI, TRI, RBI);
2717 
2718   case TargetOpcode::G_BITCAST:
2719     // Imported SelectionDAG rules can handle every bitcast except those that
2720     // bitcast from a type to the same type. Ideally, these shouldn't occur
2721     // but we might not run an optimizer that deletes them. The other exception
2722     // is bitcasts involving pointer types, as SelectionDAG has no knowledge
2723     // of them.
2724     return selectCopy(I, TII, MRI, TRI, RBI);
2725 
2726   case TargetOpcode::G_SELECT: {
2727     if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
2728       LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
2729                         << ", expected: " << LLT::scalar(1) << '\n');
2730       return false;
2731     }
2732 
2733     const Register CondReg = I.getOperand(1).getReg();
2734     const Register TReg = I.getOperand(2).getReg();
2735     const Register FReg = I.getOperand(3).getReg();
2736 
2737     if (tryOptSelect(I))
2738       return true;
2739 
2740     unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
2741     MachineInstr &TstMI =
2742         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
2743              .addDef(AArch64::WZR)
2744              .addUse(CondReg)
2745              .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2746 
2747     MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
2748                                 .addDef(I.getOperand(0).getReg())
2749                                 .addUse(TReg)
2750                                 .addUse(FReg)
2751                                 .addImm(AArch64CC::NE);
2752 
2753     constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
2754     constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
2755 
2756     I.eraseFromParent();
2757     return true;
2758   }
2759   case TargetOpcode::G_ICMP: {
2760     if (Ty.isVector())
2761       return selectVectorICmp(I, MRI);
2762 
2763     if (Ty != LLT::scalar(32)) {
2764       LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty
2765                         << ", expected: " << LLT::scalar(32) << '\n');
2766       return false;
2767     }
2768 
2769     MachineIRBuilder MIRBuilder(I);
2770     MachineInstr *Cmp;
2771     CmpInst::Predicate Pred;
2772     std::tie(Cmp, Pred) = emitIntegerCompare(I.getOperand(2), I.getOperand(3),
2773                                              I.getOperand(1), MIRBuilder);
2774     if (!Cmp)
2775       return false;
2776     emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder);
2777     I.eraseFromParent();
2778     return true;
2779   }
2780 
2781   case TargetOpcode::G_FCMP: {
2782     if (Ty != LLT::scalar(32)) {
2783       LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty
2784                         << ", expected: " << LLT::scalar(32) << '\n');
2785       return false;
2786     }
2787 
2788     unsigned CmpOpc = selectFCMPOpc(I, MRI);
2789     if (!CmpOpc)
2790       return false;
2791 
2792     // FIXME: regbank
2793 
2794     AArch64CC::CondCode CC1, CC2;
2795     changeFCMPPredToAArch64CC(
2796         (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
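         // Some FP predicates need two AArch64 condition codes; e.g. an
         // unordered-or-equal compare holds if either EQ or VS (unordered)
         // does. In that case CC2 != AL and the two CSINC results are ORed
         // together below.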
2797 
2798     // Partially build the compare. Decide if we need to add a use for the
2799     // third operand based off whether or not we're comparing against 0.0.
2800     auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
2801                      .addUse(I.getOperand(2).getReg());
2802 
2803     // If we don't have an immediate compare, then we need to add a use of the
2804     // register which wasn't used for the immediate.
2805     // Note that the immediate will always be the last operand.
2806     if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
2807       CmpMI = CmpMI.addUse(I.getOperand(3).getReg());
2808 
2809     const Register DefReg = I.getOperand(0).getReg();
2810     Register Def1Reg = DefReg;
2811     if (CC2 != AArch64CC::AL)
2812       Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2813 
2814     MachineInstr &CSetMI =
2815         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2816              .addDef(Def1Reg)
2817              .addUse(AArch64::WZR)
2818              .addUse(AArch64::WZR)
2819              .addImm(getInvertedCondCode(CC1));
2820 
2821     if (CC2 != AArch64CC::AL) {
2822       Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
2823       MachineInstr &CSet2MI =
2824           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
2825                .addDef(Def2Reg)
2826                .addUse(AArch64::WZR)
2827                .addUse(AArch64::WZR)
2828                .addImm(getInvertedCondCode(CC2));
2829       MachineInstr &OrMI =
2830           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
2831                .addDef(DefReg)
2832                .addUse(Def1Reg)
2833                .addUse(Def2Reg);
2834       constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
2835       constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
2836     }
2837     constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
2838     constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
2839 
2840     I.eraseFromParent();
2841     return true;
2842   }
2843   case TargetOpcode::G_VASTART:
2844     return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI)
2845                                 : selectVaStartAAPCS(I, MF, MRI);
2846   case TargetOpcode::G_INTRINSIC:
2847     return selectIntrinsic(I, MRI);
2848   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
2849     return selectIntrinsicWithSideEffects(I, MRI);
2850   case TargetOpcode::G_IMPLICIT_DEF: {
2851     I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
2852     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
2853     const Register DstReg = I.getOperand(0).getReg();
2854     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
2855     const TargetRegisterClass *DstRC =
2856         getRegClassForTypeOnBank(DstTy, DstRB, RBI);
2857     RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
2858     return true;
2859   }
2860   case TargetOpcode::G_BLOCK_ADDR: {
2861     if (TM.getCodeModel() == CodeModel::Large) {
2862       materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
2863       I.eraseFromParent();
2864       return true;
2865     } else {
2866       I.setDesc(TII.get(AArch64::MOVaddrBA));
2867       auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
2868                            I.getOperand(0).getReg())
2869                        .addBlockAddress(I.getOperand(1).getBlockAddress(),
2870                                         /* Offset */ 0, AArch64II::MO_PAGE)
2871                        .addBlockAddress(
2872                            I.getOperand(1).getBlockAddress(), /* Offset */ 0,
2873                            AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2874       I.eraseFromParent();
2875       return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2876     }
2877   }
2878   case TargetOpcode::G_INTRINSIC_TRUNC:
2879     return selectIntrinsicTrunc(I, MRI);
2880   case TargetOpcode::G_INTRINSIC_ROUND:
2881     return selectIntrinsicRound(I, MRI);
2882   case TargetOpcode::G_BUILD_VECTOR:
2883     return selectBuildVector(I, MRI);
2884   case TargetOpcode::G_MERGE_VALUES:
2885     return selectMergeValues(I, MRI);
2886   case TargetOpcode::G_UNMERGE_VALUES:
2887     return selectUnmergeValues(I, MRI);
2888   case TargetOpcode::G_SHUFFLE_VECTOR:
2889     return selectShuffleVector(I, MRI);
2890   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2891     return selectExtractElt(I, MRI);
2892   case TargetOpcode::G_INSERT_VECTOR_ELT:
2893     return selectInsertElt(I, MRI);
2894   case TargetOpcode::G_CONCAT_VECTORS:
2895     return selectConcatVectors(I, MRI);
2896   case TargetOpcode::G_JUMP_TABLE:
2897     return selectJumpTable(I, MRI);
2898   }
2899 
2900   return false;
2901 }
2902 
2903 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I,
2904                                             MachineRegisterInfo &MRI) const {
2905   assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT");
2906   Register JTAddr = I.getOperand(0).getReg();
2907   unsigned JTI = I.getOperand(1).getIndex();
2908   Register Index = I.getOperand(2).getReg();
2909   MachineIRBuilder MIB(I);
2910 
2911   Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
2912   Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
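       // JumpTableDest32 is a pseudo that computes the branch target from the
       // jump table address and the index, clobbering ScratchReg as a
       // temporary; it is expanded after selection (roughly an entry load plus
       // an add of the table base, since the 32-bit entries are stored as
       // offsets).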
2913   auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32,
2914                                       {TargetReg, ScratchReg}, {JTAddr, Index})
2915                            .addJumpTableIndex(JTI);
2916   // Build the indirect branch.
2917   MIB.buildInstr(AArch64::BR, {}, {TargetReg});
2918   I.eraseFromParent();
2919   return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI);
2920 }
2921 
2922 bool AArch64InstructionSelector::selectJumpTable(
2923     MachineInstr &I, MachineRegisterInfo &MRI) const {
2924   assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table");
2925   assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!");
2926 
2927   Register DstReg = I.getOperand(0).getReg();
2928   unsigned JTI = I.getOperand(1).getIndex();
2929   // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later.
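       // (Roughly "adrp xN, .LJTI0_0; add xN, xN, :lo12:.LJTI0_0"; the register
       // and label names here are just illustrative.)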
2930   MachineIRBuilder MIB(I);
2931   auto MovMI =
2932     MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {})
2933           .addJumpTableIndex(JTI, AArch64II::MO_PAGE)
2934           .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
2935   I.eraseFromParent();
2936   return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
2937 }
2938 
2939 bool AArch64InstructionSelector::selectTLSGlobalValue(
2940     MachineInstr &I, MachineRegisterInfo &MRI) const {
2941   if (!STI.isTargetMachO())
2942     return false;
2943   MachineFunction &MF = *I.getParent()->getParent();
2944   MF.getFrameInfo().setAdjustsStack(true);
2945 
2946   const GlobalValue &GV = *I.getOperand(1).getGlobal();
2947   MachineIRBuilder MIB(I);
2948 
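       // On Darwin, a TLS variable is reached through its thread-local
       // descriptor: load the descriptor's address from the GOT into x0, load
       // the accessor function pointer from the first word of the descriptor,
       // and call it; the call returns the address of the variable in x0.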
2949   MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {})
2950       .addGlobalAddress(&GV, 0, AArch64II::MO_TLS);
2951 
2952   auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass},
2953                              {Register(AArch64::X0)})
2954                   .addImm(0);
2955 
2956   // TLS calls preserve all registers except those that absolutely must be
2957   // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
2958   // silly).
2959   MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load})
2960       .addDef(AArch64::X0, RegState::Implicit)
2961       .addRegMask(TRI.getTLSCallPreservedMask());
2962 
2963   MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0));
2964   RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass,
2965                                MRI);
2966   I.eraseFromParent();
2967   return true;
2968 }
2969 
2970 bool AArch64InstructionSelector::selectIntrinsicTrunc(
2971     MachineInstr &I, MachineRegisterInfo &MRI) const {
2972   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
2973 
2974   // Select the correct opcode.
2975   unsigned Opc = 0;
2976   if (!SrcTy.isVector()) {
2977     switch (SrcTy.getSizeInBits()) {
2978     default:
2979     case 16:
2980       Opc = AArch64::FRINTZHr;
2981       break;
2982     case 32:
2983       Opc = AArch64::FRINTZSr;
2984       break;
2985     case 64:
2986       Opc = AArch64::FRINTZDr;
2987       break;
2988     }
2989   } else {
2990     unsigned NumElts = SrcTy.getNumElements();
2991     switch (SrcTy.getElementType().getSizeInBits()) {
2992     default:
2993       break;
2994     case 16:
2995       if (NumElts == 4)
2996         Opc = AArch64::FRINTZv4f16;
2997       else if (NumElts == 8)
2998         Opc = AArch64::FRINTZv8f16;
2999       break;
3000     case 32:
3001       if (NumElts == 2)
3002         Opc = AArch64::FRINTZv2f32;
3003       else if (NumElts == 4)
3004         Opc = AArch64::FRINTZv4f32;
3005       break;
3006     case 64:
3007       if (NumElts == 2)
3008         Opc = AArch64::FRINTZv2f64;
3009       break;
3010     }
3011   }
3012 
3013   if (!Opc) {
3014     // Didn't get an opcode above, bail.
3015     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n");
3016     return false;
3017   }
3018 
3019   // Legalization would have set us up perfectly for this; we just need to
3020   // set the opcode and move on.
3021   I.setDesc(TII.get(Opc));
3022   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3023 }
3024 
3025 bool AArch64InstructionSelector::selectIntrinsicRound(
3026     MachineInstr &I, MachineRegisterInfo &MRI) const {
3027   const LLT SrcTy = MRI.getType(I.getOperand(0).getReg());
3028 
3029   // Select the correct opcode.
3030   unsigned Opc = 0;
3031   if (!SrcTy.isVector()) {
3032     switch (SrcTy.getSizeInBits()) {
3033     default:
3034     case 16:
3035       Opc = AArch64::FRINTAHr;
3036       break;
3037     case 32:
3038       Opc = AArch64::FRINTASr;
3039       break;
3040     case 64:
3041       Opc = AArch64::FRINTADr;
3042       break;
3043     }
3044   } else {
3045     unsigned NumElts = SrcTy.getNumElements();
3046     switch (SrcTy.getElementType().getSizeInBits()) {
3047     default:
3048       break;
3049     case 16:
3050       if (NumElts == 4)
3051         Opc = AArch64::FRINTAv4f16;
3052       else if (NumElts == 8)
3053         Opc = AArch64::FRINTAv8f16;
3054       break;
3055     case 32:
3056       if (NumElts == 2)
3057         Opc = AArch64::FRINTAv2f32;
3058       else if (NumElts == 4)
3059         Opc = AArch64::FRINTAv4f32;
3060       break;
3061     case 64:
3062       if (NumElts == 2)
3063         Opc = AArch64::FRINTAv2f64;
3064       break;
3065     }
3066   }
3067 
3068   if (!Opc) {
3069     // Didn't get an opcode above, bail.
3070     LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n");
3071     return false;
3072   }
3073 
3074   // Legalization would have set us up perfectly for this; we just need to
3075   // set the opcode and move on.
3076   I.setDesc(TII.get(Opc));
3077   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3078 }
3079 
3080 bool AArch64InstructionSelector::selectVectorICmp(
3081     MachineInstr &I, MachineRegisterInfo &MRI) const {
3082   Register DstReg = I.getOperand(0).getReg();
3083   LLT DstTy = MRI.getType(DstReg);
3084   Register SrcReg = I.getOperand(2).getReg();
3085   Register Src2Reg = I.getOperand(3).getReg();
3086   LLT SrcTy = MRI.getType(SrcReg);
3087 
3088   unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
3089   unsigned NumElts = DstTy.getNumElements();
3090 
3091   // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b
3092   // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16
3093   // Third index is cc opcode:
3094   // 0 == eq
3095   // 1 == ugt
3096   // 2 == uge
3097   // 3 == ult
3098   // 4 == ule
3099   // 5 == sgt
3100   // 6 == sge
3101   // 7 == slt
3102   // 8 == sle
3103   // ne is done by negating 'eq' result.
3104 
3105   // The table below assumes that for some comparisons the operands will be
3106   // commuted.
3107   // ult op == commute + ugt op
3108   // ule op == commute + uge op
3109   // slt op == commute + sgt op
3110   // sle op == commute + sge op
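       // E.g. an unsigned-less-than compare of <4 x s32> vectors uses EltIdx 2,
       // NumEltsIdx 1 and PredIdx 3, i.e. CMHIv4i32 with the operands swapped.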
3111   unsigned PredIdx = 0;
3112   bool SwapOperands = false;
3113   CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate();
3114   switch (Pred) {
3115   case CmpInst::ICMP_NE:
3116   case CmpInst::ICMP_EQ:
3117     PredIdx = 0;
3118     break;
3119   case CmpInst::ICMP_UGT:
3120     PredIdx = 1;
3121     break;
3122   case CmpInst::ICMP_UGE:
3123     PredIdx = 2;
3124     break;
3125   case CmpInst::ICMP_ULT:
3126     PredIdx = 3;
3127     SwapOperands = true;
3128     break;
3129   case CmpInst::ICMP_ULE:
3130     PredIdx = 4;
3131     SwapOperands = true;
3132     break;
3133   case CmpInst::ICMP_SGT:
3134     PredIdx = 5;
3135     break;
3136   case CmpInst::ICMP_SGE:
3137     PredIdx = 6;
3138     break;
3139   case CmpInst::ICMP_SLT:
3140     PredIdx = 7;
3141     SwapOperands = true;
3142     break;
3143   case CmpInst::ICMP_SLE:
3144     PredIdx = 8;
3145     SwapOperands = true;
3146     break;
3147   default:
3148     llvm_unreachable("Unhandled icmp predicate");
3149     return false;
3150   }
3151 
3152   // This table obviously should be tablegen'd when we have our GISel native
3153   // tablegen selector.
3154 
3155   static const unsigned OpcTable[4][4][9] = {
3156       {
3157           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3158            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3159            0 /* invalid */},
3160           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3161            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3162            0 /* invalid */},
3163           {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8,
3164            AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8,
3165            AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8},
3166           {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8,
3167            AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8,
3168            AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8}
3169       },
3170       {
3171           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3172            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3173            0 /* invalid */},
3174           {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16,
3175            AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16,
3176            AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16},
3177           {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16,
3178            AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16,
3179            AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16},
3180           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3181            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3182            0 /* invalid */}
3183       },
3184       {
3185           {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32,
3186            AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32,
3187            AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32},
3188           {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32,
3189            AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32,
3190            AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32},
3191           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3192            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3193            0 /* invalid */},
3194           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3195            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3196            0 /* invalid */}
3197       },
3198       {
3199           {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64,
3200            AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64,
3201            AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64},
3202           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3203            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3204            0 /* invalid */},
3205           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3206            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3207            0 /* invalid */},
3208           {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3209            0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */,
3210            0 /* invalid */}
3211       },
3212   };
3213   unsigned EltIdx = Log2_32(SrcEltSize / 8);
3214   unsigned NumEltsIdx = Log2_32(NumElts / 2);
3215   unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx];
3216   if (!Opc) {
3217     LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode");
3218     return false;
3219   }
3220 
3221   const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI);
3222   const TargetRegisterClass *SrcRC =
3223       getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true);
3224   if (!SrcRC) {
3225     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3226     return false;
3227   }
3228 
3229   unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0;
3230   if (SrcTy.getSizeInBits() == 128)
3231     NotOpc = NotOpc ? AArch64::NOTv16i8 : 0;
3232 
3233   if (SwapOperands)
3234     std::swap(SrcReg, Src2Reg);
3235 
3236   MachineIRBuilder MIB(I);
3237   auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg});
3238   constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3239 
3240   // Invert if we had a 'ne' cc.
3241   if (NotOpc) {
3242     Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp});
3243     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
3244   } else {
3245     MIB.buildCopy(DstReg, Cmp.getReg(0));
3246   }
3247   RBI.constrainGenericRegister(DstReg, *SrcRC, MRI);
3248   I.eraseFromParent();
3249   return true;
3250 }
3251 
3252 MachineInstr *AArch64InstructionSelector::emitScalarToVector(
3253     unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar,
3254     MachineIRBuilder &MIRBuilder) const {
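       // Build an IMPLICIT_DEF of DstRC and INSERT_SUBREG the scalar into its
       // low subregister, e.g. for a 64-bit element:
       //   %undef:fpr128 = IMPLICIT_DEF
       //   %vec:fpr128 = INSERT_SUBREG %undef, %scalar, %subreg.dsub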
3255   auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {});
3256 
3257   auto BuildFn = [&](unsigned SubregIndex) {
3258     auto Ins =
3259         MIRBuilder
3260             .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar})
3261             .addImm(SubregIndex);
3262     constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI);
3263     constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI);
3264     return &*Ins;
3265   };
3266 
3267   switch (EltSize) {
3268   case 16:
3269     return BuildFn(AArch64::hsub);
3270   case 32:
3271     return BuildFn(AArch64::ssub);
3272   case 64:
3273     return BuildFn(AArch64::dsub);
3274   default:
3275     return nullptr;
3276   }
3277 }
3278 
3279 bool AArch64InstructionSelector::selectMergeValues(
3280     MachineInstr &I, MachineRegisterInfo &MRI) const {
3281   assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode");
3282   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3283   const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
3284   assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation");
3285   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
3286 
3287   if (I.getNumOperands() != 3)
3288     return false;
3289 
3290   // Merging 2 s64s into an s128.
3291   if (DstTy == LLT::scalar(128)) {
3292     if (SrcTy.getSizeInBits() != 64)
3293       return false;
3294     MachineIRBuilder MIB(I);
3295     Register DstReg = I.getOperand(0).getReg();
3296     Register Src1Reg = I.getOperand(1).getReg();
3297     Register Src2Reg = I.getOperand(2).getReg();
3298     auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {});
3299     MachineInstr *InsMI =
3300         emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB);
3301     if (!InsMI)
3302       return false;
3303     MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(),
3304                                           Src2Reg, /* LaneIdx */ 1, RB, MIB);
3305     if (!Ins2MI)
3306       return false;
3307     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
3308     constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI);
3309     I.eraseFromParent();
3310     return true;
3311   }
3312 
3313   if (RB.getID() != AArch64::GPRRegBankID)
3314     return false;
3315 
3316   if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32)
3317     return false;
3318 
3319   auto *DstRC = &AArch64::GPR64RegClass;
3320   Register SubToRegDef = MRI.createVirtualRegister(DstRC);
3321   MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3322                                     TII.get(TargetOpcode::SUBREG_TO_REG))
3323                                 .addDef(SubToRegDef)
3324                                 .addImm(0)
3325                                 .addUse(I.getOperand(1).getReg())
3326                                 .addImm(AArch64::sub_32);
3327   Register SubToRegDef2 = MRI.createVirtualRegister(DstRC);
3328   // Need to anyext the second scalar before we can use bfm
3329   MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
3330                                     TII.get(TargetOpcode::SUBREG_TO_REG))
3331                                 .addDef(SubToRegDef2)
3332                                 .addImm(0)
3333                                 .addUse(I.getOperand(2).getReg())
3334                                 .addImm(AArch64::sub_32);
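       // BFM with immr=32, imms=31 acts as a BFI inserting the low 32 bits of
       // the second operand into bits [63:32], so operand 1 ends up in the low
       // half and operand 2 in the high half of the merged s64.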
3335   MachineInstr &BFM =
3336       *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri))
3337            .addDef(I.getOperand(0).getReg())
3338            .addUse(SubToRegDef)
3339            .addUse(SubToRegDef2)
3340            .addImm(32)
3341            .addImm(31);
3342   constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI);
3343   constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI);
3344   constrainSelectedInstRegOperands(BFM, TII, TRI, RBI);
3345   I.eraseFromParent();
3346   return true;
3347 }
3348 
3349 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg,
3350                               const unsigned EltSize) {
3351   // Choose a lane copy opcode and subregister based off of the size of the
3352   // vector's elements.
3353   switch (EltSize) {
3354   case 16:
3355     CopyOpc = AArch64::CPYi16;
3356     ExtractSubReg = AArch64::hsub;
3357     break;
3358   case 32:
3359     CopyOpc = AArch64::CPYi32;
3360     ExtractSubReg = AArch64::ssub;
3361     break;
3362   case 64:
3363     CopyOpc = AArch64::CPYi64;
3364     ExtractSubReg = AArch64::dsub;
3365     break;
3366   default:
3367     // Unknown size, bail out.
3368     LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n");
3369     return false;
3370   }
3371   return true;
3372 }
3373 
3374 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt(
3375     Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy,
3376     Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const {
3377   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3378   unsigned CopyOpc = 0;
3379   unsigned ExtractSubReg = 0;
3380   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) {
3381     LLVM_DEBUG(
3382         dbgs() << "Couldn't determine lane copy opcode for instruction.\n");
3383     return nullptr;
3384   }
3385 
3386   const TargetRegisterClass *DstRC =
3387       getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true);
3388   if (!DstRC) {
3389     LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n");
3390     return nullptr;
3391   }
3392 
3393   const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI);
3394   const LLT &VecTy = MRI.getType(VecReg);
3395   const TargetRegisterClass *VecRC =
3396       getRegClassForTypeOnBank(VecTy, VecRB, RBI, true);
3397   if (!VecRC) {
3398     LLVM_DEBUG(dbgs() << "Could not determine source register class.\n");
3399     return nullptr;
3400   }
3401 
3402   // The register that we're going to copy into.
3403   Register InsertReg = VecReg;
3404   if (!DstReg)
3405     DstReg = MRI.createVirtualRegister(DstRC);
3406   // If the lane index is 0, we just use a subregister COPY.
3407   if (LaneIdx == 0) {
3408     auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {})
3409                     .addReg(VecReg, 0, ExtractSubReg);
3410     RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3411     return &*Copy;
3412   }
3413 
3414   // Lane copies require 128-bit wide registers. If we're dealing with an
3415   // unpacked vector, then we need to move up to that width. Insert an implicit
3416   // def and a subregister insert to get us there.
3417   if (VecTy.getSizeInBits() != 128) {
3418     MachineInstr *ScalarToVector = emitScalarToVector(
3419         VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder);
3420     if (!ScalarToVector)
3421       return nullptr;
3422     InsertReg = ScalarToVector->getOperand(0).getReg();
3423   }
3424 
3425   MachineInstr *LaneCopyMI =
3426       MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx);
3427   constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI);
3428 
3429   // Make sure that we actually constrain the initial copy.
3430   RBI.constrainGenericRegister(*DstReg, *DstRC, MRI);
3431   return LaneCopyMI;
3432 }
3433 
3434 bool AArch64InstructionSelector::selectExtractElt(
3435     MachineInstr &I, MachineRegisterInfo &MRI) const {
3436   assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT &&
3437          "unexpected opcode!");
3438   Register DstReg = I.getOperand(0).getReg();
3439   const LLT NarrowTy = MRI.getType(DstReg);
3440   const Register SrcReg = I.getOperand(1).getReg();
3441   const LLT WideTy = MRI.getType(SrcReg);
3442   (void)WideTy;
3443   assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() &&
3444          "source register size too small!");
3445   assert(NarrowTy.isScalar() && "cannot extract vector into vector!");
3446 
3447   // Need the lane index to determine the correct copy opcode.
3448   MachineOperand &LaneIdxOp = I.getOperand(2);
3449   assert(LaneIdxOp.isReg() && "Lane index operand was not a register?");
3450 
3451   if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
3452     LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n");
3453     return false;
3454   }
3455 
3456   // Find the index to extract from.
3457   auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI);
3458   if (!VRegAndVal)
3459     return false;
3460   unsigned LaneIdx = VRegAndVal->Value;
3461 
3462   MachineIRBuilder MIRBuilder(I);
3463 
3464   const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3465   MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg,
3466                                                LaneIdx, MIRBuilder);
3467   if (!Extract)
3468     return false;
3469 
3470   I.eraseFromParent();
3471   return true;
3472 }
3473 
3474 bool AArch64InstructionSelector::selectSplitVectorUnmerge(
3475     MachineInstr &I, MachineRegisterInfo &MRI) const {
3476   unsigned NumElts = I.getNumOperands() - 1;
3477   Register SrcReg = I.getOperand(NumElts).getReg();
3478   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3479   const LLT SrcTy = MRI.getType(SrcReg);
3480 
3481   assert(NarrowTy.isVector() && "Expected an unmerge into vectors");
3482   if (SrcTy.getSizeInBits() > 128) {
3483     LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge");
3484     return false;
3485   }
3486 
3487   MachineIRBuilder MIB(I);
3488 
3489   // We implement a split vector operation by treating the sub-vectors as
3490   // scalars and extracting them.
3491   const RegisterBank &DstRB =
3492       *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI);
3493   for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) {
3494     Register Dst = I.getOperand(OpIdx).getReg();
3495     MachineInstr *Extract =
3496         emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB);
3497     if (!Extract)
3498       return false;
3499   }
3500   I.eraseFromParent();
3501   return true;
3502 }
3503 
3504 bool AArch64InstructionSelector::selectUnmergeValues(
3505     MachineInstr &I, MachineRegisterInfo &MRI) const {
3506   assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
3507          "unexpected opcode");
3508 
3509   // TODO: Handle unmerging into GPRs and from scalars to scalars.
3510   if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
3511           AArch64::FPRRegBankID ||
3512       RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3513           AArch64::FPRRegBankID) {
3514     LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar "
3515                          "currently unsupported.\n");
3516     return false;
3517   }
3518 
3519   // The last operand is the vector source register, and every other operand is
3520   // a register to unpack into.
3521   unsigned NumElts = I.getNumOperands() - 1;
3522   Register SrcReg = I.getOperand(NumElts).getReg();
3523   const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg());
3524   const LLT WideTy = MRI.getType(SrcReg);
3525   (void)WideTy;
3526   assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) &&
3527          "can only unmerge from vector or s128 types!");
3528   assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() &&
3529          "source register size too small!");
3530 
3531   if (!NarrowTy.isScalar())
3532     return selectSplitVectorUnmerge(I, MRI);
3533 
3534   MachineIRBuilder MIB(I);
3535 
3536   // Choose a lane copy opcode and subregister based off of the size of the
3537   // vector's elements.
3538   unsigned CopyOpc = 0;
3539   unsigned ExtractSubReg = 0;
3540   if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits()))
3541     return false;
3542 
3543   // Set up for the lane copies.
3544   MachineBasicBlock &MBB = *I.getParent();
3545 
3546   // Stores the registers we'll be copying from.
3547   SmallVector<Register, 4> InsertRegs;
3548 
3549   // We'll use the first register twice, so we only need NumElts-1 registers.
3550   unsigned NumInsertRegs = NumElts - 1;
3551 
3552   // If our elements fit into exactly 128 bits, then we can copy from the source
3553   // directly. Otherwise, we need to do a bit of setup with some subregister
3554   // inserts.
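       // E.g. unmerging a <4 x s32> into four s32s copies lane 0 out with an
       // ssub subregister copy and lanes 1-3 with CPYi32.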
3555   if (NarrowTy.getSizeInBits() * NumElts == 128) {
3556     InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg);
3557   } else {
3558     // No. We have to perform subregister inserts. For each insert, create an
3559     // implicit def and a subregister insert, and save the register we create.
3560     for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) {
3561       Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3562       MachineInstr &ImpDefMI =
3563           *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF),
3564                    ImpDefReg);
3565 
3566       // Now, create the subregister insert from SrcReg.
3567       Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass);
3568       MachineInstr &InsMI =
3569           *BuildMI(MBB, I, I.getDebugLoc(),
3570                    TII.get(TargetOpcode::INSERT_SUBREG), InsertReg)
3571                .addUse(ImpDefReg)
3572                .addUse(SrcReg)
3573                .addImm(AArch64::dsub);
3574 
3575       constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI);
3576       constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI);
3577 
3578       // Save the register so that we can copy from it after.
3579       InsertRegs.push_back(InsertReg);
3580     }
3581   }
3582 
3583   // Now that we've created any necessary subregister inserts, we can
3584   // create the copies.
3585   //
3586   // Perform the first copy separately as a subregister copy.
3587   Register CopyTo = I.getOperand(0).getReg();
3588   auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {})
3589                        .addReg(InsertRegs[0], 0, ExtractSubReg);
3590   constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI);
3591 
3592   // Now, perform the remaining copies as vector lane copies.
3593   unsigned LaneIdx = 1;
3594   for (Register InsReg : InsertRegs) {
3595     Register CopyTo = I.getOperand(LaneIdx).getReg();
3596     MachineInstr &CopyInst =
3597         *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo)
3598              .addUse(InsReg)
3599              .addImm(LaneIdx);
3600     constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI);
3601     ++LaneIdx;
3602   }
3603 
3604   // Separately constrain the first copy's destination. Because of the
3605   // limitation in constrainOperandRegClass, we can't guarantee that this will
3606   // actually be constrained. So, do it ourselves using the second operand.
3607   const TargetRegisterClass *RC =
3608       MRI.getRegClassOrNull(I.getOperand(1).getReg());
3609   if (!RC) {
3610     LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
3611     return false;
3612   }
3613 
3614   RBI.constrainGenericRegister(CopyTo, *RC, MRI);
3615   I.eraseFromParent();
3616   return true;
3617 }
3618 
3619 bool AArch64InstructionSelector::selectConcatVectors(
3620     MachineInstr &I, MachineRegisterInfo &MRI) const {
3621   assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
3622          "Unexpected opcode");
3623   Register Dst = I.getOperand(0).getReg();
3624   Register Op1 = I.getOperand(1).getReg();
3625   Register Op2 = I.getOperand(2).getReg();
3626   MachineIRBuilder MIRBuilder(I);
3627   MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
3628   if (!ConcatMI)
3629     return false;
3630   I.eraseFromParent();
3631   return true;
3632 }
3633 
3634 unsigned
3635 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
3636                                                   MachineFunction &MF) const {
3637   Type *CPTy = CPVal->getType();
3638   Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);
3639 
3640   MachineConstantPool *MCP = MF.getConstantPool();
3641   return MCP->getConstantPoolIndex(CPVal, Alignment);
3642 }
3643 
3644 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
3645     const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
3646   unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());
3647 
3648   auto Adrp =
3649       MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
3650           .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
3651 
3652   MachineInstr *LoadMI = nullptr;
3653   switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
3654   case 16:
3655     LoadMI =
3656         &*MIRBuilder
3657               .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
3658               .addConstantPoolIndex(CPIdx, 0,
3659                                     AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3660     break;
3661   case 8:
3662     LoadMI = &*MIRBuilder
3663                  .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
3664                  .addConstantPoolIndex(
3665                      CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3666     break;
3667   default:
3668     LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
3669                       << *CPVal->getType());
3670     return nullptr;
3671   }
3672   constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
3673   constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
3674   return LoadMI;
3675 }
3676 
3677 /// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
3678 /// size and RB.
3679 static std::pair<unsigned, unsigned>
3680 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) {
3681   unsigned Opc, SubregIdx;
3682   if (RB.getID() == AArch64::GPRRegBankID) {
3683     if (EltSize == 32) {
3684       Opc = AArch64::INSvi32gpr;
3685       SubregIdx = AArch64::ssub;
3686     } else if (EltSize == 64) {
3687       Opc = AArch64::INSvi64gpr;
3688       SubregIdx = AArch64::dsub;
3689     } else {
3690       llvm_unreachable("invalid elt size!");
3691     }
3692   } else {
3693     if (EltSize == 8) {
3694       Opc = AArch64::INSvi8lane;
3695       SubregIdx = AArch64::bsub;
3696     } else if (EltSize == 16) {
3697       Opc = AArch64::INSvi16lane;
3698       SubregIdx = AArch64::hsub;
3699     } else if (EltSize == 32) {
3700       Opc = AArch64::INSvi32lane;
3701       SubregIdx = AArch64::ssub;
3702     } else if (EltSize == 64) {
3703       Opc = AArch64::INSvi64lane;
3704       SubregIdx = AArch64::dsub;
3705     } else {
3706       llvm_unreachable("invalid elt size!");
3707     }
3708   }
3709   return std::make_pair(Opc, SubregIdx);
3710 }
3711 
3712 MachineInstr *
3713 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS,
3714                                     MachineOperand &RHS,
3715                                     MachineIRBuilder &MIRBuilder) const {
3716   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3717   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3718   static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri},
3719                                        {AArch64::ADDWrr, AArch64::ADDWri}};
3720   bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32;
3721   auto ImmFns = selectArithImmed(RHS);
3722   unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3723   auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS});
3724 
3725   // If we matched a valid constant immediate, add those operands.
3726   if (ImmFns) {
3727     for (auto &RenderFn : *ImmFns)
3728       RenderFn(AddMI);
3729   } else {
3730     AddMI.addUse(RHS.getReg());
3731   }
3732 
3733   constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI);
3734   return &*AddMI;
3735 }
3736 
3737 MachineInstr *
3738 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS,
3739                                     MachineIRBuilder &MIRBuilder) const {
3740   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3741   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3742   static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri},
3743                                        {AArch64::ADDSWrr, AArch64::ADDSWri}};
3744   bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32);
3745   auto ImmFns = selectArithImmed(RHS);
3746   unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()];
3747   Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3748 
3749   auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3750 
3751   // If we matched a valid constant immediate, add those operands.
3752   if (ImmFns) {
3753     for (auto &RenderFn : *ImmFns)
3754       RenderFn(CmpMI);
3755   } else {
3756     CmpMI.addUse(RHS.getReg());
3757   }
3758 
3759   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3760   return &*CmpMI;
3761 }
3762 
3763 MachineInstr *
3764 AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS,
3765                                     MachineIRBuilder &MIRBuilder) const {
3766   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3767   unsigned RegSize = MRI.getType(LHS).getSizeInBits();
3768   bool Is32Bit = (RegSize == 32);
3769   static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri},
3770                                        {AArch64::ANDSWrr, AArch64::ANDSWri}};
3771   Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
3772 
3773   // We might be able to fold an immediate into the TST. We need to make sure
3774   // it's a logical immediate though, since ANDS requires that.
3775   auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
3776   bool IsImmForm = ValAndVReg.hasValue() &&
3777                    AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize);
3778   unsigned Opc = OpcTable[Is32Bit][IsImmForm];
3779   auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS});
3780 
3781   if (IsImmForm)
3782     TstMI.addImm(
3783         AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize));
3784   else
3785     TstMI.addUse(RHS);
3786 
3787   constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI);
3788   return &*TstMI;
3789 }
3790 
3791 std::pair<MachineInstr *, CmpInst::Predicate>
3792 AArch64InstructionSelector::emitIntegerCompare(
3793     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
3794     MachineIRBuilder &MIRBuilder) const {
3795   assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!");
3796   assert(Predicate.isPredicate() && "Expected predicate?");
3797   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3798 
3799   CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
3800 
3801   // Fold the compare if possible.
3802   MachineInstr *FoldCmp =
3803       tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder);
3804   if (FoldCmp)
3805     return {FoldCmp, P};
3806 
3807   // Can't fold into a CMN. Just emit a normal compare.
3808   unsigned CmpOpc = 0;
3809   Register ZReg;
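       // Note that ZReg is a fresh vreg here, not the zero register; the
       // subtraction result is dead and only the NZCV flags of the compare are
       // used.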
3810 
3811   LLT CmpTy = MRI.getType(LHS.getReg());
3812   assert((CmpTy.isScalar() || CmpTy.isPointer()) &&
3813          "Expected scalar or pointer");
3814   if (CmpTy == LLT::scalar(32)) {
3815     CmpOpc = AArch64::SUBSWrr;
3816     ZReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3817   } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
3818     CmpOpc = AArch64::SUBSXrr;
3819     ZReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3820   } else {
3821     return {nullptr, CmpInst::Predicate::BAD_ICMP_PREDICATE};
3822   }
3823 
3824   // Try to match immediate forms.
3825   MachineInstr *ImmedCmp =
3826       tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder);
3827   if (ImmedCmp)
3828     return {ImmedCmp, P};
3829 
3830   // If we don't have an immediate, we may have a shift which can be folded
3831   // into the compare.
3832   MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder);
3833   if (ShiftedCmp)
3834     return {ShiftedCmp, P};
3835 
3836   auto CmpMI =
3837       MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()});
3838   // Make sure that we can constrain the compare that we emitted.
3839   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
3840   return {&*CmpMI, P};
3841 }
3842 
3843 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
3844     Optional<Register> Dst, Register Op1, Register Op2,
3845     MachineIRBuilder &MIRBuilder) const {
3846   // We implement a vector concat by:
3847   // 1. Use scalar_to_vector to insert the lower vector into the larger dest
3848   // 2. Insert the upper vector into the destination's upper element
3849   // TODO: some of this code is common with G_BUILD_VECTOR handling.
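       // E.g. for two <2 x s32> operands: widen the first into the low 64 bits
       // of a 128-bit register, then INSvi64lane the second into lane 1,
       // producing the <4 x s32> result.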
3850   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3851 
3852   const LLT Op1Ty = MRI.getType(Op1);
3853   const LLT Op2Ty = MRI.getType(Op2);
3854 
3855   if (Op1Ty != Op2Ty) {
3856     LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3857     return nullptr;
3858   }
3859   assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3860 
3861   if (Op1Ty.getSizeInBits() >= 128) {
3862     LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3863     return nullptr;
3864   }
3865 
3866   // At the moment we just support 64 bit vector concats.
3867   if (Op1Ty.getSizeInBits() != 64) {
3868     LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
3869     return nullptr;
3870   }
3871 
3872   const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3873   const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3874   const TargetRegisterClass *DstRC =
3875       getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3876 
3877   MachineInstr *WidenedOp1 =
3878       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3879   MachineInstr *WidenedOp2 =
3880       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3881   if (!WidenedOp1 || !WidenedOp2) {
3882     LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3883     return nullptr;
3884   }
3885 
3886   // Now do the insert of the upper element.
3887   unsigned InsertOpc, InsSubRegIdx;
3888   std::tie(InsertOpc, InsSubRegIdx) =
3889       getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3890 
3891   if (!Dst)
3892     Dst = MRI.createVirtualRegister(DstRC);
3893   auto InsElt =
3894       MIRBuilder
3895           .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
3896           .addImm(1) /* Lane index */
3897           .addUse(WidenedOp2->getOperand(0).getReg())
3898           .addImm(0);
3899   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3900   return &*InsElt;
3901 }
3902 
3903 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3904     MachineInstr &I, MachineRegisterInfo &MRI) const {
3905   assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3906          "Expected a G_FCONSTANT!");
3907   MachineOperand &ImmOp = I.getOperand(1);
3908   unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3909 
3910   // Only handle 32 and 64 bit defs for now.
3911   if (DefSize != 32 && DefSize != 64)
3912     return nullptr;
3913 
3914   // Don't handle null values using FMOV.
3915   if (ImmOp.getFPImm()->isNullValue())
3916     return nullptr;
3917 
3918   // Get the immediate representation for the FMOV.
3919   const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3920   int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3921                           : AArch64_AM::getFP64Imm(ImmValAPF);
3922 
3923   // If this is -1, it means the immediate can't be represented as the requested
3924   // floating point value. Bail.
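       // (FMOV's 8-bit immediate encoding only covers a small set of values,
       // e.g. 1.0, 2.0, 0.5, -1.25; anything else has to be materialized
       // differently.)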
3925   if (Imm == -1)
3926     return nullptr;
3927 
3928   // Update MI to represent the new FMOV instruction, constrain it, and return.
3929   ImmOp.ChangeToImmediate(Imm);
3930   unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3931   I.setDesc(TII.get(MovOpc));
3932   constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3933   return &I;
3934 }
3935 
3936 MachineInstr *
3937 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
3938                                      MachineIRBuilder &MIRBuilder) const {
3939   // CSINC increments the result when the predicate is false. Invert it.
3940   const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC(
3941       CmpInst::getInversePredicate((CmpInst::Predicate)Pred));
3942   auto I = MIRBuilder
3943                .buildInstr(AArch64::CSINCWr, {DefReg},
3944                            {Register(AArch64::WZR), Register(AArch64::WZR)})
3945                .addImm(InvCC);
3946   constrainSelectedInstRegOperands(*I, TII, TRI, RBI);
3947   return &*I;
3948 }
3949 
3950 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const {
3951   MachineIRBuilder MIB(I);
3952   MachineRegisterInfo &MRI = *MIB.getMRI();
3953   const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
3954 
3955   // We want to recognize this pattern:
3956   //
3957   // $z = G_FCMP pred, $x, $y
3958   // ...
3959   // $w = G_SELECT $z, $a, $b
3960   //
3961   // Where the value of $z is *only* ever used by the G_SELECT (possibly with
3962   // some copies/truncs in between.)
3963   //
3964   // If we see this, then we can emit something like this:
3965   //
3966   // fcmp $x, $y
3967   // fcsel $w, $a, $b, pred
3968   //
3969   // Rather than emitting both of the rather long sequences in the standard
3970   // G_FCMP/G_SELECT select methods.
3971 
3972   // First, check if the condition is defined by a compare.
3973   MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg());
3974   while (CondDef) {
3975     // We can only fold if all of the defs have one use.
3976     Register CondDefReg = CondDef->getOperand(0).getReg();
3977     if (!MRI.hasOneNonDBGUse(CondDefReg)) {
3978       // Unless it's another select.
3979       for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) {
3980         if (CondDef == &UI)
3981           continue;
3982         if (UI.getOpcode() != TargetOpcode::G_SELECT)
3983           return false;
3984       }
3985     }
3986 
3987     // We can skip over G_TRUNC since the condition is 1-bit.
3988     // Truncating/extending can have no impact on the value.
3989     unsigned Opc = CondDef->getOpcode();
3990     if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC)
3991       break;
3992 
3993     // Can't see past copies from physregs.
3994     if (Opc == TargetOpcode::COPY &&
3995         Register::isPhysicalRegister(CondDef->getOperand(1).getReg()))
3996       return false;
3997 
3998     CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg());
3999   }
4000 
4001   // Is the condition defined by a compare?
4002   if (!CondDef)
4003     return false;
4004 
4005   unsigned CondOpc = CondDef->getOpcode();
4006   if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP)
4007     return false;
4008 
4009   AArch64CC::CondCode CondCode;
4010   if (CondOpc == TargetOpcode::G_ICMP) {
4011     MachineInstr *Cmp;
4012     CmpInst::Predicate Pred;
4013 
4014     std::tie(Cmp, Pred) =
4015         emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3),
4016                            CondDef->getOperand(1), MIB);
4017 
4018     if (!Cmp) {
4019       LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n");
4020       return false;
4021     }
4022 
4023     // Have to collect the CondCode after emitIntegerCompare, since it can
4024     // update the predicate.
4025     CondCode = changeICMPPredToAArch64CC(Pred);
4026   } else {
4027     // Get the condition code for the select.
4028     AArch64CC::CondCode CondCode2;
4029     changeFCMPPredToAArch64CC(
4030         (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode,
4031         CondCode2);
4032 
4033     // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two
4034     // instructions to emit the comparison.
4035     // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be
4036     // unnecessary.
4037     if (CondCode2 != AArch64CC::AL)
4038       return false;
4039 
4040     // Make sure we'll be able to select the compare.
4041     unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI);
4042     if (!CmpOpc)
4043       return false;
4044 
4045     // Emit a new compare.
4046     auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()});
4047     if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri)
4048       Cmp.addUse(CondDef->getOperand(3).getReg());
4049     constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI);
4050   }
4051 
4052   // Emit the select.
4053   unsigned CSelOpc = selectSelectOpc(I, MRI, RBI);
4054   auto CSel =
4055       MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()},
4056                      {I.getOperand(2).getReg(), I.getOperand(3).getReg()})
4057           .addImm(CondCode);
4058   constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI);
4059   I.eraseFromParent();
4060   return true;
4061 }
4062 
4063 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
4064     MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate,
4065     MachineIRBuilder &MIRBuilder) const {
4066   assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() &&
4067          "Unexpected MachineOperand");
4068   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4069   // We want to find this sort of thing:
4070   // x = G_SUB 0, y
4071   // G_ICMP z, x
4072   //
4073   // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead.
4074   // e.g:
4075   //
4076   // cmn z, y
4077 
4078   // Helper lambda to detect the subtract followed by the compare.
4079   // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0.
4080   auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) {
4081     if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB)
4082       return false;
4083 
4084     // Need to make sure NZCV is the same at the end of the transformation.
4085     if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
4086       return false;
4087 
4092     // Make sure that we're getting
4093     // x = G_SUB 0, y
4094     auto ValAndVReg =
4095         getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI);
4096     if (!ValAndVReg || ValAndVReg->Value != 0)
4097       return false;
4098 
4099     // This can safely be represented as a CMN.
4100     return true;
4101   };
4102 
4103   // Check if the RHS or LHS of the G_ICMP is defined by a SUB
4104   MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI);
4105   MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI);
4106   CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate();
4107   const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P);
4108 
4109   // Given this:
4110   //
4111   // x = G_SUB 0, y
4112   // G_ICMP x, z
4113   //
4114   // Produce this:
4115   //
4116   // cmn y, z
4117   if (IsCMN(LHSDef, CC))
4118     return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder);
4119 
4120   // Same idea here, but with the RHS of the compare instead:
4121   //
4122   // Given this:
4123   //
4124   // x = G_SUB 0, y
4125   // G_ICMP z, x
4126   //
4127   // Produce this:
4128   //
4129   // cmn z, y
4130   if (IsCMN(RHSDef, CC))
4131     return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder);
4132 
4133   // Given this:
4134   //
4135   // z = G_AND x, y
4136   // G_ICMP z, 0
4137   //
4138   // Produce this if the compare is signed:
4139   //
4140   // tst x, y
4141   if (!isUnsignedICMPPred(P) && LHSDef &&
4142       LHSDef->getOpcode() == TargetOpcode::G_AND) {
4143     // Make sure that the RHS is 0.
4144     auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4145     if (!ValAndVReg || ValAndVReg->Value != 0)
4146       return nullptr;
4147 
4148     return emitTST(LHSDef->getOperand(1).getReg(),
4149                    LHSDef->getOperand(2).getReg(), MIRBuilder);
4150   }
4151 
4152   return nullptr;
4153 }
4154 
4155 MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
4156     MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate &P,
4157     MachineIRBuilder &MIB) const {
4158   // Attempt to select the immediate form of an integer compare.
4159   MachineRegisterInfo &MRI = *MIB.getMRI();
4160   auto Ty = MRI.getType(LHS.getReg());
4161   assert(!Ty.isVector() && "Expected scalar or pointer only?");
4162   unsigned Size = Ty.getSizeInBits();
4163   assert((Size == 32 || Size == 64) &&
4164          "Expected 32 bit or 64 bit compare only?");
4165 
4166   // Check if this is a case we can already handle.
4167   InstructionSelector::ComplexRendererFns ImmFns;
4168   ImmFns = selectArithImmed(RHS);
4169 
4170   if (!ImmFns) {
4171     // We didn't get a rendering function, but we may still have a constant.
4172     auto MaybeImmed = getImmedFromMO(RHS);
4173     if (!MaybeImmed)
4174       return nullptr;
4175 
4176     // We have a constant, but it doesn't fit. Try adjusting it by one and
4177     // updating the predicate if possible.
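    // For example (illustrative): "icmp ult x, 0x1001" has no valid 12-bit
    // arithmetic immediate encoding, but the equivalent "icmp ule x, 0x1000"
    // does, since 0x1000 is representable as 1, LSL #12.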
4178     uint64_t C = *MaybeImmed;
4179     CmpInst::Predicate NewP;
4180     switch (P) {
4181     default:
4182       return nullptr;
4183     case CmpInst::ICMP_SLT:
4184     case CmpInst::ICMP_SGE:
4185       // Check for
4186       //
4187       // x slt c => x sle c - 1
4188       // x sge c => x sgt c - 1
4189       //
4190       // When c is not the smallest possible negative number.
4191       if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
4192           (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
4193         return nullptr;
4194       NewP = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
4195       C -= 1;
4196       break;
4197     case CmpInst::ICMP_ULT:
4198     case CmpInst::ICMP_UGE:
4199       // Check for
4200       //
4201       // x ult c => x ule c - 1
4202       // x uge c => x ugt c - 1
4203       //
4204       // When c is not zero.
4205       if (C == 0)
4206         return nullptr;
4207       NewP = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
4208       C -= 1;
4209       break;
4210     case CmpInst::ICMP_SLE:
4211     case CmpInst::ICMP_SGT:
4212       // Check for
4213       //
4214       // x sle c => x slt c + 1
4215       // x sgt c => x sge c + 1
4216       //
4217       // When c is not the largest possible signed integer.
4218       if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
4219           (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
4220         return nullptr;
4221       NewP = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
4222       C += 1;
4223       break;
4224     case CmpInst::ICMP_ULE:
4225     case CmpInst::ICMP_UGT:
4226       // Check for
4227       //
4228       // x ule c => x ult c + 1
4229       // x ugt c => x uge c + 1
4230       //
4231       // When c is not the largest possible unsigned integer.
4232       if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
4233           (Size == 64 && C == UINT64_MAX))
4234         return nullptr;
4235       NewP = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
4236       C += 1;
4237       break;
4238     }
4239 
4240     // Check if the new constant is valid.
4241     if (Size == 32)
4242       C = static_cast<uint32_t>(C);
4243     ImmFns = select12BitValueWithLeftShift(C);
4244     if (!ImmFns)
4245       return nullptr;
4246     P = NewP;
4247   }
4248 
4249   // At this point, we know we can select an immediate form. Go ahead and do
4250   // that.
4251   Register ZReg;
4252   unsigned Opc;
4253   if (Size == 32) {
4254     ZReg = AArch64::WZR;
4255     Opc = AArch64::SUBSWri;
4256   } else {
4257     ZReg = AArch64::XZR;
4258     Opc = AArch64::SUBSXri;
4259   }
4260 
4261   auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
4262   for (auto &RenderFn : *ImmFns)
4263     RenderFn(CmpMI);
4264   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4265   return &*CmpMI;
4266 }
4267 
4268 MachineInstr *AArch64InstructionSelector::tryOptArithShiftedCompare(
4269     MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIB) const {
4270   // We are looking for the following pattern:
4271   //
4272   // shift = G_SHL/G_ASHR/G_LSHR y, c
4273   // ...
4274   // cmp = G_ICMP pred, something, shift
4275   //
4276   // Since we will select the G_ICMP to a SUBS, we can potentially fold the
4277   // shift into the subtract.
4278   static const unsigned OpcTable[2] = {AArch64::SUBSWrs, AArch64::SUBSXrs};
4279   static const Register ZRegTable[2] = {AArch64::WZR, AArch64::XZR};
4280   auto ImmFns = selectShiftedRegister(RHS);
4281   if (!ImmFns)
4282     return nullptr;
4283   MachineRegisterInfo &MRI = *MIB.getMRI();
4284   auto Ty = MRI.getType(LHS.getReg());
4285   assert(!Ty.isVector() && "Expected scalar or pointer only?");
4286   unsigned Size = Ty.getSizeInBits();
4287   bool Idx = (Size == 64);
4288   Register ZReg = ZRegTable[Idx];
4289   unsigned Opc = OpcTable[Idx];
4290   auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
4291   for (auto &RenderFn : *ImmFns)
4292     RenderFn(CmpMI);
4293   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4294   return &*CmpMI;
4295 }
4296 
4297 bool AArch64InstructionSelector::selectShuffleVector(
4298     MachineInstr &I, MachineRegisterInfo &MRI) const {
4299   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4300   Register Src1Reg = I.getOperand(1).getReg();
4301   const LLT Src1Ty = MRI.getType(Src1Reg);
4302   Register Src2Reg = I.getOperand(2).getReg();
4303   const LLT Src2Ty = MRI.getType(Src2Reg);
4304   ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4305 
4306   MachineBasicBlock &MBB = *I.getParent();
4307   MachineFunction &MF = *MBB.getParent();
4308   LLVMContext &Ctx = MF.getFunction().getContext();
4309 
4310   // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4311   // it originated from a <1 x T> type. Those should have been lowered into
4312   // G_BUILD_VECTOR earlier.
4313   if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4314     LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4315     return false;
4316   }
4317 
4318   unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4319 
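  // Build the TBL index vector: each destination element expands to
  // BytesPerElt consecutive byte indices into the concatenated sources. For
  // example (illustrative), a <4 x s32> shuffle with mask <1, 0, 3, 2> yields
  // the byte indices {4..7, 0..3, 12..15, 8..11}.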
4320   SmallVector<Constant *, 64> CstIdxs;
4321   for (int Val : Mask) {
4322     // For now, we just assume any undef indexes are 0. This should be
4323     // optimized in the future, e.g. to select DUP etc.
4324     Val = Val < 0 ? 0 : Val;
4325     for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4326       unsigned Offset = Byte + Val * BytesPerElt;
4327       CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4328     }
4329   }
4330 
4331   MachineIRBuilder MIRBuilder(I);
4332 
4333   // Use a constant pool to load the index vector for TBL.
4334   Constant *CPVal = ConstantVector::get(CstIdxs);
4335   MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
4336   if (!IndexLoad) {
4337     LLVM_DEBUG(dbgs() << "Could not load from a constant pool\n");
4338     return false;
4339   }
4340 
4341   if (DstTy.getSizeInBits() != 128) {
4342     assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4343     // This case can be done with TBL1.
4344     MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder);
4345     if (!Concat) {
4346       LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1\n");
4347       return false;
4348     }
4349 
4350     // The constant pool load will be 64 bits, so convert it to an FPR128 reg.
4351     IndexLoad =
4352         emitScalarToVector(64, &AArch64::FPR128RegClass,
4353                            IndexLoad->getOperand(0).getReg(), MIRBuilder);
4354 
4355     auto TBL1 = MIRBuilder.buildInstr(
4356         AArch64::TBLv16i8One, {&AArch64::FPR128RegClass},
4357         {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()});
4358     constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI);
4359 
4360     auto Copy =
4361         MIRBuilder
4362             .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {})
4363             .addReg(TBL1.getReg(0), 0, AArch64::dsub);
4364     RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI);
4365     I.eraseFromParent();
4366     return true;
4367   }
4368 
4369   // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive
4370   // Q registers for regalloc.
4371   auto RegSeq = MIRBuilder
4372                     .buildInstr(TargetOpcode::REG_SEQUENCE,
4373                                 {&AArch64::QQRegClass}, {Src1Reg})
4374                     .addImm(AArch64::qsub0)
4375                     .addUse(Src2Reg)
4376                     .addImm(AArch64::qsub1);
4377 
4378   auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)},
4379                                     {RegSeq, IndexLoad->getOperand(0)});
4380   constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI);
4381   constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI);
4382   I.eraseFromParent();
4383   return true;
4384 }
4385 
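/// Emit an instruction that inserts \p EltReg into lane \p LaneIdx of
/// \p SrcReg, producing an FPR128 value. If \p DstReg is not provided, a new
/// FPR128 virtual register is created to hold the result.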
4386 MachineInstr *AArch64InstructionSelector::emitLaneInsert(
4387     Optional<Register> DstReg, Register SrcReg, Register EltReg,
4388     unsigned LaneIdx, const RegisterBank &RB,
4389     MachineIRBuilder &MIRBuilder) const {
4390   MachineInstr *InsElt = nullptr;
4391   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4392   MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4393 
4394   // Create a register to define with the insert if one wasn't passed in.
4395   if (!DstReg)
4396     DstReg = MRI.createVirtualRegister(DstRC);
4397 
4398   unsigned EltSize = MRI.getType(EltReg).getSizeInBits();
4399   unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first;
4400 
4401   if (RB.getID() == AArch64::FPRRegBankID) {
4402     auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder);
4403     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4404                  .addImm(LaneIdx)
4405                  .addUse(InsSub->getOperand(0).getReg())
4406                  .addImm(0);
4407   } else {
4408     InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg})
4409                  .addImm(LaneIdx)
4410                  .addUse(EltReg);
4411   }
4412 
4413   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4414   return InsElt;
4415 }
4416 
4417 bool AArch64InstructionSelector::selectInsertElt(
4418     MachineInstr &I, MachineRegisterInfo &MRI) const {
4419   assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);
4420 
4421   // Get information on the destination.
4422   Register DstReg = I.getOperand(0).getReg();
4423   const LLT DstTy = MRI.getType(DstReg);
4424   unsigned VecSize = DstTy.getSizeInBits();
4425 
4426   // Get information on the element we want to insert into the destination.
4427   Register EltReg = I.getOperand(2).getReg();
4428   const LLT EltTy = MRI.getType(EltReg);
4429   unsigned EltSize = EltTy.getSizeInBits();
4430   if (EltSize < 16 || EltSize > 64)
4431     return false; // Don't support all element types yet.
4432 
4433   // Find the definition of the index. Bail out if it's not defined by a
4434   // G_CONSTANT.
4435   Register IdxReg = I.getOperand(3).getReg();
4436   auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI);
4437   if (!VRegAndVal)
4438     return false;
4439   unsigned LaneIdx = VRegAndVal->Value;
4440 
4441   // Perform the lane insert.
4442   Register SrcReg = I.getOperand(1).getReg();
4443   const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI);
4444   MachineIRBuilder MIRBuilder(I);
4445 
4446   if (VecSize < 128) {
4447     // If the vector we're inserting into is smaller than 128 bits, widen it
4448     // to 128 bits to do the insert.
4449     MachineInstr *ScalarToVec = emitScalarToVector(
4450         VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder);
4451     if (!ScalarToVec)
4452       return false;
4453     SrcReg = ScalarToVec->getOperand(0).getReg();
4454   }
4455 
4456   // Create an insert into a new FPR128 register.
4457   // Note that if our vector is already 128 bits, we end up emitting an extra
4458   // register.
4459   MachineInstr *InsMI =
4460       emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder);
4461 
4462   if (VecSize < 128) {
4463     // If we had to widen to perform the insert, then we have to demote back to
4464     // the original size to get the result we want.
4465     Register DemoteVec = InsMI->getOperand(0).getReg();
4466     const TargetRegisterClass *RC =
4467         getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize);
4468     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4469       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4470       return false;
4471     }
4472     unsigned SubReg = 0;
4473     if (!getSubRegForClass(RC, TRI, SubReg))
4474       return false;
4475     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4476       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize
4477                         << ")\n");
4478       return false;
4479     }
4480     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4481         .addReg(DemoteVec, 0, SubReg);
4482     RBI.constrainGenericRegister(DstReg, *RC, MRI);
4483   } else {
4484     // No widening needed.
4485     InsMI->getOperand(0).setReg(DstReg);
4486     constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI);
4487   }
4488 
4489   I.eraseFromParent();
4490   return true;
4491 }
4492 
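/// Try to select a G_BUILD_VECTOR whose operands are all G_CONSTANT or
/// G_FCONSTANT as a single constant-pool load rather than a chain of lane
/// inserts. For example (illustrative), a <4 x s32> build_vector of the
/// constants 1, 2, 3, and 4 can be materialized with one load from a
/// constant-pool entry.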
4493 bool AArch64InstructionSelector::tryOptConstantBuildVec(
4494     MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const {
4495   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4496   assert(DstTy.getSizeInBits() <= 128 && "Unexpected build_vec type!");
4497   if (DstTy.getSizeInBits() < 32)
4498     return false;
4499   // Check if we're building a constant vector, in which case we want to
4500   // generate a constant pool load instead of a vector insert sequence.
4501   SmallVector<Constant *, 16> Csts;
4502   for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) {
4503     // Try to find G_CONSTANT or G_FCONSTANT
4504     auto *OpMI =
4505         getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI);
4506     if (OpMI)
4507       Csts.emplace_back(
4508           const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm()));
4509     else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT,
4510                                   I.getOperand(Idx).getReg(), MRI)))
4511       Csts.emplace_back(
4512           const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm()));
4513     else
4514       return false;
4515   }
4516   Constant *CV = ConstantVector::get(Csts);
4517   MachineIRBuilder MIB(I);
4518   auto *CPLoad = emitLoadFromConstantPool(CV, MIB);
4519   if (!CPLoad) {
4520     LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector\n");
4521     return false;
4522   }
4523   MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0));
4524   RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4525                                *MRI.getRegClass(CPLoad->getOperand(0).getReg()),
4526                                MRI);
4527   I.eraseFromParent();
4528   return true;
4529 }
4530 
4531 bool AArch64InstructionSelector::selectBuildVector(
4532     MachineInstr &I, MachineRegisterInfo &MRI) const {
4533   assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
4534   // Until we port more of the optimized selections, just use a vector insert
4535   // sequence.
4536   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4537   const LLT EltTy = MRI.getType(I.getOperand(1).getReg());
4538   unsigned EltSize = EltTy.getSizeInBits();
4539 
4540   if (tryOptConstantBuildVec(I, DstTy, MRI))
4541     return true;
4542   if (EltSize < 16 || EltSize > 64)
4543     return false; // Don't support all element types yet.
4544   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4545   MachineIRBuilder MIRBuilder(I);
4546 
4547   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4548   MachineInstr *ScalarToVec =
4549       emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
4550                          I.getOperand(1).getReg(), MIRBuilder);
4551   if (!ScalarToVec)
4552     return false;
4553 
4554   Register DstVec = ScalarToVec->getOperand(0).getReg();
4555   unsigned DstSize = DstTy.getSizeInBits();
4556 
4557   // Keep track of the last MI we inserted. Later on, we might be able to save
4558   // a copy using it.
4559   MachineInstr *PrevMI = nullptr;
4560   for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
4561     // Note that if we don't do a subregister copy, we can end up making an
4562     // extra register.
4563     PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4564                               MIRBuilder);
4565     DstVec = PrevMI->getOperand(0).getReg();
4566   }
4567 
4568   // If DstTy's size in bits is less than 128, then emit a subregister copy
4569   // from DstVec to the last register we've defined.
4570   if (DstSize < 128) {
4571     // Force this to be FPR using the destination vector.
4572     const TargetRegisterClass *RC =
4573         getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4574     if (!RC)
4575       return false;
4576     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4577       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4578       return false;
4579     }
4580 
4581     unsigned SubReg = 0;
4582     if (!getSubRegForClass(RC, TRI, SubReg))
4583       return false;
4584     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4585       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
4586                         << ")\n");
4587       return false;
4588     }
4589 
4590     Register Reg = MRI.createVirtualRegister(RC);
4591     Register DstReg = I.getOperand(0).getReg();
4592 
4593     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4594         .addReg(DstVec, 0, SubReg);
4595     MachineOperand &RegOp = I.getOperand(1);
4596     RegOp.setReg(Reg);
4597     RBI.constrainGenericRegister(DstReg, *RC, MRI);
4598   } else {
4599     // We don't need a subregister copy. Save a copy by re-using the
4600     // destination register on the final insert.
4601     assert(PrevMI && "PrevMI was null?");
4602     PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
4603     constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
4604   }
4605 
4606   I.eraseFromParent();
4607   return true;
4608 }
4609 
4610 /// Helper function to find an intrinsic ID on a MachineInstr. Returns the
4611 /// ID if it exists, and 0 otherwise.
4612 static unsigned findIntrinsicID(MachineInstr &I) {
4613   auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
4614     return Op.isIntrinsicID();
4615   });
4616   if (IntrinOp == I.operands_end())
4617     return 0;
4618   return IntrinOp->getIntrinsicID();
4619 }
4620 
4621 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
4622     MachineInstr &I, MachineRegisterInfo &MRI) const {
4623   // Find the intrinsic ID.
4624   unsigned IntrinID = findIntrinsicID(I);
4625   if (!IntrinID)
4626     return false;
4627   MachineIRBuilder MIRBuilder(I);
4628 
4629   // Select the instruction.
4630   switch (IntrinID) {
4631   default:
4632     return false;
4633   case Intrinsic::trap:
4634     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1);
4635     break;
4636   case Intrinsic::debugtrap:
4637     if (!STI.isTargetWindows())
4638       return false;
4639     MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000);
4640     break;
4641   }
4642 
4643   I.eraseFromParent();
4644   return true;
4645 }
4646 
4647 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
4648                                                  MachineRegisterInfo &MRI) {
4649   unsigned IntrinID = findIntrinsicID(I);
4650   if (!IntrinID)
4651     return false;
4652   MachineIRBuilder MIRBuilder(I);
4653 
4654   switch (IntrinID) {
4655   default:
4656     break;
4657   case Intrinsic::aarch64_crypto_sha1h: {
4658     Register DstReg = I.getOperand(0).getReg();
4659     Register SrcReg = I.getOperand(2).getReg();
4660 
4661     // FIXME: Should this be an assert?
4662     if (MRI.getType(DstReg).getSizeInBits() != 32 ||
4663         MRI.getType(SrcReg).getSizeInBits() != 32)
4664       return false;
4665 
4666     // The operation has to happen on FPRs. Set up some new FPR registers for
4667     // the source and destination if they are on GPRs.
4668     if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
4669       SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4670       MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)});
4671 
4672       // Make sure the copy ends up getting constrained properly.
4673       RBI.constrainGenericRegister(I.getOperand(2).getReg(),
4674                                    AArch64::GPR32RegClass, MRI);
4675     }
4676 
4677     if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
4678       DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
4679 
4680     // Actually insert the instruction.
4681     auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
4682     constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
4683 
4684     // Did we create a new register for the destination?
4685     if (DstReg != I.getOperand(0).getReg()) {
4686       // Yep. Copy the result of the instruction back into the original
4687       // destination.
4688       MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg});
4689       RBI.constrainGenericRegister(I.getOperand(0).getReg(),
4690                                    AArch64::GPR32RegClass, MRI);
4691     }
4692 
4693     I.eraseFromParent();
4694     return true;
4695   }
4696   case Intrinsic::frameaddress:
4697   case Intrinsic::returnaddress: {
4698     MachineFunction &MF = *I.getParent()->getParent();
4699     MachineFrameInfo &MFI = MF.getFrameInfo();
4700 
4701     unsigned Depth = I.getOperand(2).getImm();
4702     Register DstReg = I.getOperand(0).getReg();
4703     RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI);
4704 
4705     if (Depth == 0 && IntrinID == Intrinsic::returnaddress) {
4706       if (MFReturnAddr) {
4707         MIRBuilder.buildCopy({DstReg}, MFReturnAddr);
4708         I.eraseFromParent();
4709         return true;
4710       }
4711       MFI.setReturnAddressIsTaken(true);
4712       MF.addLiveIn(AArch64::LR, &AArch64::GPR64spRegClass);
4713       // Insert the copy from LR/X30 into the entry block, before it can be
4714       // clobbered by anything.
4715       MachineBasicBlock &EntryBlock = *MF.begin();
4716       if (!EntryBlock.isLiveIn(AArch64::LR))
4717         EntryBlock.addLiveIn(AArch64::LR);
4718       MachineIRBuilder EntryBuilder(MF);
4719       EntryBuilder.setInstr(*EntryBlock.begin());
4720       EntryBuilder.buildCopy({DstReg}, {Register(AArch64::LR)});
4721       MFReturnAddr = DstReg;
4722       I.eraseFromParent();
4723       return true;
4724     }
4725 
4726     MFI.setFrameAddressIsTaken(true);
4727     Register FrameAddr(AArch64::FP);
4728     while (Depth--) {
4729       Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass);
4730       auto Ldr =
4731           MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr})
4732               .addImm(0);
4733       constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI);
4734       FrameAddr = NextFrame;
4735     }
4736 
4737     if (IntrinID == Intrinsic::frameaddress)
4738       MIRBuilder.buildCopy({DstReg}, {FrameAddr});
4739     else {
4740       MFI.setReturnAddressIsTaken(true);
4741       MIRBuilder.buildInstr(AArch64::LDRXui, {DstReg}, {FrameAddr}).addImm(1);
4742     }
4743 
4744     I.eraseFromParent();
4745     return true;
4746   }
4747   }
4748   return false;
4749 }
4750 
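// Renderers for the shift immediates used by bitfield-move style shift
// patterns (assuming the standard "LSL #s" == "UBFM #((BitWidth - s) %
// BitWidth), #(BitWidth - 1 - s)" encoding): ShiftA produces the first
// immediate and ShiftB the second. For example (illustrative), a 32-bit
// shift amount of 3 renders 29 and 28 respectively.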
4751 InstructionSelector::ComplexRendererFns
4752 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const {
4753   auto MaybeImmed = getImmedFromMO(Root);
4754   if (MaybeImmed == None || *MaybeImmed > 31)
4755     return None;
4756   uint64_t Enc = (32 - *MaybeImmed) & 0x1f;
4757   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4758 }
4759 
4760 InstructionSelector::ComplexRendererFns
4761 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const {
4762   auto MaybeImmed = getImmedFromMO(Root);
4763   if (MaybeImmed == None || *MaybeImmed > 31)
4764     return None;
4765   uint64_t Enc = 31 - *MaybeImmed;
4766   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4767 }
4768 
4769 InstructionSelector::ComplexRendererFns
4770 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const {
4771   auto MaybeImmed = getImmedFromMO(Root);
4772   if (MaybeImmed == None || *MaybeImmed > 63)
4773     return None;
4774   uint64_t Enc = (64 - *MaybeImmed) & 0x3f;
4775   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4776 }
4777 
4778 InstructionSelector::ComplexRendererFns
4779 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const {
4780   auto MaybeImmed = getImmedFromMO(Root);
4781   if (MaybeImmed == None || *MaybeImmed > 63)
4782     return None;
4783   uint64_t Enc = 63 - *MaybeImmed;
4784   return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}};
4785 }
4786 
4787 /// Helper to select an immediate value that can be represented as a 12-bit
4788 /// value shifted left by either 0 or 12. If it is possible to do so, return
4789 /// the immediate and shift value. If not, return None.
4790 ///
4791 /// Used by selectArithImmed and selectNegArithImmed.
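///
/// For example (illustrative): 0xfff is returned as (0xfff, LSL #0), and
/// 0xabc000 as (0xabc, LSL #12), while 0x1fff cannot be encoded and yields
/// None.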
4792 InstructionSelector::ComplexRendererFns
4793 AArch64InstructionSelector::select12BitValueWithLeftShift(
4794     uint64_t Immed) const {
4795   unsigned ShiftAmt;
4796   if (Immed >> 12 == 0) {
4797     ShiftAmt = 0;
4798   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
4799     ShiftAmt = 12;
4800     Immed = Immed >> 12;
4801   } else
4802     return None;
4803 
4804   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
4805   return {{
4806       [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
4807       [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
4808   }};
4809 }
4810 
4811 /// SelectArithImmed - Select an immediate value that can be represented as
4812 /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
4813 /// Val set to the 12-bit value and Shift set to the shifter operand.
4814 InstructionSelector::ComplexRendererFns
4815 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
4816   // This function is called from the addsub_shifted_imm ComplexPattern,
4817   // which lists [imm] as the list of opcodes it's interested in. However,
4818   // we still need to check whether the operand is actually an immediate
4819   // here because the ComplexPattern opcode list is only used in
4820   // root-level opcode matching.
4821   auto MaybeImmed = getImmedFromMO(Root);
4822   if (MaybeImmed == None)
4823     return None;
4824   return select12BitValueWithLeftShift(*MaybeImmed);
4825 }
4826 
4827 /// SelectNegArithImmed - As above, but negates the value before trying to
4828 /// select it.
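///
/// For example (illustrative): a 32-bit immediate of -5 (0xfffffffb) negates
/// to 5, which fits the 12-bit form; 0 is rejected because cmp #0 and cmn #0
/// have opposite effects on the C flag.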
4829 InstructionSelector::ComplexRendererFns
4830 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
4831   // We need a register here, because we need to know if we have a 64 or 32
4832   // bit immediate.
4833   if (!Root.isReg())
4834     return None;
4835   auto MaybeImmed = getImmedFromMO(Root);
4836   if (MaybeImmed == None)
4837     return None;
4838   uint64_t Immed = *MaybeImmed;
4839 
4840   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
4841   // have the opposite effect on the C flag, so this pattern mustn't match under
4842   // those circumstances.
4843   if (Immed == 0)
4844     return None;
4845 
4846   // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
4847   // the root.
4848   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4849   if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
4850     Immed = ~((uint32_t)Immed) + 1;
4851   else
4852     Immed = ~Immed + 1ULL;
4853 
4854   if (Immed & 0xFFFFFFFFFF000000ULL)
4855     return None;
4856 
4857   Immed &= 0xFFFFFFULL;
4858   return select12BitValueWithLeftShift(Immed);
4859 }
4860 
4861 /// Return true if it is worth folding MI into an extended register. That is,
4862 /// if it's safe to pull it into the addressing mode of a load or store as a
4863 /// shift.
4864 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
4865     MachineInstr &MI, const MachineRegisterInfo &MRI) const {
4866   // Always fold if there is one use, or if we're optimizing for size.
4867   Register DefReg = MI.getOperand(0).getReg();
4868   if (MRI.hasOneNonDBGUse(DefReg) ||
4869       MI.getParent()->getParent()->getFunction().hasMinSize())
4870     return true;
4871 
4872   // It's better to avoid folding and recomputing shifts when we don't have a
4873   // fastpath.
4874   if (!STI.hasLSLFast())
4875     return false;
4876 
4877   // We have a fastpath, so folding a shift in and potentially computing it
4878   // many times may be beneficial. Check if this is only used in memory ops.
4879   // If it is, then we should fold.
4880   return all_of(MRI.use_nodbg_instructions(DefReg),
4881                 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
4882 }
4883 
4884 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) {
4885   switch (Type) {
4886   case AArch64_AM::SXTB:
4887   case AArch64_AM::SXTH:
4888   case AArch64_AM::SXTW:
4889     return true;
4890   default:
4891     return false;
4892   }
4893 }
4894 
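/// Try to fold an offset register that is scaled by a G_SHL or G_MUL
/// (optionally behind a G_ZEXT when \p WantsExt is true) into a shifted or
/// extended register-offset operand. For example (illustrative), with
/// \p SizeInBytes == 8 an offset defined by "G_SHL %idx, 3" can fold into an
/// addressing mode like "[xBase, xIdx, lsl #3]".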
4895 InstructionSelector::ComplexRendererFns
4896 AArch64InstructionSelector::selectExtendedSHL(
4897     MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
4898     unsigned SizeInBytes, bool WantsExt) const {
4899   assert(Base.isReg() && "Expected base to be a register operand");
4900   assert(Offset.isReg() && "Expected offset to be a register operand");
4901 
4902   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
4903   MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
4904   if (!OffsetInst)
4905     return None;
4906 
4907   unsigned OffsetOpc = OffsetInst->getOpcode();
4908   bool LookedThroughZExt = false;
4909   if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) {
4910     // Try to look through a ZEXT.
4911     if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt)
4912       return None;
4913 
4914     OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg());
4915     OffsetOpc = OffsetInst->getOpcode();
4916     LookedThroughZExt = true;
4917 
4918     if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
4919       return None;
4920   }
4921   // Make sure that the memory op is a valid size.
4922   int64_t LegalShiftVal = Log2_32(SizeInBytes);
4923   if (LegalShiftVal == 0)
4924     return None;
4925   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
4926     return None;
4927 
4928   // Now, try to find the specific G_CONSTANT. Start by assuming that the
4929   // register we will offset is the LHS, and the register containing the
4930   // constant is the RHS.
4931   Register OffsetReg = OffsetInst->getOperand(1).getReg();
4932   Register ConstantReg = OffsetInst->getOperand(2).getReg();
4933   auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4934   if (!ValAndVReg) {
4935     // We didn't get a constant on the RHS. If the opcode is a shift, then
4936     // we're done.
4937     if (OffsetOpc == TargetOpcode::G_SHL)
4938       return None;
4939 
4940     // If we have a G_MUL, we can use either register. Try looking at the RHS.
4941     std::swap(OffsetReg, ConstantReg);
4942     ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI);
4943     if (!ValAndVReg)
4944       return None;
4945   }
4946 
4947   // The value must fit into 3 bits, and must be positive. Make sure that is
4948   // true.
4949   int64_t ImmVal = ValAndVReg->Value;
4950 
4951   // Since we're going to pull this into a shift, the constant value must be
4952   // a power of 2. If we got a multiply, then we need to check this.
4953   if (OffsetOpc == TargetOpcode::G_MUL) {
4954     if (!isPowerOf2_32(ImmVal))
4955       return None;
4956 
4957     // Got a power of 2. So, the amount we'll shift is the log base-2 of that.
4958     ImmVal = Log2_32(ImmVal);
4959   }
4960 
4961   if ((ImmVal & 0x7) != ImmVal)
4962     return None;
4963 
4964   // We are only allowed to shift by LegalShiftVal. This shift value is built
4965   // into the instruction, so we can't just use whatever we want.
4966   if (ImmVal != LegalShiftVal)
4967     return None;
4968 
4969   unsigned SignExtend = 0;
4970   if (WantsExt) {
4971     // Check if the offset is defined by an extend, unless we looked through a
4972     // G_ZEXT earlier.
4973     if (!LookedThroughZExt) {
4974       MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
4975       auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
4976       if (Ext == AArch64_AM::InvalidShiftExtend)
4977         return None;
4978 
4979       SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
4980       // We only support SXTW for signed extension here.
4981       if (SignExtend && Ext != AArch64_AM::SXTW)
4982         return None;
4983       OffsetReg = ExtInst->getOperand(1).getReg();
4984     }
4985 
4986     // Need a 32-bit wide register here.
4987     MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
4988     OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
4989   }
4990 
4991   // We can use the LHS of the GEP as the base, and the LHS of the shift as an
4992   // offset. Signify that we are shifting by setting the shift flag to 1.
4993   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
4994            [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
4995            [=](MachineInstrBuilder &MIB) {
4996              // Need to add both immediates here to make sure that they are both
4997              // added to the instruction.
4998              MIB.addImm(SignExtend);
4999              MIB.addImm(1);
5000            }}};
5001 }
5002 
5003 /// This is used for computing addresses like this:
5004 ///
5005 /// ldr x1, [x2, x3, lsl #3]
5006 ///
5007 /// Where x2 is the base register, and x3 is an offset register. The shift-left
5008 /// is a constant value specific to this load instruction. That is, we'll never
5009 /// see anything other than a 3 here (which corresponds to the size of the
5010 /// element being loaded).
5011 InstructionSelector::ComplexRendererFns
5012 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
5013     MachineOperand &Root, unsigned SizeInBytes) const {
5014   if (!Root.isReg())
5015     return None;
5016   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5017 
5018   // We want to find something like this:
5019   //
5020   // val = G_CONSTANT LegalShiftVal
5021   // shift = G_SHL off_reg val
5022   // ptr = G_PTR_ADD base_reg shift
5023   // x = G_LOAD ptr
5024   //
5025   // And fold it into this addressing mode:
5026   //
5027   // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
5028 
5029   // Check if we can find the G_PTR_ADD.
5030   MachineInstr *PtrAdd =
5031       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5032   if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5033     return None;
5034 
5035   // Now, try to match an opcode which will match our specific offset.
5036   // We want a G_SHL or a G_MUL.
5037   MachineInstr *OffsetInst =
5038       getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
5039   return selectExtendedSHL(Root, PtrAdd->getOperand(1),
5040                            OffsetInst->getOperand(0), SizeInBytes,
5041                            /*WantsExt=*/false);
5042 }
5043 
5044 /// This is used for computing addresses like this:
5045 ///
5046 /// ldr x1, [x2, x3]
5047 ///
5048 /// Where x2 is the base register, and x3 is an offset register.
5049 ///
5050 /// When it is possible (or profitable) to fold a G_PTR_ADD into the address
5051 /// calculation, this will do so. Otherwise, it will return None.
5052 InstructionSelector::ComplexRendererFns
5053 AArch64InstructionSelector::selectAddrModeRegisterOffset(
5054     MachineOperand &Root) const {
5055   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5056 
5057   // We need a GEP.
5058   MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
5059   if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
5060     return None;
5061 
5062   // If this is used more than once, let's not bother folding.
5063   // TODO: Check if they are memory ops. If they are, then we can still fold
5064   // without having to recompute anything.
5065   if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5066     return None;
5067 
5068   // Base is the GEP's LHS, offset is its RHS.
5069   return {{[=](MachineInstrBuilder &MIB) {
5070              MIB.addUse(Gep->getOperand(1).getReg());
5071            },
5072            [=](MachineInstrBuilder &MIB) {
5073              MIB.addUse(Gep->getOperand(2).getReg());
5074            },
5075            [=](MachineInstrBuilder &MIB) {
5076              // Need to add both immediates here to make sure that they are both
5077              // added to the instruction.
5078              MIB.addImm(0);
5079              MIB.addImm(0);
5080            }}};
5081 }
5082 
5083 /// This is intended to be equivalent to selectAddrModeXRO in
5084 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads.
5085 InstructionSelector::ComplexRendererFns
5086 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5087                                               unsigned SizeInBytes) const {
5088   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5089 
5090   // If we have a constant offset, then we probably don't want to match a
5091   // register offset.
5092   if (isBaseWithConstantOffset(Root, MRI))
5093     return None;
5094 
5095   // Try to fold shifts into the addressing mode.
5096   auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
5097   if (AddrModeFns)
5098     return AddrModeFns;
5099 
5100   // If that doesn't work, see if it's possible to fold in registers from
5101   // a GEP.
5102   return selectAddrModeRegisterOffset(Root);
5103 }
5104 
5105 /// This is used for computing addresses like this:
5106 ///
5107 /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
5108 ///
5109 /// Where we have a 64-bit base register, a 32-bit offset register, and an
5110 /// extend (which may or may not be signed).
5111 InstructionSelector::ComplexRendererFns
5112 AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
5113                                               unsigned SizeInBytes) const {
5114   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5115 
5116   MachineInstr *PtrAdd =
5117       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5118   if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
5119     return None;
5120 
5121   MachineOperand &LHS = PtrAdd->getOperand(1);
5122   MachineOperand &RHS = PtrAdd->getOperand(2);
5123   MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
5124 
5125   // The first case is the same as selectAddrModeXRO, except we need an extend.
5126   // In this case, we try to find a shift and extend, and fold them into the
5127   // addressing mode.
5128   //
5129   // E.g.
5130   //
5131   // off_reg = G_Z/S/ANYEXT ext_reg
5132   // val = G_CONSTANT LegalShiftVal
5133   // shift = G_SHL off_reg val
5134   // ptr = G_PTR_ADD base_reg shift
5135   // x = G_LOAD ptr
5136   //
5137   // In this case we can get a load like this:
5138   //
5139   // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
5140   auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
5141                                        SizeInBytes, /*WantsExt=*/true);
5142   if (ExtendedShl)
5143     return ExtendedShl;
5144 
5145   // There was no shift. We can still try to fold in a G_Z/S/ANYEXT on its own, though.
5146   //
5147   // e.g.
5148   // ldr something, [base_reg, ext_reg, sxtw]
5149   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
5150     return None;
5151 
5152   // Check if this is an extend. We'll get an extend type if it is.
5153   AArch64_AM::ShiftExtendType Ext =
5154       getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
5155   if (Ext == AArch64_AM::InvalidShiftExtend)
5156     return None;
5157 
5158   // Need a 32-bit wide register.
5159   MachineIRBuilder MIB(*PtrAdd);
5160   Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
5161                                        AArch64::GPR32RegClass, MIB);
5162   unsigned SignExtend = Ext == AArch64_AM::SXTW;
5163 
5164   // Base is LHS, offset is ExtReg.
5165   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
5166            [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5167            [=](MachineInstrBuilder &MIB) {
5168              MIB.addImm(SignExtend);
5169              MIB.addImm(0);
5170            }}};
5171 }
5172 
5173 /// Select a "register plus unscaled signed 9-bit immediate" address.  This
5174 /// should only match when there is an offset that is not valid for a scaled
5175 /// immediate addressing mode.  The "Size" argument is the size in bytes of the
5176 /// memory reference, which is needed here to know what is valid for a scaled
5177 /// immediate.
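///
/// For example (illustrative), an 8-byte access at "base + (-16)" cannot use
/// the scaled-immediate form, but -16 fits the signed 9-bit range, so this
/// renders (base, #-16) for an unscaled (LDUR/STUR-style) access.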
5178 InstructionSelector::ComplexRendererFns
5179 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
5180                                                    unsigned Size) const {
5181   MachineRegisterInfo &MRI =
5182       Root.getParent()->getParent()->getParent()->getRegInfo();
5183 
5184   if (!Root.isReg())
5185     return None;
5186 
5187   if (!isBaseWithConstantOffset(Root, MRI))
5188     return None;
5189 
5190   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5191   if (!RootDef)
5192     return None;
5193 
5194   MachineOperand &OffImm = RootDef->getOperand(2);
5195   if (!OffImm.isReg())
5196     return None;
5197   MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
5198   if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT)
5199     return None;
5200   int64_t RHSC;
5201   MachineOperand &RHSOp1 = RHS->getOperand(1);
5202   if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
5203     return None;
5204   RHSC = RHSOp1.getCImm()->getSExtValue();
5205 
5206   // If the offset is valid as a scaled immediate, don't match here.
5207   if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size)))
5208     return None;
5209   if (RHSC >= -256 && RHSC < 256) {
5210     MachineOperand &Base = RootDef->getOperand(1);
5211     return {{
5212         [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
5213         [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
5214     }};
5215   }
5216   return None;
5217 }
5218 
5219 InstructionSelector::ComplexRendererFns
5220 AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
5221                                                  unsigned Size,
5222                                                  MachineRegisterInfo &MRI) const {
5223   if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
5224     return None;
5225   MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
5226   if (Adrp.getOpcode() != AArch64::ADRP)
5227     return None;
5228 
5229   // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
5230   // TODO: Need to check GV's offset % size if doing offset folding into globals.
5231   assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global");
5232   auto GV = Adrp.getOperand(1).getGlobal();
5233   if (GV->isThreadLocal())
5234     return None;
5235 
5236   auto &MF = *RootDef.getParent()->getParent();
5237   if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
5238     return None;
5239 
5240   unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
5241   MachineIRBuilder MIRBuilder(RootDef);
5242   Register AdrpReg = Adrp.getOperand(0).getReg();
5243   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
5244            [=](MachineInstrBuilder &MIB) {
5245              MIB.addGlobalAddress(GV, /* Offset */ 0,
5246                                   OpFlags | AArch64II::MO_PAGEOFF |
5247                                       AArch64II::MO_NC);
5248            }}};
5249 }
5250 
5251 /// Select a "register plus scaled unsigned 12-bit immediate" address.  The
5252 /// "Size" argument is the size in bytes of the memory reference, which
5253 /// determines the scale.
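///
/// For example (illustrative), with Size == 8 a G_PTR_ADD of a base pointer
/// and the constant 16 renders as (base, imm = 2), since the encoded
/// immediate is scaled by the access size (i.e. "ldr x0, [xBase, #16]").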
5254 InstructionSelector::ComplexRendererFns
5255 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
5256                                                   unsigned Size) const {
5257   MachineFunction &MF = *Root.getParent()->getParent()->getParent();
5258   MachineRegisterInfo &MRI = MF.getRegInfo();
5259 
5260   if (!Root.isReg())
5261     return None;
5262 
5263   MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
5264   if (!RootDef)
5265     return None;
5266 
5267   if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
5268     return {{
5269         [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
5270         [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5271     }};
5272   }
5273 
5274   CodeModel::Model CM = MF.getTarget().getCodeModel();
5275   // Check if we can fold in the ADD of small code model ADRP + ADD address.
5276   if (CM == CodeModel::Small) {
5277     auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
5278     if (OpFns)
5279       return OpFns;
5280   }
5281 
5282   if (isBaseWithConstantOffset(Root, MRI)) {
5283     MachineOperand &LHS = RootDef->getOperand(1);
5284     MachineOperand &RHS = RootDef->getOperand(2);
5285     MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
5286     MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());
5287     if (LHSDef && RHSDef) {
5288       int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
5289       unsigned Scale = Log2_32(Size);
5290       if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
5291         if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
5292           return {{
5293               [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
5294               [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5295           }};
5296 
5297         return {{
5298             [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
5299             [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
5300         }};
5301       }
5302     }
5303   }
5304 
5305   // Before falling back to our general case, check if the unscaled
5306   // instructions can handle this. If so, that's preferable.
5307   if (selectAddrModeUnscaled(Root, Size).hasValue())
5308     return None;
5309 
5310   return {{
5311       [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
5312       [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
5313   }};
5314 }
5315 
5316 /// Given a shift instruction, return the correct shift type for that
5317 /// instruction.
5318 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
5319   // TODO: Handle AArch64_AM::ROR
5320   switch (MI.getOpcode()) {
5321   default:
5322     return AArch64_AM::InvalidShiftExtend;
5323   case TargetOpcode::G_SHL:
5324     return AArch64_AM::LSL;
5325   case TargetOpcode::G_LSHR:
5326     return AArch64_AM::LSR;
5327   case TargetOpcode::G_ASHR:
5328     return AArch64_AM::ASR;
5329   }
5330 }
5331 
5332 /// Select a "shifted register" operand. If the value is not shifted, set the
5333 /// shift operand to a default value of "lsl 0".
5334 ///
5335 /// TODO: Allow shifted register to be rotated in logical instructions.
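///
/// For example (illustrative): an operand defined by "G_SHL %x, 2" renders as
/// %x plus a shifter immediate encoding "lsl #2", so a consuming pattern can
/// select something like "add x0, x1, x2, lsl #2".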
5336 InstructionSelector::ComplexRendererFns
5337 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
5338   if (!Root.isReg())
5339     return None;
5340   MachineRegisterInfo &MRI =
5341       Root.getParent()->getParent()->getParent()->getRegInfo();
5342 
5343   // Check if the operand is defined by an instruction which corresponds to
5344   // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
5345   //
5346   // TODO: Handle AArch64_AM::ROR for logical instructions.
5347   MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
5348   if (!ShiftInst)
5349     return None;
5350   AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
5351   if (ShType == AArch64_AM::InvalidShiftExtend)
5352     return None;
5353   if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
5354     return None;
5355 
5356   // Need an immediate on the RHS.
5357   MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
5358   auto Immed = getImmedFromMO(ShiftRHS);
5359   if (!Immed)
5360     return None;
5361 
5362   // We have something that we can fold. Fold in the shift's LHS and RHS into
5363   // the instruction.
5364   MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
5365   Register ShiftReg = ShiftLHS.getReg();
5366 
5367   unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
5368   unsigned Val = *Immed & (NumBits - 1);
5369   unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);
5370 
5371   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
5372            [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
5373 }
5374 
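/// Return the AArch64 extend kind implied by \p MI: e.g. a G_SEXT from s16
/// maps to SXTH, a G_ZEXT from s8 to UXTB, and "G_AND x, 0xffff" to UXTH.
/// When \p IsLoadStore is true, only the UXTW-sized mask is accepted for the
/// G_AND case.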
5375 AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
5376     MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
5377   unsigned Opc = MI.getOpcode();
5378 
5379   // Handle explicit extend instructions first.
5380   if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
5381     unsigned Size;
5382     if (Opc == TargetOpcode::G_SEXT)
5383       Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5384     else
5385       Size = MI.getOperand(2).getImm();
5386     assert(Size != 64 && "Extend from 64 bits?");
5387     switch (Size) {
5388     case 8:
5389       return AArch64_AM::SXTB;
5390     case 16:
5391       return AArch64_AM::SXTH;
5392     case 32:
5393       return AArch64_AM::SXTW;
5394     default:
5395       return AArch64_AM::InvalidShiftExtend;
5396     }
5397   }
5398 
5399   if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
5400     unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5401     assert(Size != 64 && "Extend from 64 bits?");
5402     switch (Size) {
5403     case 8:
5404       return AArch64_AM::UXTB;
5405     case 16:
5406       return AArch64_AM::UXTH;
5407     case 32:
5408       return AArch64_AM::UXTW;
5409     default:
5410       return AArch64_AM::InvalidShiftExtend;
5411     }
5412   }
5413 
5414   // Don't have an explicit extend. Try to handle a G_AND with a constant mask
5415   // on the RHS.
5416   if (Opc != TargetOpcode::G_AND)
5417     return AArch64_AM::InvalidShiftExtend;
5418 
5419   Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
5420   if (!MaybeAndMask)
5421     return AArch64_AM::InvalidShiftExtend;
5422   uint64_t AndMask = *MaybeAndMask;
5423   switch (AndMask) {
5424   default:
5425     return AArch64_AM::InvalidShiftExtend;
5426   case 0xFF:
5427     return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
5428   case 0xFFFF:
5429     return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
5430   case 0xFFFFFFFF:
5431     return AArch64_AM::UXTW;
5432   }
5433 }
5434 
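/// Move a scalar register into register class \p RC by emitting (and
/// immediately selecting) a COPY, returning the new register. If \p Reg is
/// already the same size as \p RC, it is returned unchanged.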
5435 Register AArch64InstructionSelector::moveScalarRegClass(
5436     Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
5437   MachineRegisterInfo &MRI = *MIB.getMRI();
5438   auto Ty = MRI.getType(Reg);
5439   assert(!Ty.isVector() && "Expected scalars only!");
5440   if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
5441     return Reg;
5442 
5443   // Create a copy and immediately select it.
5444   // FIXME: We should have an emitCopy function?
5445   auto Copy = MIB.buildCopy({&RC}, {Reg});
5446   selectCopy(*Copy, TII, MRI, TRI, RBI);
5447   return Copy.getReg(0);
5448 }
5449 
5450 /// Select an "extended register" operand. This operand folds in an extend
5451 /// followed by an optional left shift.
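///
/// For example (illustrative): an operand defined by
/// "%v = G_SHL (G_ZEXT %w16), 2" can render as %w16 with the arith-extend
/// immediate for "uxth #2", so a consumer can select
/// "add x0, x1, w2, uxth #2".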
5452 InstructionSelector::ComplexRendererFns
5453 AArch64InstructionSelector::selectArithExtendedRegister(
5454     MachineOperand &Root) const {
5455   if (!Root.isReg())
5456     return None;
5457   MachineRegisterInfo &MRI =
5458       Root.getParent()->getParent()->getParent()->getRegInfo();
5459 
5460   uint64_t ShiftVal = 0;
5461   Register ExtReg;
5462   AArch64_AM::ShiftExtendType Ext;
5463   MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
5464   if (!RootDef)
5465     return None;
5466 
5467   if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
5468     return None;
5469 
5470   // Check if we can fold a shift and an extend.
5471   if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
5472     // Look for a constant on the RHS of the shift.
5473     MachineOperand &RHS = RootDef->getOperand(2);
5474     Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
5475     if (!MaybeShiftVal)
5476       return None;
5477     ShiftVal = *MaybeShiftVal;
5478     if (ShiftVal > 4)
5479       return None;
5480     // Look for a valid extend instruction on the LHS of the shift.
5481     MachineOperand &LHS = RootDef->getOperand(1);
5482     MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
5483     if (!ExtDef)
5484       return None;
5485     Ext = getExtendTypeForInst(*ExtDef, MRI);
5486     if (Ext == AArch64_AM::InvalidShiftExtend)
5487       return None;
5488     ExtReg = ExtDef->getOperand(1).getReg();
5489   } else {
5490     // Didn't get a shift. Try just folding an extend.
5491     Ext = getExtendTypeForInst(*RootDef, MRI);
5492     if (Ext == AArch64_AM::InvalidShiftExtend)
5493       return None;
5494     ExtReg = RootDef->getOperand(1).getReg();
5495 
5496     // If we have a 32 bit instruction which zeroes out the high half of a
5497     // register, we get an implicit zero extend for free. Check if we have one.
5498     // FIXME: We actually emit the extend right now even though we don't have
5499     // to.
5500     if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
5501       MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
5502       if (ExtInst && isDef32(*ExtInst))
5503         return None;
5504     }
5505   }
5506 
5507   // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
5508   // copy.
5509   MachineIRBuilder MIB(*RootDef);
5510   ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);
5511 
5512   return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
5513            [=](MachineInstrBuilder &MIB) {
5514              MIB.addImm(getArithExtendImm(Ext, ShiftVal));
5515            }}};
5516 }
5517 
5518 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
5519                                                 const MachineInstr &MI,
5520                                                 int OpIdx) const {
5521   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5522   assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5523          "Expected G_CONSTANT");
5524   Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
5525   assert(CstVal && "Expected constant value");
5526   MIB.addImm(CstVal.getValue());
5527 }
5528 
5529 void AArch64InstructionSelector::renderLogicalImm32(
5530   MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
5531   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5532          "Expected G_CONSTANT");
5533   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
5534   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
5535   MIB.addImm(Enc);
5536 }
5537 
5538 void AArch64InstructionSelector::renderLogicalImm64(
5539   MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
5540   assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
5541          "Expected G_CONSTANT");
5542   uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
5543   uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
5544   MIB.addImm(Enc);
5545 }
5546 
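     /// Returns true if \p MI is a load or store whose single memory operand
     /// accesses exactly \p NumBytes bytes.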
5547 bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
5548     const MachineInstr &MI, unsigned NumBytes) const {
5549   if (!MI.mayLoadOrStore())
5550     return false;
5551   assert(MI.hasOneMemOperand() &&
5552          "Expected load/store to have only one mem op!");
5553   return (*MI.memoperands_begin())->getSize() == NumBytes;
5554 }
5555 
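     // Background note: on AArch64, an instruction that writes a W register
     // implicitly zeroes bits [63:32] of the corresponding X register, which is
     // why most 32-bit defs can be treated as an implicit zero-extend here.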
5556 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
5557   const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5558   if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
5559     return false;
5560 
5561   // Only return true if we know the operation will zero out the high half of
5562   // the 64-bit register. Truncates can be subregister copies, which don't
5563   // zero out the high bits. Copies and other copy-like instructions can be
5564   // fed by truncates, or could be lowered as subregister copies.
5565   switch (MI.getOpcode()) {
5566   default:
5567     return true;
5568   case TargetOpcode::COPY:
5569   case TargetOpcode::G_BITCAST:
5570   case TargetOpcode::G_TRUNC:
5571   case TargetOpcode::G_PHI:
5572     return false;
5573   }
5574 }
5575 
5576 
5577 // Perform fixups on the given PHI instruction's operands to force them all
5578 // to be on the same regbank as the destination.
5579 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
5580                             const AArch64RegisterBankInfo &RBI) {
5581   assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
5582   Register DstReg = MI.getOperand(0).getReg();
5583   const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
5584   assert(DstRB && "Expected PHI dst to have regbank assigned");
5585   MachineIRBuilder MIB(MI);
5586 
5587   // Go through each operand and ensure it has the same regbank as the destination.
5588   for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5589     MachineOperand &MO = MI.getOperand(OpIdx);
5590     if (!MO.isReg())
5591       continue;
5592     Register OpReg = MO.getReg();
5593     const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
5594     if (RB != DstRB) {
5595       // Insert a cross-bank copy.
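           // Note: the copy is inserted right after the operand's defining
           // instruction (rather than next to the G_PHI), since a PHI's incoming
           // value must be available in its predecessor block.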
5596       auto *OpDef = MRI.getVRegDef(OpReg);
5597       const LLT &Ty = MRI.getType(OpReg);
5598       MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
5599       auto Copy = MIB.buildCopy(Ty, OpReg);
5600       MRI.setRegBank(Copy.getReg(0), *DstRB);
5601       MO.setReg(Copy.getReg(0));
5602     }
5603   }
5604 }
5605 
5606 void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
5607   // We're looking for PHIs; build a list so we don't invalidate iterators.
5608   MachineRegisterInfo &MRI = MF.getRegInfo();
5609   SmallVector<MachineInstr *, 32> Phis;
5610   for (auto &BB : MF) {
5611     for (auto &MI : BB) {
5612       if (MI.getOpcode() == TargetOpcode::G_PHI)
5613         Phis.emplace_back(&MI);
5614     }
5615   }
5616 
5617   for (auto *MI : Phis) {
5618     // We need to do some work here if the operand types are < 32 bits and they
5619     // are split across the fpr/gpr banks. Since all types < 32 bits on gpr
5620     // end up being assigned gpr32 regclasses, we can end up with PHIs here
5621     // which try to select between a gpr32 and an fpr16. Ideally RegBankSelect
5622     // shouldn't be selecting heterogeneous regbanks for operands if possible,
5623     // but we still need to be able to deal with it here.
5624     //
5625     // To fix this, if we have a gpr-bank operand < 32b in size and at least
5626     // one other operand is on the fpr bank, then we add cross-bank copies
5627     // to homogenize the operand banks. For simplicity the bank that we choose
5628     // to settle on is whatever bank the def operand has. For example:
5629     //
5630     // %endbb:
5631     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
5632     //  =>
5633     // %bb2:
5634     //   ...
5635     //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
5636     //   ...
5637     // %endbb:
5638     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
5639     bool HasGPROp = false, HasFPROp = false;
5640     for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
5641       const auto &MO = MI->getOperand(OpIdx);
5642       if (!MO.isReg())
5643         continue;
5644       const LLT &Ty = MRI.getType(MO.getReg());
5645       if (!Ty.isValid() || !Ty.isScalar())
5646         break;
5647       if (Ty.getSizeInBits() >= 32)
5648         break;
5649       const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
5650       // If for some reason we don't have a regbank yet, don't try anything.
5651       if (!RB)
5652         break;
5653 
5654       if (RB->getID() == AArch64::GPRRegBankID)
5655         HasGPROp = true;
5656       else
5657         HasFPROp = true;
5658     }
5659     // We have heterogeneous regbanks; we need to fix them up.
5660     if (HasGPROp && HasFPROp)
5661       fixupPHIOpBanks(*MI, MRI, RBI);
5662   }
5663 }
5664 
5665 namespace llvm {
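     /// Factory for the AArch64 selector. In the usual GlobalISel wiring (an
     /// assumption about code outside this file), the subtarget constructs and
     /// owns the selector through this hook, and the generic InstructionSelect
     /// pass later retrieves it via the subtarget's getInstructionSelector().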
5666 InstructionSelector *
5667 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
5668                                  AArch64Subtarget &Subtarget,
5669                                  AArch64RegisterBankInfo &RBI) {
5670   return new AArch64InstructionSelector(TM, Subtarget, RBI);
5671 }
5672 } // namespace llvm
5673