xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp (revision c9539b89010900499a200cdd6c0265ea5d950875)
1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
17 #include "AArch64MachineFunctionInfo.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/Analysis/BranchProbabilityInfo.h"
27 #include "llvm/CodeGen/CallingConvLower.h"
28 #include "llvm/CodeGen/FastISel.h"
29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
30 #include "llvm/CodeGen/ISDOpcodes.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineConstantPool.h"
33 #include "llvm/CodeGen/MachineFrameInfo.h"
34 #include "llvm/CodeGen/MachineInstr.h"
35 #include "llvm/CodeGen/MachineInstrBuilder.h"
36 #include "llvm/CodeGen/MachineMemOperand.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/RuntimeLibcalls.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/IR/Argument.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/CallingConv.h"
44 #include "llvm/IR/Constant.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/DataLayout.h"
47 #include "llvm/IR/DerivedTypes.h"
48 #include "llvm/IR/Function.h"
49 #include "llvm/IR/GetElementPtrTypeIterator.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/Intrinsics.h"
56 #include "llvm/IR/Operator.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/User.h"
59 #include "llvm/IR/Value.h"
60 #include "llvm/MC/MCInstrDesc.h"
61 #include "llvm/MC/MCRegisterInfo.h"
62 #include "llvm/MC/MCSymbol.h"
63 #include "llvm/Support/AtomicOrdering.h"
64 #include "llvm/Support/Casting.h"
65 #include "llvm/Support/CodeGen.h"
66 #include "llvm/Support/Compiler.h"
67 #include "llvm/Support/ErrorHandling.h"
68 #include "llvm/Support/MachineValueType.h"
69 #include "llvm/Support/MathExtras.h"
70 #include <algorithm>
71 #include <cassert>
72 #include <cstdint>
73 #include <iterator>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 namespace {
79 
80 class AArch64FastISel final : public FastISel {
81   class Address {
82   public:
83     using BaseKind = enum {
84       RegBase,
85       FrameIndexBase
86     };
87 
88   private:
89     BaseKind Kind = RegBase;
90     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91     union {
92       unsigned Reg;
93       int FI;
94     } Base;
95     unsigned OffsetReg = 0;
96     unsigned Shift = 0;
97     int64_t Offset = 0;
98     const GlobalValue *GV = nullptr;
99 
100   public:
101     Address() { Base.Reg = 0; }
102 
103     void setKind(BaseKind K) { Kind = K; }
104     BaseKind getKind() const { return Kind; }
105     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
106     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
107     bool isRegBase() const { return Kind == RegBase; }
108     bool isFIBase() const { return Kind == FrameIndexBase; }
109 
110     void setReg(unsigned Reg) {
111       assert(isRegBase() && "Invalid base register access!");
112       Base.Reg = Reg;
113     }
114 
115     unsigned getReg() const {
116       assert(isRegBase() && "Invalid base register access!");
117       return Base.Reg;
118     }
119 
120     void setOffsetReg(unsigned Reg) {
121       OffsetReg = Reg;
122     }
123 
124     unsigned getOffsetReg() const {
125       return OffsetReg;
126     }
127 
128     void setFI(unsigned FI) {
129       assert(isFIBase() && "Invalid base frame index  access!");
130       Base.FI = FI;
131     }
132 
133     unsigned getFI() const {
134       assert(isFIBase() && "Invalid base frame index access!");
135       return Base.FI;
136     }
137 
138     void setOffset(int64_t O) { Offset = O; }
139     int64_t getOffset() { return Offset; }
140     void setShift(unsigned S) { Shift = S; }
141     unsigned getShift() { return Shift; }
142 
143     void setGlobalValue(const GlobalValue *G) { GV = G; }
144     const GlobalValue *getGlobalValue() { return GV; }
145   };
146 
147   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148   /// make the right decision when generating code for different targets.
149   const AArch64Subtarget *Subtarget;
150   LLVMContext *Context;
151 
152   bool fastLowerArguments() override;
153   bool fastLowerCall(CallLoweringInfo &CLI) override;
154   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155 
156 private:
157   // Selection routines.
158   bool selectAddSub(const Instruction *I);
159   bool selectLogicalOp(const Instruction *I);
160   bool selectLoad(const Instruction *I);
161   bool selectStore(const Instruction *I);
162   bool selectBranch(const Instruction *I);
163   bool selectIndirectBr(const Instruction *I);
164   bool selectCmp(const Instruction *I);
165   bool selectSelect(const Instruction *I);
166   bool selectFPExt(const Instruction *I);
167   bool selectFPTrunc(const Instruction *I);
168   bool selectFPToInt(const Instruction *I, bool Signed);
169   bool selectIntToFP(const Instruction *I, bool Signed);
170   bool selectRem(const Instruction *I, unsigned ISDOpcode);
171   bool selectRet(const Instruction *I);
172   bool selectTrunc(const Instruction *I);
173   bool selectIntExt(const Instruction *I);
174   bool selectMul(const Instruction *I);
175   bool selectShift(const Instruction *I);
176   bool selectBitCast(const Instruction *I);
177   bool selectFRem(const Instruction *I);
178   bool selectSDiv(const Instruction *I);
179   bool selectGetElementPtr(const Instruction *I);
180   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181 
182   // Utility helper routines.
183   bool isTypeLegal(Type *Ty, MVT &VT);
184   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185   bool isValueAvailable(const Value *V) const;
186   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187   bool computeCallAddress(const Value *V, Address &Addr);
188   bool simplifyAddress(Address &Addr, MVT VT);
189   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190                             MachineMemOperand::Flags Flags,
191                             unsigned ScaleFactor, MachineMemOperand *MMO);
192   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
193   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194                           unsigned Alignment);
195   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196                          const Value *Cond);
197   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198   bool optimizeSelect(const SelectInst *SI);
199   unsigned getRegForGEPIndex(const Value *Idx);
200 
201   // Emit helper routines.
202   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203                       const Value *RHS, bool SetFlags = false,
204                       bool WantResult = true,  bool IsZExt = false);
205   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206                          unsigned RHSReg, bool SetFlags = false,
207                          bool WantResult = true);
208   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209                          uint64_t Imm, bool SetFlags = false,
210                          bool WantResult = true);
211   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212                          unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
213                          uint64_t ShiftImm, bool SetFlags = false,
214                          bool WantResult = true);
215   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216                          unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
217                          uint64_t ShiftImm, bool SetFlags = false,
218                          bool WantResult = true);
219 
220   // Emit functions.
221   bool emitCompareAndBranch(const BranchInst *BI);
222   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
223   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
224   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
225   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
226   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
227                     MachineMemOperand *MMO = nullptr);
228   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
229                  MachineMemOperand *MMO = nullptr);
230   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
231                         MachineMemOperand *MMO = nullptr);
232   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
233   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
234   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
235                    bool SetFlags = false, bool WantResult = true,
236                    bool IsZExt = false);
237   unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
238   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
239                    bool SetFlags = false, bool WantResult = true,
240                    bool IsZExt = false);
241   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
242                        bool WantResult = true);
243   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
244                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
245                        bool WantResult = true);
246   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
247                          const Value *RHS);
248   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
249                             uint64_t Imm);
250   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251                             unsigned RHSReg, uint64_t ShiftImm);
252   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
253   unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
254   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
257   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
258                       bool IsZExt = true);
259   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
260   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
261                       bool IsZExt = true);
262   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
263   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
264                       bool IsZExt = false);
265 
266   unsigned materializeInt(const ConstantInt *CI, MVT VT);
267   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
268   unsigned materializeGV(const GlobalValue *GV);
269 
270   // Call handling routines.
271 private:
272   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
273   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
274                        unsigned &NumBytes);
275   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
276 
277 public:
278   // Backend specific FastISel code.
279   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
280   unsigned fastMaterializeConstant(const Constant *C) override;
281   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
282 
283   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
284                            const TargetLibraryInfo *LibInfo)
285       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
286     Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
287     Context = &FuncInfo.Fn->getContext();
288   }
289 
290   bool fastSelectInstruction(const Instruction *I) override;
291 
292 #include "AArch64GenFastISel.inc"
293 };
294 
295 } // end anonymous namespace
296 
297 /// Check if the sign-/zero-extend will be a noop.
298 static bool isIntExtFree(const Instruction *I) {
299   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
300          "Unexpected integer extend instruction.");
301   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
302          "Unexpected value type.");
303   bool IsZExt = isa<ZExtInst>(I);
304 
305   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
306     if (LI->hasOneUse())
307       return true;
308 
309   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
310     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
311       return true;
312 
313   return false;
314 }
315 
316 /// Determine the implicit scale factor that is applied by a memory
317 /// operation for a given value type.
318 static unsigned getImplicitScaleFactor(MVT VT) {
319   switch (VT.SimpleTy) {
320   default:
321     return 0;    // invalid
322   case MVT::i1:  // fall-through
323   case MVT::i8:
324     return 1;
325   case MVT::i16:
326     return 2;
327   case MVT::i32: // fall-through
328   case MVT::f32:
329     return 4;
330   case MVT::i64: // fall-through
331   case MVT::f64:
332     return 8;
333   }
334 }
335 
336 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
337   if (CC == CallingConv::WebKit_JS)
338     return CC_AArch64_WebKit_JS;
339   if (CC == CallingConv::GHC)
340     return CC_AArch64_GHC;
341   if (CC == CallingConv::CFGuard_Check)
342     return CC_AArch64_Win64_CFGuard_Check;
343   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
344 }
345 
346 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
347   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
348          "Alloca should always return a pointer.");
349 
350   // Don't handle dynamic allocas.
351   if (!FuncInfo.StaticAllocaMap.count(AI))
352     return 0;
353 
354   DenseMap<const AllocaInst *, int>::iterator SI =
355       FuncInfo.StaticAllocaMap.find(AI);
356 
357   if (SI != FuncInfo.StaticAllocaMap.end()) {
358     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
359     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
360             ResultReg)
361         .addFrameIndex(SI->second)
362         .addImm(0)
363         .addImm(0);
364     return ResultReg;
365   }
366 
367   return 0;
368 }
369 
370 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
371   if (VT > MVT::i64)
372     return 0;
373 
374   if (!CI->isZero())
375     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
376 
377   // Create a copy from the zero register to materialize a "0" value.
378   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
379                                                    : &AArch64::GPR32RegClass;
380   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
381   Register ResultReg = createResultReg(RC);
382   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
383           ResultReg).addReg(ZeroReg, getKillRegState(true));
384   return ResultReg;
385 }
386 
387 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
388   // Positive zero (+0.0) has to be materialized with a fmov from the zero
389   // register, because the immediate version of fmov cannot encode zero.
390   if (CFP->isNullValue())
391     return fastMaterializeFloatZero(CFP);
392 
393   if (VT != MVT::f32 && VT != MVT::f64)
394     return 0;
395 
396   const APFloat Val = CFP->getValueAPF();
397   bool Is64Bit = (VT == MVT::f64);
398   // This checks to see if we can use FMOV instructions to materialize
399   // a constant, otherwise we have to materialize via the constant pool.
400   int Imm =
401       Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
402   if (Imm != -1) {
403     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
404     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
405   }
406 
407   // For the large code model materialize the FP constant in code.
408   if (TM.getCodeModel() == CodeModel::Large) {
409     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
410     const TargetRegisterClass *RC = Is64Bit ?
411         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
412 
413     Register TmpReg = createResultReg(RC);
414     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
415         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
416 
417     Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
418     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
419             TII.get(TargetOpcode::COPY), ResultReg)
420         .addReg(TmpReg, getKillRegState(true));
421 
422     return ResultReg;
423   }
424 
425   // Materialize via constant pool.  MachineConstantPool wants an explicit
426   // alignment.
427   Align Alignment = DL.getPrefTypeAlign(CFP->getType());
428 
429   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
430   Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
431   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
432           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
433 
434   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
435   Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
436   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
437       .addReg(ADRPReg)
438       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
439   return ResultReg;
440 }
441 
442 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
443   // We can't handle thread-local variables quickly yet.
444   if (GV->isThreadLocal())
445     return 0;
446 
447   // MachO still uses GOT for large code-model accesses, but ELF requires
448   // movz/movk sequences, which FastISel doesn't handle yet.
449   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
450     return 0;
451 
452   unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
453 
454   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
455   if (!DestEVT.isSimple())
456     return 0;
457 
458   Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
459   unsigned ResultReg;
460 
461   if (OpFlags & AArch64II::MO_GOT) {
462     // ADRP + LDRX
463     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
464             ADRPReg)
465         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
466 
467     unsigned LdrOpc;
468     if (Subtarget->isTargetILP32()) {
469       ResultReg = createResultReg(&AArch64::GPR32RegClass);
470       LdrOpc = AArch64::LDRWui;
471     } else {
472       ResultReg = createResultReg(&AArch64::GPR64RegClass);
473       LdrOpc = AArch64::LDRXui;
474     }
475     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
476             ResultReg)
477       .addReg(ADRPReg)
478       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
479                         AArch64II::MO_NC | OpFlags);
480     if (!Subtarget->isTargetILP32())
481       return ResultReg;
482 
483     // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
484     // so we must extend the result on ILP32.
485     Register Result64 = createResultReg(&AArch64::GPR64RegClass);
486     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
487             TII.get(TargetOpcode::SUBREG_TO_REG))
488         .addDef(Result64)
489         .addImm(0)
490         .addReg(ResultReg, RegState::Kill)
491         .addImm(AArch64::sub_32);
492     return Result64;
493   } else {
494     // ADRP + ADDX
495     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
496             ADRPReg)
497         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
498 
499     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
500     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
501             ResultReg)
502         .addReg(ADRPReg)
503         .addGlobalAddress(GV, 0,
504                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
505         .addImm(0);
506   }
507   return ResultReg;
508 }
509 
510 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
511   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
512 
513   // Only handle simple types.
514   if (!CEVT.isSimple())
515     return 0;
516   MVT VT = CEVT.getSimpleVT();
517   // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
518   // 'null' pointers need to have a somewhat special treatment.
519   if (isa<ConstantPointerNull>(C)) {
520     assert(VT == MVT::i64 && "Expected 64-bit pointers");
521     return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
522   }
523 
524   if (const auto *CI = dyn_cast<ConstantInt>(C))
525     return materializeInt(CI, VT);
526   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
527     return materializeFP(CFP, VT);
528   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
529     return materializeGV(GV);
530 
531   return 0;
532 }
533 
534 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
535   assert(CFP->isNullValue() &&
536          "Floating-point constant is not a positive zero.");
537   MVT VT;
538   if (!isTypeLegal(CFP->getType(), VT))
539     return 0;
540 
541   if (VT != MVT::f32 && VT != MVT::f64)
542     return 0;
543 
544   bool Is64Bit = (VT == MVT::f64);
545   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
546   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
547   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
548 }
549 
550 /// Check if the multiply is by a power-of-2 constant.
551 static bool isMulPowOf2(const Value *I) {
552   if (const auto *MI = dyn_cast<MulOperator>(I)) {
553     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
554       if (C->getValue().isPowerOf2())
555         return true;
556     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
557       if (C->getValue().isPowerOf2())
558         return true;
559   }
560   return false;
561 }
562 
563 // Computes the address to get to an object.
564 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
565 {
566   const User *U = nullptr;
567   unsigned Opcode = Instruction::UserOp1;
568   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
569     // Don't walk into other basic blocks unless the object is an alloca from
570     // another block, otherwise it may not have a virtual register assigned.
571     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
572         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
573       Opcode = I->getOpcode();
574       U = I;
575     }
576   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
577     Opcode = C->getOpcode();
578     U = C;
579   }
580 
581   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
582     if (Ty->getAddressSpace() > 255)
583       // Fast instruction selection doesn't support the special
584       // address spaces.
585       return false;
586 
587   switch (Opcode) {
588   default:
589     break;
590   case Instruction::BitCast:
591     // Look through bitcasts.
592     return computeAddress(U->getOperand(0), Addr, Ty);
593 
594   case Instruction::IntToPtr:
595     // Look past no-op inttoptrs.
596     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
597         TLI.getPointerTy(DL))
598       return computeAddress(U->getOperand(0), Addr, Ty);
599     break;
600 
601   case Instruction::PtrToInt:
602     // Look past no-op ptrtoints.
603     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
604       return computeAddress(U->getOperand(0), Addr, Ty);
605     break;
606 
607   case Instruction::GetElementPtr: {
608     Address SavedAddr = Addr;
609     uint64_t TmpOffset = Addr.getOffset();
610 
611     // Iterate through the GEP folding the constants into offsets where
612     // we can.
613     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
614          GTI != E; ++GTI) {
615       const Value *Op = GTI.getOperand();
616       if (StructType *STy = GTI.getStructTypeOrNull()) {
617         const StructLayout *SL = DL.getStructLayout(STy);
618         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
619         TmpOffset += SL->getElementOffset(Idx);
620       } else {
621         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
622         while (true) {
623           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
624             // Constant-offset addressing.
625             TmpOffset += CI->getSExtValue() * S;
626             break;
627           }
628           if (canFoldAddIntoGEP(U, Op)) {
629             // A compatible add with a constant operand. Fold the constant.
630             ConstantInt *CI =
631                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
632             TmpOffset += CI->getSExtValue() * S;
633             // Iterate on the other operand.
634             Op = cast<AddOperator>(Op)->getOperand(0);
635             continue;
636           }
637           // Unsupported
638           goto unsupported_gep;
639         }
640       }
641     }
642 
643     // Try to grab the base operand now.
644     Addr.setOffset(TmpOffset);
645     if (computeAddress(U->getOperand(0), Addr, Ty))
646       return true;
647 
648     // We failed, restore everything and try the other options.
649     Addr = SavedAddr;
650 
651   unsupported_gep:
652     break;
653   }
654   case Instruction::Alloca: {
655     const AllocaInst *AI = cast<AllocaInst>(Obj);
656     DenseMap<const AllocaInst *, int>::iterator SI =
657         FuncInfo.StaticAllocaMap.find(AI);
658     if (SI != FuncInfo.StaticAllocaMap.end()) {
659       Addr.setKind(Address::FrameIndexBase);
660       Addr.setFI(SI->second);
661       return true;
662     }
663     break;
664   }
665   case Instruction::Add: {
666     // Adds of constants are common and easy enough.
667     const Value *LHS = U->getOperand(0);
668     const Value *RHS = U->getOperand(1);
669 
670     if (isa<ConstantInt>(LHS))
671       std::swap(LHS, RHS);
672 
673     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
674       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
675       return computeAddress(LHS, Addr, Ty);
676     }
677 
678     Address Backup = Addr;
679     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
680       return true;
681     Addr = Backup;
682 
683     break;
684   }
685   case Instruction::Sub: {
686     // Subs of constants are common and easy enough.
687     const Value *LHS = U->getOperand(0);
688     const Value *RHS = U->getOperand(1);
689 
690     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
691       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
692       return computeAddress(LHS, Addr, Ty);
693     }
694     break;
695   }
696   case Instruction::Shl: {
697     if (Addr.getOffsetReg())
698       break;
699 
700     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
701     if (!CI)
702       break;
703 
704     unsigned Val = CI->getZExtValue();
705     if (Val < 1 || Val > 3)
706       break;
707 
708     uint64_t NumBytes = 0;
709     if (Ty && Ty->isSized()) {
710       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
711       NumBytes = NumBits / 8;
712       if (!isPowerOf2_64(NumBits))
713         NumBytes = 0;
714     }
715 
716     if (NumBytes != (1ULL << Val))
717       break;
718 
719     Addr.setShift(Val);
720     Addr.setExtendType(AArch64_AM::LSL);
721 
722     const Value *Src = U->getOperand(0);
723     if (const auto *I = dyn_cast<Instruction>(Src)) {
724       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
725         // Fold the zext or sext when it won't become a noop.
726         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
727           if (!isIntExtFree(ZE) &&
728               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
729             Addr.setExtendType(AArch64_AM::UXTW);
730             Src = ZE->getOperand(0);
731           }
732         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
733           if (!isIntExtFree(SE) &&
734               SE->getOperand(0)->getType()->isIntegerTy(32)) {
735             Addr.setExtendType(AArch64_AM::SXTW);
736             Src = SE->getOperand(0);
737           }
738         }
739       }
740     }
741 
742     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
743       if (AI->getOpcode() == Instruction::And) {
744         const Value *LHS = AI->getOperand(0);
745         const Value *RHS = AI->getOperand(1);
746 
747         if (const auto *C = dyn_cast<ConstantInt>(LHS))
748           if (C->getValue() == 0xffffffff)
749             std::swap(LHS, RHS);
750 
751         if (const auto *C = dyn_cast<ConstantInt>(RHS))
752           if (C->getValue() == 0xffffffff) {
753             Addr.setExtendType(AArch64_AM::UXTW);
754             Register Reg = getRegForValue(LHS);
755             if (!Reg)
756               return false;
757             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
758             Addr.setOffsetReg(Reg);
759             return true;
760           }
761       }
762 
763     Register Reg = getRegForValue(Src);
764     if (!Reg)
765       return false;
766     Addr.setOffsetReg(Reg);
767     return true;
768   }
769   case Instruction::Mul: {
770     if (Addr.getOffsetReg())
771       break;
772 
773     if (!isMulPowOf2(U))
774       break;
775 
776     const Value *LHS = U->getOperand(0);
777     const Value *RHS = U->getOperand(1);
778 
779     // Canonicalize power-of-2 value to the RHS.
780     if (const auto *C = dyn_cast<ConstantInt>(LHS))
781       if (C->getValue().isPowerOf2())
782         std::swap(LHS, RHS);
783 
784     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
785     const auto *C = cast<ConstantInt>(RHS);
786     unsigned Val = C->getValue().logBase2();
787     if (Val < 1 || Val > 3)
788       break;
789 
790     uint64_t NumBytes = 0;
791     if (Ty && Ty->isSized()) {
792       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
793       NumBytes = NumBits / 8;
794       if (!isPowerOf2_64(NumBits))
795         NumBytes = 0;
796     }
797 
798     if (NumBytes != (1ULL << Val))
799       break;
800 
801     Addr.setShift(Val);
802     Addr.setExtendType(AArch64_AM::LSL);
803 
804     const Value *Src = LHS;
805     if (const auto *I = dyn_cast<Instruction>(Src)) {
806       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
807         // Fold the zext or sext when it won't become a noop.
808         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
809           if (!isIntExtFree(ZE) &&
810               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
811             Addr.setExtendType(AArch64_AM::UXTW);
812             Src = ZE->getOperand(0);
813           }
814         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
815           if (!isIntExtFree(SE) &&
816               SE->getOperand(0)->getType()->isIntegerTy(32)) {
817             Addr.setExtendType(AArch64_AM::SXTW);
818             Src = SE->getOperand(0);
819           }
820         }
821       }
822     }
823 
824     Register Reg = getRegForValue(Src);
825     if (!Reg)
826       return false;
827     Addr.setOffsetReg(Reg);
828     return true;
829   }
830   case Instruction::And: {
831     if (Addr.getOffsetReg())
832       break;
833 
834     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
835       break;
836 
837     const Value *LHS = U->getOperand(0);
838     const Value *RHS = U->getOperand(1);
839 
840     if (const auto *C = dyn_cast<ConstantInt>(LHS))
841       if (C->getValue() == 0xffffffff)
842         std::swap(LHS, RHS);
843 
844     if (const auto *C = dyn_cast<ConstantInt>(RHS))
845       if (C->getValue() == 0xffffffff) {
846         Addr.setShift(0);
847         Addr.setExtendType(AArch64_AM::LSL);
848         Addr.setExtendType(AArch64_AM::UXTW);
849 
850         Register Reg = getRegForValue(LHS);
851         if (!Reg)
852           return false;
853         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
854         Addr.setOffsetReg(Reg);
855         return true;
856       }
857     break;
858   }
859   case Instruction::SExt:
860   case Instruction::ZExt: {
861     if (!Addr.getReg() || Addr.getOffsetReg())
862       break;
863 
864     const Value *Src = nullptr;
865     // Fold the zext or sext when it won't become a noop.
866     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
867       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
868         Addr.setExtendType(AArch64_AM::UXTW);
869         Src = ZE->getOperand(0);
870       }
871     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
872       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
873         Addr.setExtendType(AArch64_AM::SXTW);
874         Src = SE->getOperand(0);
875       }
876     }
877 
878     if (!Src)
879       break;
880 
881     Addr.setShift(0);
882     Register Reg = getRegForValue(Src);
883     if (!Reg)
884       return false;
885     Addr.setOffsetReg(Reg);
886     return true;
887   }
888   } // end switch
889 
890   if (Addr.isRegBase() && !Addr.getReg()) {
891     Register Reg = getRegForValue(Obj);
892     if (!Reg)
893       return false;
894     Addr.setReg(Reg);
895     return true;
896   }
897 
898   if (!Addr.getOffsetReg()) {
899     Register Reg = getRegForValue(Obj);
900     if (!Reg)
901       return false;
902     Addr.setOffsetReg(Reg);
903     return true;
904   }
905 
906   return false;
907 }
908 
909 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
910   const User *U = nullptr;
911   unsigned Opcode = Instruction::UserOp1;
912   bool InMBB = true;
913 
914   if (const auto *I = dyn_cast<Instruction>(V)) {
915     Opcode = I->getOpcode();
916     U = I;
917     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
918   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
919     Opcode = C->getOpcode();
920     U = C;
921   }
922 
923   switch (Opcode) {
924   default: break;
925   case Instruction::BitCast:
926     // Look past bitcasts if its operand is in the same BB.
927     if (InMBB)
928       return computeCallAddress(U->getOperand(0), Addr);
929     break;
930   case Instruction::IntToPtr:
931     // Look past no-op inttoptrs if its operand is in the same BB.
932     if (InMBB &&
933         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
934             TLI.getPointerTy(DL))
935       return computeCallAddress(U->getOperand(0), Addr);
936     break;
937   case Instruction::PtrToInt:
938     // Look past no-op ptrtoints if its operand is in the same BB.
939     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
940       return computeCallAddress(U->getOperand(0), Addr);
941     break;
942   }
943 
944   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
945     Addr.setGlobalValue(GV);
946     return true;
947   }
948 
949   // If all else fails, try to materialize the value in a register.
950   if (!Addr.getGlobalValue()) {
951     Addr.setReg(getRegForValue(V));
952     return Addr.getReg() != 0;
953   }
954 
955   return false;
956 }
957 
958 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
959   EVT evt = TLI.getValueType(DL, Ty, true);
960 
961   if (Subtarget->isTargetILP32() && Ty->isPointerTy())
962     return false;
963 
964   // Only handle simple types.
965   if (evt == MVT::Other || !evt.isSimple())
966     return false;
967   VT = evt.getSimpleVT();
968 
969   // This is a legal type, but it's not something we handle in fast-isel.
970   if (VT == MVT::f128)
971     return false;
972 
973   // Handle all other legal types, i.e. a register that will directly hold this
974   // value.
975   return TLI.isTypeLegal(VT);
976 }
977 
978 /// Determine if the value type is supported by FastISel.
979 ///
980 /// FastISel for AArch64 can handle more value types than are legal. This adds
981 /// simple value type such as i1, i8, and i16.
982 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
983   if (Ty->isVectorTy() && !IsVectorAllowed)
984     return false;
985 
986   if (isTypeLegal(Ty, VT))
987     return true;
988 
989   // If this is a type than can be sign or zero-extended to a basic operation
990   // go ahead and accept it now.
991   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
992     return true;
993 
994   return false;
995 }
996 
997 bool AArch64FastISel::isValueAvailable(const Value *V) const {
998   if (!isa<Instruction>(V))
999     return true;
1000 
1001   const auto *I = cast<Instruction>(V);
1002   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1003 }
1004 
1005 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1006   if (Subtarget->isTargetILP32())
1007     return false;
1008 
1009   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1010   if (!ScaleFactor)
1011     return false;
1012 
1013   bool ImmediateOffsetNeedsLowering = false;
1014   bool RegisterOffsetNeedsLowering = false;
1015   int64_t Offset = Addr.getOffset();
1016   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1017     ImmediateOffsetNeedsLowering = true;
1018   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1019            !isUInt<12>(Offset / ScaleFactor))
1020     ImmediateOffsetNeedsLowering = true;
1021 
1022   // Cannot encode an offset register and an immediate offset in the same
1023   // instruction. Fold the immediate offset into the load/store instruction and
1024   // emit an additional add to take care of the offset register.
1025   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1026     RegisterOffsetNeedsLowering = true;
1027 
1028   // Cannot encode zero register as base.
1029   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1030     RegisterOffsetNeedsLowering = true;
1031 
1032   // If this is a stack pointer and the offset needs to be simplified then put
1033   // the alloca address into a register, set the base type back to register and
1034   // continue. This should almost never happen.
1035   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1036   {
1037     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1038     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1039             ResultReg)
1040       .addFrameIndex(Addr.getFI())
1041       .addImm(0)
1042       .addImm(0);
1043     Addr.setKind(Address::RegBase);
1044     Addr.setReg(ResultReg);
1045   }
1046 
1047   if (RegisterOffsetNeedsLowering) {
1048     unsigned ResultReg = 0;
1049     if (Addr.getReg()) {
1050       if (Addr.getExtendType() == AArch64_AM::SXTW ||
1051           Addr.getExtendType() == AArch64_AM::UXTW   )
1052         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1053                                   Addr.getOffsetReg(), Addr.getExtendType(),
1054                                   Addr.getShift());
1055       else
1056         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1057                                   Addr.getOffsetReg(), AArch64_AM::LSL,
1058                                   Addr.getShift());
1059     } else {
1060       if (Addr.getExtendType() == AArch64_AM::UXTW)
1061         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1062                                Addr.getShift(), /*IsZExt=*/true);
1063       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1064         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1065                                Addr.getShift(), /*IsZExt=*/false);
1066       else
1067         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1068                                Addr.getShift());
1069     }
1070     if (!ResultReg)
1071       return false;
1072 
1073     Addr.setReg(ResultReg);
1074     Addr.setOffsetReg(0);
1075     Addr.setShift(0);
1076     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1077   }
1078 
1079   // Since the offset is too large for the load/store instruction get the
1080   // reg+offset into a register.
1081   if (ImmediateOffsetNeedsLowering) {
1082     unsigned ResultReg;
1083     if (Addr.getReg())
1084       // Try to fold the immediate into the add instruction.
1085       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1086     else
1087       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1088 
1089     if (!ResultReg)
1090       return false;
1091     Addr.setReg(ResultReg);
1092     Addr.setOffset(0);
1093   }
1094   return true;
1095 }
1096 
1097 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1098                                            const MachineInstrBuilder &MIB,
1099                                            MachineMemOperand::Flags Flags,
1100                                            unsigned ScaleFactor,
1101                                            MachineMemOperand *MMO) {
1102   int64_t Offset = Addr.getOffset() / ScaleFactor;
1103   // Frame base works a bit differently. Handle it separately.
1104   if (Addr.isFIBase()) {
1105     int FI = Addr.getFI();
1106     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1107     // and alignment should be based on the VT.
1108     MMO = FuncInfo.MF->getMachineMemOperand(
1109         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1110         MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1111     // Now add the rest of the operands.
1112     MIB.addFrameIndex(FI).addImm(Offset);
1113   } else {
1114     assert(Addr.isRegBase() && "Unexpected address kind.");
1115     const MCInstrDesc &II = MIB->getDesc();
1116     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1117     Addr.setReg(
1118       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1119     Addr.setOffsetReg(
1120       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1121     if (Addr.getOffsetReg()) {
1122       assert(Addr.getOffset() == 0 && "Unexpected offset");
1123       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1124                       Addr.getExtendType() == AArch64_AM::SXTX;
1125       MIB.addReg(Addr.getReg());
1126       MIB.addReg(Addr.getOffsetReg());
1127       MIB.addImm(IsSigned);
1128       MIB.addImm(Addr.getShift() != 0);
1129     } else
1130       MIB.addReg(Addr.getReg()).addImm(Offset);
1131   }
1132 
1133   if (MMO)
1134     MIB.addMemOperand(MMO);
1135 }
1136 
1137 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1138                                      const Value *RHS, bool SetFlags,
1139                                      bool WantResult,  bool IsZExt) {
1140   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1141   bool NeedExtend = false;
1142   switch (RetVT.SimpleTy) {
1143   default:
1144     return 0;
1145   case MVT::i1:
1146     NeedExtend = true;
1147     break;
1148   case MVT::i8:
1149     NeedExtend = true;
1150     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1151     break;
1152   case MVT::i16:
1153     NeedExtend = true;
1154     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1155     break;
1156   case MVT::i32:  // fall-through
1157   case MVT::i64:
1158     break;
1159   }
1160   MVT SrcVT = RetVT;
1161   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1162 
1163   // Canonicalize immediates to the RHS first.
1164   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1165     std::swap(LHS, RHS);
1166 
1167   // Canonicalize mul by power of 2 to the RHS.
1168   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1169     if (isMulPowOf2(LHS))
1170       std::swap(LHS, RHS);
1171 
1172   // Canonicalize shift immediate to the RHS.
1173   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1174     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1175       if (isa<ConstantInt>(SI->getOperand(1)))
1176         if (SI->getOpcode() == Instruction::Shl  ||
1177             SI->getOpcode() == Instruction::LShr ||
1178             SI->getOpcode() == Instruction::AShr   )
1179           std::swap(LHS, RHS);
1180 
1181   Register LHSReg = getRegForValue(LHS);
1182   if (!LHSReg)
1183     return 0;
1184 
1185   if (NeedExtend)
1186     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1187 
1188   unsigned ResultReg = 0;
1189   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1190     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1191     if (C->isNegative())
1192       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1193                                 WantResult);
1194     else
1195       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1196                                 WantResult);
1197   } else if (const auto *C = dyn_cast<Constant>(RHS))
1198     if (C->isNullValue())
1199       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1200 
1201   if (ResultReg)
1202     return ResultReg;
1203 
1204   // Only extend the RHS within the instruction if there is a valid extend type.
1205   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1206       isValueAvailable(RHS)) {
1207     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1208       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1209         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1210           Register RHSReg = getRegForValue(SI->getOperand(0));
1211           if (!RHSReg)
1212             return 0;
1213           return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
1214                                C->getZExtValue(), SetFlags, WantResult);
1215         }
1216     Register RHSReg = getRegForValue(RHS);
1217     if (!RHSReg)
1218       return 0;
1219     return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1220                          SetFlags, WantResult);
1221   }
1222 
1223   // Check if the mul can be folded into the instruction.
1224   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1225     if (isMulPowOf2(RHS)) {
1226       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1227       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1228 
1229       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1230         if (C->getValue().isPowerOf2())
1231           std::swap(MulLHS, MulRHS);
1232 
1233       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1234       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1235       Register RHSReg = getRegForValue(MulLHS);
1236       if (!RHSReg)
1237         return 0;
1238       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1239                                 ShiftVal, SetFlags, WantResult);
1240       if (ResultReg)
1241         return ResultReg;
1242     }
1243   }
1244 
1245   // Check if the shift can be folded into the instruction.
1246   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1247     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1248       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1249         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1250         switch (SI->getOpcode()) {
1251         default: break;
1252         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1253         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1254         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1255         }
1256         uint64_t ShiftVal = C->getZExtValue();
1257         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1258           Register RHSReg = getRegForValue(SI->getOperand(0));
1259           if (!RHSReg)
1260             return 0;
1261           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1262                                     ShiftVal, SetFlags, WantResult);
1263           if (ResultReg)
1264             return ResultReg;
1265         }
1266       }
1267     }
1268   }
1269 
1270   Register RHSReg = getRegForValue(RHS);
1271   if (!RHSReg)
1272     return 0;
1273 
1274   if (NeedExtend)
1275     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1276 
1277   return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1278 }
1279 
1280 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1281                                         unsigned RHSReg, bool SetFlags,
1282                                         bool WantResult) {
1283   assert(LHSReg && RHSReg && "Invalid register number.");
1284 
1285   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1286       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1287     return 0;
1288 
1289   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1290     return 0;
1291 
1292   static const unsigned OpcTable[2][2][2] = {
1293     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1294       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1295     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1296       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1297   };
1298   bool Is64Bit = RetVT == MVT::i64;
1299   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1300   const TargetRegisterClass *RC =
1301       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1302   unsigned ResultReg;
1303   if (WantResult)
1304     ResultReg = createResultReg(RC);
1305   else
1306     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1307 
1308   const MCInstrDesc &II = TII.get(Opc);
1309   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1310   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1311   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1312       .addReg(LHSReg)
1313       .addReg(RHSReg);
1314   return ResultReg;
1315 }
1316 
1317 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1318                                         uint64_t Imm, bool SetFlags,
1319                                         bool WantResult) {
1320   assert(LHSReg && "Invalid register number.");
1321 
1322   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1323     return 0;
1324 
1325   unsigned ShiftImm;
1326   if (isUInt<12>(Imm))
1327     ShiftImm = 0;
1328   else if ((Imm & 0xfff000) == Imm) {
1329     ShiftImm = 12;
1330     Imm >>= 12;
1331   } else
1332     return 0;
1333 
1334   static const unsigned OpcTable[2][2][2] = {
1335     { { AArch64::SUBWri,  AArch64::SUBXri  },
1336       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1337     { { AArch64::SUBSWri, AArch64::SUBSXri },
1338       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1339   };
1340   bool Is64Bit = RetVT == MVT::i64;
1341   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1342   const TargetRegisterClass *RC;
1343   if (SetFlags)
1344     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1345   else
1346     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1347   unsigned ResultReg;
1348   if (WantResult)
1349     ResultReg = createResultReg(RC);
1350   else
1351     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1352 
1353   const MCInstrDesc &II = TII.get(Opc);
1354   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1355   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1356       .addReg(LHSReg)
1357       .addImm(Imm)
1358       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1359   return ResultReg;
1360 }
1361 
1362 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1363                                         unsigned RHSReg,
1364                                         AArch64_AM::ShiftExtendType ShiftType,
1365                                         uint64_t ShiftImm, bool SetFlags,
1366                                         bool WantResult) {
1367   assert(LHSReg && RHSReg && "Invalid register number.");
1368   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1369          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1370 
1371   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1372     return 0;
1373 
1374   // Don't deal with undefined shifts.
1375   if (ShiftImm >= RetVT.getSizeInBits())
1376     return 0;
1377 
1378   static const unsigned OpcTable[2][2][2] = {
1379     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1380       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1381     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1382       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1383   };
1384   bool Is64Bit = RetVT == MVT::i64;
1385   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1386   const TargetRegisterClass *RC =
1387       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1388   unsigned ResultReg;
1389   if (WantResult)
1390     ResultReg = createResultReg(RC);
1391   else
1392     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1393 
1394   const MCInstrDesc &II = TII.get(Opc);
1395   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1396   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1397   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1398       .addReg(LHSReg)
1399       .addReg(RHSReg)
1400       .addImm(getShifterImm(ShiftType, ShiftImm));
1401   return ResultReg;
1402 }
1403 
1404 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1405                                         unsigned RHSReg,
1406                                         AArch64_AM::ShiftExtendType ExtType,
1407                                         uint64_t ShiftImm, bool SetFlags,
1408                                         bool WantResult) {
1409   assert(LHSReg && RHSReg && "Invalid register number.");
1410   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1411          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1412 
1413   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1414     return 0;
1415 
1416   if (ShiftImm >= 4)
1417     return 0;
1418 
1419   static const unsigned OpcTable[2][2][2] = {
1420     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1421       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1422     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1423       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1424   };
1425   bool Is64Bit = RetVT == MVT::i64;
1426   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1427   const TargetRegisterClass *RC = nullptr;
1428   if (SetFlags)
1429     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1430   else
1431     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1432   unsigned ResultReg;
1433   if (WantResult)
1434     ResultReg = createResultReg(RC);
1435   else
1436     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1437 
1438   const MCInstrDesc &II = TII.get(Opc);
1439   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1440   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1441   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1442       .addReg(LHSReg)
1443       .addReg(RHSReg)
1444       .addImm(getArithExtendImm(ExtType, ShiftImm));
1445   return ResultReg;
1446 }
1447 
1448 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1449   Type *Ty = LHS->getType();
1450   EVT EVT = TLI.getValueType(DL, Ty, true);
1451   if (!EVT.isSimple())
1452     return false;
1453   MVT VT = EVT.getSimpleVT();
1454 
1455   switch (VT.SimpleTy) {
1456   default:
1457     return false;
1458   case MVT::i1:
1459   case MVT::i8:
1460   case MVT::i16:
1461   case MVT::i32:
1462   case MVT::i64:
1463     return emitICmp(VT, LHS, RHS, IsZExt);
1464   case MVT::f32:
1465   case MVT::f64:
1466     return emitFCmp(VT, LHS, RHS);
1467   }
1468 }
1469 
1470 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1471                                bool IsZExt) {
1472   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1473                  IsZExt) != 0;
1474 }
1475 
1476 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1477   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1478                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1479 }
1480 
1481 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1482   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1483     return false;
1484 
1485   // Check to see if the 2nd operand is a constant that we can encode directly
1486   // in the compare.
1487   bool UseImm = false;
1488   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1489     if (CFP->isZero() && !CFP->isNegative())
1490       UseImm = true;
1491 
1492   Register LHSReg = getRegForValue(LHS);
1493   if (!LHSReg)
1494     return false;
1495 
1496   if (UseImm) {
1497     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1498     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1499         .addReg(LHSReg);
1500     return true;
1501   }
1502 
1503   Register RHSReg = getRegForValue(RHS);
1504   if (!RHSReg)
1505     return false;
1506 
1507   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1508   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1509       .addReg(LHSReg)
1510       .addReg(RHSReg);
1511   return true;
1512 }
1513 
1514 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1515                                   bool SetFlags, bool WantResult, bool IsZExt) {
1516   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1517                     IsZExt);
1518 }
1519 
1520 /// This method is a wrapper to simplify add emission.
1521 ///
1522 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1523 /// that fails, then try to materialize the immediate into a register and use
1524 /// emitAddSub_rr instead.
1525 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1526   unsigned ResultReg;
1527   if (Imm < 0)
1528     ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1529   else
1530     ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1531 
1532   if (ResultReg)
1533     return ResultReg;
1534 
1535   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1536   if (!CReg)
1537     return 0;
1538 
1539   ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1540   return ResultReg;
1541 }
1542 
1543 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1544                                   bool SetFlags, bool WantResult, bool IsZExt) {
1545   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1546                     IsZExt);
1547 }
1548 
1549 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1550                                       unsigned RHSReg, bool WantResult) {
1551   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1552                        /*SetFlags=*/true, WantResult);
1553 }
1554 
1555 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1556                                       unsigned RHSReg,
1557                                       AArch64_AM::ShiftExtendType ShiftType,
1558                                       uint64_t ShiftImm, bool WantResult) {
1559   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1560                        ShiftImm, /*SetFlags=*/true, WantResult);
1561 }
1562 
1563 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1564                                         const Value *LHS, const Value *RHS) {
1565   // Canonicalize immediates to the RHS first.
1566   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1567     std::swap(LHS, RHS);
1568 
1569   // Canonicalize mul by power-of-2 to the RHS.
1570   if (LHS->hasOneUse() && isValueAvailable(LHS))
1571     if (isMulPowOf2(LHS))
1572       std::swap(LHS, RHS);
1573 
1574   // Canonicalize shift immediate to the RHS.
1575   if (LHS->hasOneUse() && isValueAvailable(LHS))
1576     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1577       if (isa<ConstantInt>(SI->getOperand(1)))
1578         std::swap(LHS, RHS);
1579 
1580   Register LHSReg = getRegForValue(LHS);
1581   if (!LHSReg)
1582     return 0;
1583 
1584   unsigned ResultReg = 0;
1585   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1586     uint64_t Imm = C->getZExtValue();
1587     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1588   }
1589   if (ResultReg)
1590     return ResultReg;
1591 
1592   // Check if the mul can be folded into the instruction.
1593   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1594     if (isMulPowOf2(RHS)) {
1595       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1596       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1597 
1598       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1599         if (C->getValue().isPowerOf2())
1600           std::swap(MulLHS, MulRHS);
1601 
1602       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1603       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1604 
1605       Register RHSReg = getRegForValue(MulLHS);
1606       if (!RHSReg)
1607         return 0;
1608       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1609       if (ResultReg)
1610         return ResultReg;
1611     }
1612   }
1613 
1614   // Check if the shift can be folded into the instruction.
1615   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1616     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1617       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1618         uint64_t ShiftVal = C->getZExtValue();
1619         Register RHSReg = getRegForValue(SI->getOperand(0));
1620         if (!RHSReg)
1621           return 0;
1622         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1623         if (ResultReg)
1624           return ResultReg;
1625       }
1626   }
1627 
1628   Register RHSReg = getRegForValue(RHS);
1629   if (!RHSReg)
1630     return 0;
1631 
1632   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1633   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1634   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1635     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1636     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1637   }
1638   return ResultReg;
1639 }
1640 
1641 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1642                                            unsigned LHSReg, uint64_t Imm) {
1643   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1644                 "ISD nodes are not consecutive!");
1645   static const unsigned OpcTable[3][2] = {
1646     { AArch64::ANDWri, AArch64::ANDXri },
1647     { AArch64::ORRWri, AArch64::ORRXri },
1648     { AArch64::EORWri, AArch64::EORXri }
1649   };
1650   const TargetRegisterClass *RC;
1651   unsigned Opc;
1652   unsigned RegSize;
1653   switch (RetVT.SimpleTy) {
1654   default:
1655     return 0;
1656   case MVT::i1:
1657   case MVT::i8:
1658   case MVT::i16:
1659   case MVT::i32: {
1660     unsigned Idx = ISDOpc - ISD::AND;
1661     Opc = OpcTable[Idx][0];
1662     RC = &AArch64::GPR32spRegClass;
1663     RegSize = 32;
1664     break;
1665   }
1666   case MVT::i64:
1667     Opc = OpcTable[ISDOpc - ISD::AND][1];
1668     RC = &AArch64::GPR64spRegClass;
1669     RegSize = 64;
1670     break;
1671   }
1672 
1673   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1674     return 0;
1675 
1676   Register ResultReg =
1677       fastEmitInst_ri(Opc, RC, LHSReg,
1678                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1679   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1680     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1681     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1682   }
1683   return ResultReg;
1684 }
1685 
1686 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1687                                            unsigned LHSReg, unsigned RHSReg,
1688                                            uint64_t ShiftImm) {
1689   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1690                 "ISD nodes are not consecutive!");
1691   static const unsigned OpcTable[3][2] = {
1692     { AArch64::ANDWrs, AArch64::ANDXrs },
1693     { AArch64::ORRWrs, AArch64::ORRXrs },
1694     { AArch64::EORWrs, AArch64::EORXrs }
1695   };
1696 
1697   // Don't deal with undefined shifts.
1698   if (ShiftImm >= RetVT.getSizeInBits())
1699     return 0;
1700 
1701   const TargetRegisterClass *RC;
1702   unsigned Opc;
1703   switch (RetVT.SimpleTy) {
1704   default:
1705     return 0;
1706   case MVT::i1:
1707   case MVT::i8:
1708   case MVT::i16:
1709   case MVT::i32:
1710     Opc = OpcTable[ISDOpc - ISD::AND][0];
1711     RC = &AArch64::GPR32RegClass;
1712     break;
1713   case MVT::i64:
1714     Opc = OpcTable[ISDOpc - ISD::AND][1];
1715     RC = &AArch64::GPR64RegClass;
1716     break;
1717   }
1718   Register ResultReg =
1719       fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1720                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1721   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1722     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1723     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1724   }
1725   return ResultReg;
1726 }
1727 
1728 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1729                                      uint64_t Imm) {
1730   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1731 }
1732 
1733 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1734                                    bool WantZExt, MachineMemOperand *MMO) {
       // Emit a load of a VT-typed value from Addr into a fresh result register.
       // RetVT and WantZExt select between the sign- and zero-extending opcode
       // variants. Returns 0 if misaligned accesses are not allowed for VT or the
       // address cannot be simplified; otherwise returns the result register.
1735   if (!TLI.allowsMisalignedMemoryAccesses(VT))
1736     return 0;
1737 
1738   // Simplify this down to something we can handle.
1739   if (!simplifyAddress(Addr, VT))
1740     return 0;
1741 
1742   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1743   if (!ScaleFactor)
1744     llvm_unreachable("Unexpected value type.");
1745 
1746   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1747   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
       // An offset that is not a multiple of ScaleFactor also takes the unscaled
       // path; in that case the scale factor passed on below is reset to 1.
1748   bool UseScaled = true;
1749   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1750     UseScaled = false;
1751     ScaleFactor = 1;
1752   }
1753 
       // Integer load opcodes, indexed as
       // [WantZExt][2 * Idx + IsRet64Bit][column], where the column is chosen by
       // VT in the switch below (0: i1/i8, 1: i16, 2: i32, 3: i64).
1754   static const unsigned GPOpcTable[2][8][4] = {
1755     // Sign-extend.
1756     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1757         AArch64::LDURXi  },
1758       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1759         AArch64::LDURXi  },
1760       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1761         AArch64::LDRXui  },
1762       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1763         AArch64::LDRXui  },
1764       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1765         AArch64::LDRXroX },
1766       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1767         AArch64::LDRXroX },
1768       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1769         AArch64::LDRXroW },
1770       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1771         AArch64::LDRXroW }
1772     },
1773     // Zero-extend.
         // The 32-bit and 64-bit result rows are identical here: a zero-extend to
         // 64 bits reuses the 32-bit load and is widened at the end of this
         // function.
1774     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1775         AArch64::LDURXi  },
1776       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1777         AArch64::LDURXi  },
1778       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1779         AArch64::LDRXui  },
1780       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1781         AArch64::LDRXui  },
1782       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1783         AArch64::LDRXroX },
1784       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1785         AArch64::LDRXroX },
1786       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1787         AArch64::LDRXroW },
1788       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1789         AArch64::LDRXroW }
1790     }
1791   };
1792 
       // Floating-point load opcodes, indexed as [Idx][0: f32, 1: f64].
1793   static const unsigned FPOpcTable[4][2] = {
1794     { AArch64::LDURSi,  AArch64::LDURDi  },
1795     { AArch64::LDRSui,  AArch64::LDRDui  },
1796     { AArch64::LDRSroX, AArch64::LDRDroX },
1797     { AArch64::LDRSroW, AArch64::LDRDroW }
1798   };
1799 
1800   unsigned Opc;
1801   const TargetRegisterClass *RC;
       // Idx picks the addressing form: 0 = unscaled immediate, 1 = scaled
       // immediate, 2 = register offset. It is bumped by one when the address
       // carries a UXTW/SXTW extend, which for register-offset addresses selects
       // the roW rows instead of the roX rows.
       // NOTE(review): presumably an extend type is only ever set together with
       // an offset register -- confirm against computeAddress/simplifyAddress.
1802   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1803                       Addr.getOffsetReg();
1804   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1805   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1806       Addr.getExtendType() == AArch64_AM::SXTW)
1807     Idx++;
1808 
       // Only sign-extending loads to i64 define a 64-bit register directly; all
       // other sub-64-bit integer loads produce a 32-bit register.
1809   bool IsRet64Bit = RetVT == MVT::i64;
1810   switch (VT.SimpleTy) {
1811   default:
1812     llvm_unreachable("Unexpected value type.");
1813   case MVT::i1: // Intentional fall-through.
1814   case MVT::i8:
1815     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1816     RC = (IsRet64Bit && !WantZExt) ?
1817              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1818     break;
1819   case MVT::i16:
1820     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1821     RC = (IsRet64Bit && !WantZExt) ?
1822              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1823     break;
1824   case MVT::i32:
1825     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1826     RC = (IsRet64Bit && !WantZExt) ?
1827              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1828     break;
1829   case MVT::i64:
1830     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1831     RC = &AArch64::GPR64RegClass;
1832     break;
1833   case MVT::f32:
1834     Opc = FPOpcTable[Idx][0];
1835     RC = &AArch64::FPR32RegClass;
1836     break;
1837   case MVT::f64:
1838     Opc = FPOpcTable[Idx][1];
1839     RC = &AArch64::FPR64RegClass;
1840     break;
1841   }
1842 
1843   // Create the base instruction, then add the operands.
1844   Register ResultReg = createResultReg(RC);
1845   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1846                                     TII.get(Opc), ResultReg);
1847   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1848 
1849   // Loading an i1 requires special handling.
       // The i1 case uses the byte-load opcode column, so keep only bit 0.
1850   if (VT == MVT::i1) {
1851     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1852     assert(ANDReg && "Unexpected AND instruction emission failure.");
1853     ResultReg = ANDReg;
1854   }
1855 
1856   // For zero-extending loads to 64bit we emit a 32bit load and then convert
1857   // the 32bit reg to a 64bit reg.
       // selectLoad relies on this SUBREG_TO_REG being the last instruction
       // emitted here when it later decides to drop it again.
1858   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1859     Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1860     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1861             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1862         .addImm(0)
1863         .addReg(ResultReg, getKillRegState(true))
1864         .addImm(AArch64::sub_32);
1865     ResultReg = Reg64;
1866   }
1867   return ResultReg;
1868 }
1869 
1870 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1871   MVT VT;
1872   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1873     return false;
1874 
1875   if (VT.isVector())
1876     return selectOperator(I, I->getOpcode());
1877 
1878   unsigned ResultReg;
1879   switch (I->getOpcode()) {
1880   default:
1881     llvm_unreachable("Unexpected instruction.");
1882   case Instruction::Add:
1883     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1884     break;
1885   case Instruction::Sub:
1886     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1887     break;
1888   }
1889   if (!ResultReg)
1890     return false;
1891 
1892   updateValueMap(I, ResultReg);
1893   return true;
1894 }
1895 
1896 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1897   MVT VT;
1898   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1899     return false;
1900 
1901   if (VT.isVector())
1902     return selectOperator(I, I->getOpcode());
1903 
1904   unsigned ResultReg;
1905   switch (I->getOpcode()) {
1906   default:
1907     llvm_unreachable("Unexpected instruction.");
1908   case Instruction::And:
1909     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1910     break;
1911   case Instruction::Or:
1912     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1913     break;
1914   case Instruction::Xor:
1915     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1916     break;
1917   }
1918   if (!ResultReg)
1919     return false;
1920 
1921   updateValueMap(I, ResultReg);
1922   return true;
1923 }
1924 
1925 bool AArch64FastISel::selectLoad(const Instruction *I) {
       // Select a non-atomic load. If the load's only user is a zext/sext to a
       // supported type, the extension is folded into the load. Returns false
       // when the type is unsupported, the load is atomic, the pointer is a
       // swifterror value, the address cannot be computed, or emitLoad fails.
1926   MVT VT;
1927   // Verify we have a legal type before going any further.  Currently, we handle
1928   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1929   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1930   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1931       cast<LoadInst>(I)->isAtomic())
1932     return false;
1933 
1934   const Value *SV = I->getOperand(0);
1935   if (TLI.supportSwiftError()) {
1936     // Swifterror values can come from either a function parameter with
1937     // swifterror attribute or an alloca with swifterror attribute.
1938     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1939       if (Arg->hasSwiftErrorAttr())
1940         return false;
1941     }
1942 
1943     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1944       if (Alloca->isSwiftError())
1945         return false;
1946     }
1947   }
1948 
1949   // See if we can handle this address.
1950   Address Addr;
1951   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1952     return false;
1953 
1954   // Fold the following sign-/zero-extend into the load instruction.
       // isTypeSupported writes RetVT as an out-parameter, so RetVT is reset to
       // VT whenever the extend's type turns out to be unsupported. WantZExt
       // becomes false for any sext user, folded or not.
1955   bool WantZExt = true;
1956   MVT RetVT = VT;
1957   const Value *IntExtVal = nullptr;
1958   if (I->hasOneUse()) {
1959     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1960       if (isTypeSupported(ZE->getType(), RetVT))
1961         IntExtVal = ZE;
1962       else
1963         RetVT = VT;
1964     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1965       if (isTypeSupported(SE->getType(), RetVT))
1966         IntExtVal = SE;
1967       else
1968         RetVT = VT;
1969       WantZExt = false;
1970     }
1971   }
1972 
1973   unsigned ResultReg =
1974       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1975   if (!ResultReg)
1976     return false;
1977 
1978   // There are a few different cases we have to handle, because the load or the
1979   // sign-/zero-extend might not be selected by FastISel if we fall-back to
1980   // SelectionDAG. There is also an ordering issue when both instructions are in
1981   // different basic blocks.
1982   // 1.) The load instruction is selected by FastISel, but the integer extend
1983   //     not. This usually happens when the integer extend is in a different
1984   //     basic block and SelectionDAG took over for that basic block.
1985   // 2.) The load instruction is selected before the integer extend. This only
1986   //     happens when the integer extend is in a different basic block.
1987   // 3.) The load instruction is selected by SelectionDAG and the integer extend
1988   //     by FastISel. This happens if there are instructions between the load
1989   //     and the integer extend that couldn't be selected by FastISel.
1990   if (IntExtVal) {
1991     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
1992     // could select it. Emit a copy to subreg if necessary. FastISel will remove
1993     // it when it selects the integer extend.
         // A null MI below is treated as "the extend has not been emitted yet".
1994     Register Reg = lookUpRegForValue(IntExtVal);
1995     auto *MI = MRI.getUniqueVRegDef(Reg);
1996     if (!MI) {
           // The load was emitted with a 64-bit result, but the value recorded
           // for the load itself must be the 32-bit one.
1997       if (RetVT == MVT::i64 && VT <= MVT::i32) {
1998         if (WantZExt) {
1999           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
               // Note: this local iterator named I shadows the Instruction
               // parameter I only inside this inner block; the result register
               // is taken from the instruction just before the deleted one.
2000           MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2001           ResultReg = std::prev(I)->getOperand(0).getReg();
2002           removeDeadCode(I, std::next(I));
2003         } else
2004           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2005                                                  AArch64::sub_32);
2006       }
2007       updateValueMap(I, ResultReg);
2008       return true;
2009     }
2010 
2011     // The integer extend has already been emitted - delete all the instructions
2012     // that have been emitted by the integer extend lowering code and use the
2013     // result from the load instruction directly.
         // Each iteration remembers the first register use operand of MI, erases
         // MI, and continues with the unique definition of that register until
         // no register operand or no unique definition is found.
2014     while (MI) {
2015       Reg = 0;
2016       for (auto &Opnd : MI->uses()) {
2017         if (Opnd.isReg()) {
2018           Reg = Opnd.getReg();
2019           break;
2020         }
2021       }
2022       MachineBasicBlock::iterator I(MI);
2023       removeDeadCode(I, std::next(I));
2024       MI = nullptr;
2025       if (Reg)
2026         MI = MRI.getUniqueVRegDef(Reg);
2027     }
         // The load result now stands in for the extend's value.
2028     updateValueMap(IntExtVal, ResultReg);
2029     return true;
2030   }
2031 
2032   updateValueMap(I, ResultReg);
2033   return true;
2034 }
2035 
2036 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2037                                        unsigned AddrReg,
2038                                        MachineMemOperand *MMO) {
2039   unsigned Opc;
2040   switch (VT.SimpleTy) {
2041   default: return false;
2042   case MVT::i8:  Opc = AArch64::STLRB; break;
2043   case MVT::i16: Opc = AArch64::STLRH; break;
2044   case MVT::i32: Opc = AArch64::STLRW; break;
2045   case MVT::i64: Opc = AArch64::STLRX; break;
2046   }
2047 
2048   const MCInstrDesc &II = TII.get(Opc);
2049   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2050   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2051   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2052       .addReg(SrcReg)
2053       .addReg(AddrReg)
2054       .addMemOperand(MMO);
2055   return true;
2056 }
2057 
2058 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2059                                 MachineMemOperand *MMO) {
2060   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2061     return false;
2062 
2063   // Simplify this down to something we can handle.
2064   if (!simplifyAddress(Addr, VT))
2065     return false;
2066 
2067   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2068   if (!ScaleFactor)
2069     llvm_unreachable("Unexpected value type.");
2070 
2071   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2072   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2073   bool UseScaled = true;
2074   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2075     UseScaled = false;
2076     ScaleFactor = 1;
2077   }
2078 
2079   static const unsigned OpcTable[4][6] = {
2080     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2081       AArch64::STURSi,   AArch64::STURDi },
2082     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2083       AArch64::STRSui,   AArch64::STRDui },
2084     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2085       AArch64::STRSroX,  AArch64::STRDroX },
2086     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2087       AArch64::STRSroW,  AArch64::STRDroW }
2088   };
2089 
2090   unsigned Opc;
2091   bool VTIsi1 = false;
2092   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2093                       Addr.getOffsetReg();
2094   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2095   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2096       Addr.getExtendType() == AArch64_AM::SXTW)
2097     Idx++;
2098 
2099   switch (VT.SimpleTy) {
2100   default: llvm_unreachable("Unexpected value type.");
2101   case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
2102   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2103   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2104   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2105   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2106   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2107   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2108   }
2109 
2110   // Storing an i1 requires special handling.
2111   if (VTIsi1 && SrcReg != AArch64::WZR) {
2112     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2113     assert(ANDReg && "Unexpected AND instruction emission failure.");
2114     SrcReg = ANDReg;
2115   }
2116   // Create the base instruction, then add the operands.
2117   const MCInstrDesc &II = TII.get(Opc);
2118   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2119   MachineInstrBuilder MIB =
2120       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2121   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2122 
2123   return true;
2124 }
2125 
2126 bool AArch64FastISel::selectStore(const Instruction *I) {
2127   MVT VT;
2128   const Value *Op0 = I->getOperand(0);
2129   // Verify we have a legal type before going any further.  Currently, we handle
2130   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2131   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2132   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2133     return false;
2134 
2135   const Value *PtrV = I->getOperand(1);
2136   if (TLI.supportSwiftError()) {
2137     // Swifterror values can come from either a function parameter with
2138     // swifterror attribute or an alloca with swifterror attribute.
2139     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2140       if (Arg->hasSwiftErrorAttr())
2141         return false;
2142     }
2143 
2144     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2145       if (Alloca->isSwiftError())
2146         return false;
2147     }
2148   }
2149 
2150   // Get the value to be stored into a register. Use the zero register directly
2151   // when possible to avoid an unnecessary copy and a wasted register.
2152   unsigned SrcReg = 0;
2153   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2154     if (CI->isZero())
2155       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2156   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2157     if (CF->isZero() && !CF->isNegative()) {
2158       VT = MVT::getIntegerVT(VT.getSizeInBits());
2159       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2160     }
2161   }
2162 
2163   if (!SrcReg)
2164     SrcReg = getRegForValue(Op0);
2165 
2166   if (!SrcReg)
2167     return false;
2168 
2169   auto *SI = cast<StoreInst>(I);
2170 
2171   // Try to emit a STLR for seq_cst/release.
2172   if (SI->isAtomic()) {
2173     AtomicOrdering Ord = SI->getOrdering();
2174     // The non-atomic instructions are sufficient for relaxed stores.
2175     if (isReleaseOrStronger(Ord)) {
2176       // The STLR addressing mode only supports a base reg; pass that directly.
2177       Register AddrReg = getRegForValue(PtrV);
2178       return emitStoreRelease(VT, SrcReg, AddrReg,
2179                               createMachineMemOperandFor(I));
2180     }
2181   }
2182 
2183   // See if we can handle this address.
2184   Address Addr;
2185   if (!computeAddress(PtrV, Addr, Op0->getType()))
2186     return false;
2187 
2188   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2189     return false;
2190   return true;
2191 }
2192 
2193 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2194   switch (Pred) {
2195   case CmpInst::FCMP_ONE:
2196   case CmpInst::FCMP_UEQ:
2197   default:
2198     // AL is our "false" for now. The other two need more compares.
2199     return AArch64CC::AL;
2200   case CmpInst::ICMP_EQ:
2201   case CmpInst::FCMP_OEQ:
2202     return AArch64CC::EQ;
2203   case CmpInst::ICMP_SGT:
2204   case CmpInst::FCMP_OGT:
2205     return AArch64CC::GT;
2206   case CmpInst::ICMP_SGE:
2207   case CmpInst::FCMP_OGE:
2208     return AArch64CC::GE;
2209   case CmpInst::ICMP_UGT:
2210   case CmpInst::FCMP_UGT:
2211     return AArch64CC::HI;
2212   case CmpInst::FCMP_OLT:
2213     return AArch64CC::MI;
2214   case CmpInst::ICMP_ULE:
2215   case CmpInst::FCMP_OLE:
2216     return AArch64CC::LS;
2217   case CmpInst::FCMP_ORD:
2218     return AArch64CC::VC;
2219   case CmpInst::FCMP_UNO:
2220     return AArch64CC::VS;
2221   case CmpInst::FCMP_UGE:
2222     return AArch64CC::PL;
2223   case CmpInst::ICMP_SLT:
2224   case CmpInst::FCMP_ULT:
2225     return AArch64CC::LT;
2226   case CmpInst::ICMP_SLE:
2227   case CmpInst::FCMP_ULE:
2228     return AArch64CC::LE;
2229   case CmpInst::FCMP_UNE:
2230   case CmpInst::ICMP_NE:
2231     return AArch64CC::NE;
2232   case CmpInst::ICMP_UGE:
2233     return AArch64CC::HS;
2234   case CmpInst::ICMP_ULT:
2235     return AArch64CC::LO;
2236   }
2237 }
2238 
2239 /// Try to emit a combined compare-and-branch instruction.
     ///
     /// Handles integer compares whose branch can be expressed as CB(N)Z on the
     /// whole value or TB(N)Z on a single bit: equality against zero (optionally
     /// of an AND with a power-of-two mask), sign tests against zero, and sign
     /// tests against -1. Returns true if the branch was emitted, false if this
     /// form does not apply.
2240 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2241   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2242   // will not be produced, as they are conditional branch instructions that do
2243   // not set flags.
2244   if (FuncInfo.MF->getFunction().hasFnAttribute(
2245           Attribute::SpeculativeLoadHardening))
2246     return false;
2247 
2248   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2249   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2250   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2251 
2252   const Value *LHS = CI->getOperand(0);
2253   const Value *RHS = CI->getOperand(1);
2254 
2255   MVT VT;
2256   if (!isTypeSupported(LHS->getType(), VT))
2257     return false;
2258 
2259   unsigned BW = VT.getSizeInBits();
2260   if (BW > 64)
2261     return false;
2262 
2263   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2264   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2265 
2266   // Try to take advantage of fallthrough opportunities.
       // TBB, FBB and Predicate are locals, so this swap has no effect on the
       // caller if we bail out below.
2267   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2268     std::swap(TBB, FBB);
2269     Predicate = CmpInst::getInversePredicate(Predicate);
2270   }
2271 
       // TestBit == -1 means "compare the whole value against zero" (CB(N)Z);
       // any other value is the bit number tested with TB(N)Z. IsCmpNE selects
       // the branch-if-nonzero / branch-if-bit-set variant and is assigned on
       // every path that leaves the switch without returning.
2272   int TestBit = -1;
2273   bool IsCmpNE;
2274   switch (Predicate) {
2275   default:
2276     return false;
2277   case CmpInst::ICMP_EQ:
2278   case CmpInst::ICMP_NE:
         // Canonicalize a zero constant onto the right-hand side.
2279     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2280       std::swap(LHS, RHS);
2281 
2282     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2283       return false;
2284 
         // (x & 2^k) ==/!= 0 becomes a test of bit k of x.
2285     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2286       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2287         const Value *AndLHS = AI->getOperand(0);
2288         const Value *AndRHS = AI->getOperand(1);
2289 
2290         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2291           if (C->getValue().isPowerOf2())
2292             std::swap(AndLHS, AndRHS);
2293 
2294         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2295           if (C->getValue().isPowerOf2()) {
2296             TestBit = C->getValue().logBase2();
2297             LHS = AndLHS;
2298           }
2299       }
2300 
         // An i1 value is always tested through bit 0.
2301     if (VT == MVT::i1)
2302       TestBit = 0;
2303 
2304     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2305     break;
2306   case CmpInst::ICMP_SLT:
2307   case CmpInst::ICMP_SGE:
         // x < 0 / x >= 0 is a test of the sign bit.
2308     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2309       return false;
2310 
2311     TestBit = BW - 1;
2312     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2313     break;
2314   case CmpInst::ICMP_SGT:
2315   case CmpInst::ICMP_SLE:
         // x > -1 / x <= -1 is likewise a test of the sign bit.
2316     if (!isa<ConstantInt>(RHS))
2317       return false;
2318 
2319     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2320       return false;
2321 
2322     TestBit = BW - 1;
2323     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2324     break;
2325   } // end switch
2326 
       // Indexed as [IsBitTest][IsCmpNE][Is64Bit].
2327   static const unsigned OpcTable[2][2][2] = {
2328     { {AArch64::CBZW,  AArch64::CBZX },
2329       {AArch64::CBNZW, AArch64::CBNZX} },
2330     { {AArch64::TBZW,  AArch64::TBZX },
2331       {AArch64::TBNZW, AArch64::TBNZX} }
2332   };
2333 
2334   bool IsBitTest = TestBit != -1;
2335   bool Is64Bit = BW == 64;
       // A test of bits 0..31 uses the W form even when the value is 64 bits
       // wide; the low half is extracted below in that case.
2336   if (TestBit < 32 && TestBit >= 0)
2337     Is64Bit = false;
2338 
2339   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2340   const MCInstrDesc &II = TII.get(Opc);
2341 
2342   Register SrcReg = getRegForValue(LHS);
2343   if (!SrcReg)
2344     return false;
2345 
2346   if (BW == 64 && !Is64Bit)
2347     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2348 
       // CB(N)Z looks at the whole register, so values narrower than 32 bits
       // are zero-extended first. Bit tests do not need this.
2349   if ((BW < 32) && !IsBitTest)
2350     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2351 
2352   // Emit the combined compare and branch instruction.
2353   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2354   MachineInstrBuilder MIB =
2355       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2356           .addReg(SrcReg);
2357   if (IsBitTest)
2358     MIB.addImm(TestBit);
2359   MIB.addMBB(TBB);
2360 
2361   finishCondBranch(BI->getParent(), TBB, FBB);
2362   return true;
2363 }
2364 
2365 bool AArch64FastISel::selectBranch(const Instruction *I) {
       // Select a br instruction. Unconditional branches are emitted directly.
       // Conditional branches are tried in this order: a single-use compare that
       // passes isValueAvailable (folded, combined compare-and-branch, or
       // cmp + Bcc), a constant condition, a condition accepted by
       // foldXALUIntrinsic, and finally a TB(N)Z on bit 0 of the condition
       // register. Returns false when the needed compare or register cannot be
       // produced.
2366   const BranchInst *BI = cast<BranchInst>(I);
2367   if (BI->isUnconditional()) {
2368     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2369     fastEmitBranch(MSucc, BI->getDebugLoc());
2370     return true;
2371   }
2372 
2373   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2374   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2375 
2376   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
         // A compare that fails this check is not folded; control falls through
         // to the generic bit-0 test at the end of the function.
2377     if (CI->hasOneUse() && isValueAvailable(CI)) {
2378       // Try to optimize or fold the cmp.
2379       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2380       switch (Predicate) {
2381       default:
2382         break;
2383       case CmpInst::FCMP_FALSE:
2384         fastEmitBranch(FBB, DbgLoc);
2385         return true;
2386       case CmpInst::FCMP_TRUE:
2387         fastEmitBranch(TBB, DbgLoc);
2388         return true;
2389       }
2390 
2391       // Try to emit a combined compare-and-branch first.
2392       if (emitCompareAndBranch(BI))
2393         return true;
2394 
2395       // Try to take advantage of fallthrough opportunities.
2396       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2397         std::swap(TBB, FBB);
2398         Predicate = CmpInst::getInversePredicate(Predicate);
2399       }
2400 
2401       // Emit the cmp.
2402       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2403         return false;
2404 
2405       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2406       // instruction.
           // For those two predicates getCompareCC returns AL, so both CC and
           // ExtraCC are set explicitly and two branches to TBB are emitted.
2407       AArch64CC::CondCode CC = getCompareCC(Predicate);
2408       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2409       switch (Predicate) {
2410       default:
2411         break;
2412       case CmpInst::FCMP_UEQ:
2413         ExtraCC = AArch64CC::EQ;
2414         CC = AArch64CC::VS;
2415         break;
2416       case CmpInst::FCMP_ONE:
2417         ExtraCC = AArch64CC::MI;
2418         CC = AArch64CC::GT;
2419         break;
2420       }
2421       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2422 
2423       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2424       if (ExtraCC != AArch64CC::AL) {
2425         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2426             .addImm(ExtraCC)
2427             .addMBB(TBB);
2428       }
2429 
2430       // Emit the branch.
2431       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2432           .addImm(CC)
2433           .addMBB(TBB);
2434 
2435       finishCondBranch(BI->getParent(), TBB, FBB);
2436       return true;
2437     }
2438   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
         // Constant condition: emit an unconditional branch to the taken side
         // and register only that block as a successor.
2439     uint64_t Imm = CI->getZExtValue();
2440     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2441     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2442         .addMBB(Target);
2443 
2444     // Obtain the branch probability and add the target to the successor list.
2445     if (FuncInfo.BPI) {
2446       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2447           BI->getParent(), Target->getBasicBlock());
2448       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2449     } else
2450       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2451     return true;
2452   } else {
         // CC is passed to foldXALUIntrinsic by reference and is the condition
         // used for the branch when the fold succeeds.
2453     AArch64CC::CondCode CC = AArch64CC::NE;
2454     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2455       // Fake request the condition, otherwise the intrinsic might be completely
2456       // optimized away.
2457       Register CondReg = getRegForValue(BI->getCondition());
2458       if (!CondReg)
2459         return false;
2460 
2461       // Emit the branch.
2462       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2463         .addImm(CC)
2464         .addMBB(TBB);
2465 
2466       finishCondBranch(BI->getParent(), TBB, FBB);
2467       return true;
2468     }
2469   }
2470 
       // Generic fallback: materialize the condition and branch on its low bit.
2471   Register CondReg = getRegForValue(BI->getCondition());
2472   if (CondReg == 0)
2473     return false;
2474 
2475   // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
       // If the true block is the layout successor, branch to the false block on
       // a clear bit instead so the true block can be reached by fallthrough.
2476   unsigned Opcode = AArch64::TBNZW;
2477   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2478     std::swap(TBB, FBB);
2479     Opcode = AArch64::TBZW;
2480   }
2481 
2482   const MCInstrDesc &II = TII.get(Opcode);
2483   Register ConstrainedCondReg
2484     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2485   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2486       .addReg(ConstrainedCondReg)
2487       .addImm(0)
2488       .addMBB(TBB);
2489 
2490   finishCondBranch(BI->getParent(), TBB, FBB);
2491   return true;
2492 }
2493 
2494 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2495   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2496   Register AddrReg = getRegForValue(BI->getOperand(0));
2497   if (AddrReg == 0)
2498     return false;
2499 
2500   // Emit the indirect branch.
2501   const MCInstrDesc &II = TII.get(AArch64::BR);
2502   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2503   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2504 
2505   // Make sure the CFG is up-to-date.
2506   for (auto *Succ : BI->successors())
2507     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2508 
2509   return true;
2510 }
2511 
2512 bool AArch64FastISel::selectCmp(const Instruction *I) {
2513   const CmpInst *CI = cast<CmpInst>(I);
2514 
2515   // Vectors of i1 are weird: bail out.
2516   if (CI->getType()->isVectorTy())
2517     return false;
2518 
2519   // Try to optimize or fold the cmp.
2520   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2521   unsigned ResultReg = 0;
2522   switch (Predicate) {
2523   default:
2524     break;
2525   case CmpInst::FCMP_FALSE:
2526     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2527     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2528             TII.get(TargetOpcode::COPY), ResultReg)
2529         .addReg(AArch64::WZR, getKillRegState(true));
2530     break;
2531   case CmpInst::FCMP_TRUE:
2532     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2533     break;
2534   }
2535 
2536   if (ResultReg) {
2537     updateValueMap(I, ResultReg);
2538     return true;
2539   }
2540 
2541   // Emit the cmp.
2542   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2543     return false;
2544 
2545   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2546 
2547   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2548   // condition codes are inverted, because they are used by CSINC.
2549   static unsigned CondCodeTable[2][2] = {
2550     { AArch64CC::NE, AArch64CC::VC },
2551     { AArch64CC::PL, AArch64CC::LE }
2552   };
2553   unsigned *CondCodes = nullptr;
2554   switch (Predicate) {
2555   default:
2556     break;
2557   case CmpInst::FCMP_UEQ:
2558     CondCodes = &CondCodeTable[0][0];
2559     break;
2560   case CmpInst::FCMP_ONE:
2561     CondCodes = &CondCodeTable[1][0];
2562     break;
2563   }
2564 
2565   if (CondCodes) {
2566     Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2567     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2568             TmpReg1)
2569         .addReg(AArch64::WZR, getKillRegState(true))
2570         .addReg(AArch64::WZR, getKillRegState(true))
2571         .addImm(CondCodes[0]);
2572     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2573             ResultReg)
2574         .addReg(TmpReg1, getKillRegState(true))
2575         .addReg(AArch64::WZR, getKillRegState(true))
2576         .addImm(CondCodes[1]);
2577 
2578     updateValueMap(I, ResultReg);
2579     return true;
2580   }
2581 
2582   // Now set a register based on the comparison.
2583   AArch64CC::CondCode CC = getCompareCC(Predicate);
2584   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2585   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2586   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587           ResultReg)
2588       .addReg(AArch64::WZR, getKillRegState(true))
2589       .addReg(AArch64::WZR, getKillRegState(true))
2590       .addImm(invertedCC);
2591 
2592   updateValueMap(I, ResultReg);
2593   return true;
2594 }
2595 
2596 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2597 /// value.
2598 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2599   if (!SI->getType()->isIntegerTy(1))
2600     return false;
2601 
2602   const Value *Src1Val, *Src2Val;
2603   unsigned Opc = 0;
2604   bool NeedExtraOp = false;
2605   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2606     if (CI->isOne()) {
2607       Src1Val = SI->getCondition();
2608       Src2Val = SI->getFalseValue();
2609       Opc = AArch64::ORRWrr;
2610     } else {
2611       assert(CI->isZero());
2612       Src1Val = SI->getFalseValue();
2613       Src2Val = SI->getCondition();
2614       Opc = AArch64::BICWrr;
2615     }
2616   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2617     if (CI->isOne()) {
2618       Src1Val = SI->getCondition();
2619       Src2Val = SI->getTrueValue();
2620       Opc = AArch64::ORRWrr;
2621       NeedExtraOp = true;
2622     } else {
2623       assert(CI->isZero());
2624       Src1Val = SI->getCondition();
2625       Src2Val = SI->getTrueValue();
2626       Opc = AArch64::ANDWrr;
2627     }
2628   }
2629 
2630   if (!Opc)
2631     return false;
2632 
2633   Register Src1Reg = getRegForValue(Src1Val);
2634   if (!Src1Reg)
2635     return false;
2636 
2637   Register Src2Reg = getRegForValue(Src2Val);
2638   if (!Src2Reg)
2639     return false;
2640 
2641   if (NeedExtraOp)
2642     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2643 
2644   Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2645                                        Src2Reg);
2646   updateValueMap(SI, ResultReg);
2647   return true;
2648 }
2649 
2650 bool AArch64FastISel::selectSelect(const Instruction *I) {
2651   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2652   MVT VT;
2653   if (!isTypeSupported(I->getType(), VT))
2654     return false;
2655 
2656   unsigned Opc;
2657   const TargetRegisterClass *RC;
2658   switch (VT.SimpleTy) {
2659   default:
2660     return false;
2661   case MVT::i1:
2662   case MVT::i8:
2663   case MVT::i16:
2664   case MVT::i32:
2665     Opc = AArch64::CSELWr;
2666     RC = &AArch64::GPR32RegClass;
2667     break;
2668   case MVT::i64:
2669     Opc = AArch64::CSELXr;
2670     RC = &AArch64::GPR64RegClass;
2671     break;
2672   case MVT::f32:
2673     Opc = AArch64::FCSELSrrr;
2674     RC = &AArch64::FPR32RegClass;
2675     break;
2676   case MVT::f64:
2677     Opc = AArch64::FCSELDrrr;
2678     RC = &AArch64::FPR64RegClass;
2679     break;
2680   }
2681 
2682   const SelectInst *SI = cast<SelectInst>(I);
2683   const Value *Cond = SI->getCondition();
2684   AArch64CC::CondCode CC = AArch64CC::NE;
2685   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2686 
2687   if (optimizeSelect(SI))
2688     return true;
2689 
2690   // Try to pickup the flags, so we don't have to emit another compare.
2691   if (foldXALUIntrinsic(CC, I, Cond)) {
2692     // Fake request the condition to force emission of the XALU intrinsic.
2693     Register CondReg = getRegForValue(Cond);
2694     if (!CondReg)
2695       return false;
2696   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2697              isValueAvailable(Cond)) {
2698     const auto *Cmp = cast<CmpInst>(Cond);
2699     // Try to optimize or fold the cmp.
2700     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2701     const Value *FoldSelect = nullptr;
2702     switch (Predicate) {
2703     default:
2704       break;
2705     case CmpInst::FCMP_FALSE:
2706       FoldSelect = SI->getFalseValue();
2707       break;
2708     case CmpInst::FCMP_TRUE:
2709       FoldSelect = SI->getTrueValue();
2710       break;
2711     }
2712 
2713     if (FoldSelect) {
2714       Register SrcReg = getRegForValue(FoldSelect);
2715       if (!SrcReg)
2716         return false;
2717 
2718       updateValueMap(I, SrcReg);
2719       return true;
2720     }
2721 
2722     // Emit the cmp.
2723     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2724       return false;
2725 
2726     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2727     CC = getCompareCC(Predicate);
2728     switch (Predicate) {
2729     default:
2730       break;
2731     case CmpInst::FCMP_UEQ:
2732       ExtraCC = AArch64CC::EQ;
2733       CC = AArch64CC::VS;
2734       break;
2735     case CmpInst::FCMP_ONE:
2736       ExtraCC = AArch64CC::MI;
2737       CC = AArch64CC::GT;
2738       break;
2739     }
2740     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2741   } else {
2742     Register CondReg = getRegForValue(Cond);
2743     if (!CondReg)
2744       return false;
2745 
2746     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2747     CondReg = constrainOperandRegClass(II, CondReg, 1);
2748 
2749     // Emit a TST instruction (ANDS wzr, reg, #imm).
2750     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2751             AArch64::WZR)
2752         .addReg(CondReg)
2753         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2754   }
2755 
2756   Register Src1Reg = getRegForValue(SI->getTrueValue());
2757   Register Src2Reg = getRegForValue(SI->getFalseValue());
2758 
2759   if (!Src1Reg || !Src2Reg)
2760     return false;
2761 
2762   if (ExtraCC != AArch64CC::AL)
2763     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2764 
2765   Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2766   updateValueMap(I, ResultReg);
2767   return true;
2768 }
2769 
2770 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2771   Value *V = I->getOperand(0);
2772   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2773     return false;
2774 
2775   Register Op = getRegForValue(V);
2776   if (Op == 0)
2777     return false;
2778 
2779   Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2780   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2781           ResultReg).addReg(Op);
2782   updateValueMap(I, ResultReg);
2783   return true;
2784 }
2785 
2786 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2787   Value *V = I->getOperand(0);
2788   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2789     return false;
2790 
2791   Register Op = getRegForValue(V);
2792   if (Op == 0)
2793     return false;
2794 
2795   Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2796   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2797           ResultReg).addReg(Op);
2798   updateValueMap(I, ResultReg);
2799   return true;
2800 }
2801 
2802 // FPToUI and FPToSI
2803 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2804   MVT DestVT;
2805   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2806     return false;
2807 
2808   Register SrcReg = getRegForValue(I->getOperand(0));
2809   if (SrcReg == 0)
2810     return false;
2811 
2812   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2813   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2814     return false;
2815 
2816   unsigned Opc;
2817   if (SrcVT == MVT::f64) {
2818     if (Signed)
2819       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2820     else
2821       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2822   } else {
2823     if (Signed)
2824       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2825     else
2826       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2827   }
2828   Register ResultReg = createResultReg(
2829       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2830   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2831       .addReg(SrcReg);
2832   updateValueMap(I, ResultReg);
2833   return true;
2834 }
2835 
2836 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2837   MVT DestVT;
2838   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2839     return false;
2840   // Let regular ISEL handle FP16
2841   if (DestVT == MVT::f16)
2842     return false;
2843 
2844   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2845          "Unexpected value type.");
2846 
2847   Register SrcReg = getRegForValue(I->getOperand(0));
2848   if (!SrcReg)
2849     return false;
2850 
2851   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2852 
2853   // Handle sign-extension.
2854   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2855     SrcReg =
2856         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2857     if (!SrcReg)
2858       return false;
2859   }
2860 
2861   unsigned Opc;
2862   if (SrcVT == MVT::i64) {
2863     if (Signed)
2864       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2865     else
2866       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2867   } else {
2868     if (Signed)
2869       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2870     else
2871       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2872   }
2873 
2874   Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2875   updateValueMap(I, ResultReg);
2876   return true;
2877 }
2878 
2879 bool AArch64FastISel::fastLowerArguments() {
2880   if (!FuncInfo.CanLowerReturn)
2881     return false;
2882 
2883   const Function *F = FuncInfo.Fn;
2884   if (F->isVarArg())
2885     return false;
2886 
2887   CallingConv::ID CC = F->getCallingConv();
2888   if (CC != CallingConv::C && CC != CallingConv::Swift)
2889     return false;
2890 
2891   if (Subtarget->hasCustomCallingConv())
2892     return false;
2893 
2894   // Only handle simple cases of up to 8 GPR and FPR each.
2895   unsigned GPRCnt = 0;
2896   unsigned FPRCnt = 0;
2897   for (auto const &Arg : F->args()) {
2898     if (Arg.hasAttribute(Attribute::ByVal) ||
2899         Arg.hasAttribute(Attribute::InReg) ||
2900         Arg.hasAttribute(Attribute::StructRet) ||
2901         Arg.hasAttribute(Attribute::SwiftSelf) ||
2902         Arg.hasAttribute(Attribute::SwiftAsync) ||
2903         Arg.hasAttribute(Attribute::SwiftError) ||
2904         Arg.hasAttribute(Attribute::Nest))
2905       return false;
2906 
2907     Type *ArgTy = Arg.getType();
2908     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2909       return false;
2910 
2911     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2912     if (!ArgVT.isSimple())
2913       return false;
2914 
2915     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2916     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2917       return false;
2918 
2919     if (VT.isVector() &&
2920         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2921       return false;
2922 
2923     if (VT >= MVT::i1 && VT <= MVT::i64)
2924       ++GPRCnt;
2925     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2926              VT.is128BitVector())
2927       ++FPRCnt;
2928     else
2929       return false;
2930 
2931     if (GPRCnt > 8 || FPRCnt > 8)
2932       return false;
2933   }
2934 
2935   static const MCPhysReg Registers[6][8] = {
2936     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2937       AArch64::W5, AArch64::W6, AArch64::W7 },
2938     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2939       AArch64::X5, AArch64::X6, AArch64::X7 },
2940     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2941       AArch64::H5, AArch64::H6, AArch64::H7 },
2942     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2943       AArch64::S5, AArch64::S6, AArch64::S7 },
2944     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2945       AArch64::D5, AArch64::D6, AArch64::D7 },
2946     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2947       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2948   };
2949 
2950   unsigned GPRIdx = 0;
2951   unsigned FPRIdx = 0;
2952   for (auto const &Arg : F->args()) {
2953     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2954     unsigned SrcReg;
2955     const TargetRegisterClass *RC;
2956     if (VT >= MVT::i1 && VT <= MVT::i32) {
2957       SrcReg = Registers[0][GPRIdx++];
2958       RC = &AArch64::GPR32RegClass;
2959       VT = MVT::i32;
2960     } else if (VT == MVT::i64) {
2961       SrcReg = Registers[1][GPRIdx++];
2962       RC = &AArch64::GPR64RegClass;
2963     } else if (VT == MVT::f16) {
2964       SrcReg = Registers[2][FPRIdx++];
2965       RC = &AArch64::FPR16RegClass;
2966     } else if (VT ==  MVT::f32) {
2967       SrcReg = Registers[3][FPRIdx++];
2968       RC = &AArch64::FPR32RegClass;
2969     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2970       SrcReg = Registers[4][FPRIdx++];
2971       RC = &AArch64::FPR64RegClass;
2972     } else if (VT.is128BitVector()) {
2973       SrcReg = Registers[5][FPRIdx++];
2974       RC = &AArch64::FPR128RegClass;
2975     } else
2976       llvm_unreachable("Unexpected value type.");
2977 
2978     Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2979     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2980     // Without this, EmitLiveInCopies may eliminate the livein if its only
2981     // use is a bitcast (which isn't turned into an instruction).
2982     Register ResultReg = createResultReg(RC);
2983     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2984             TII.get(TargetOpcode::COPY), ResultReg)
2985         .addReg(DstReg, getKillRegState(true));
2986     updateValueMap(&Arg, ResultReg);
2987   }
2988   return true;
2989 }
2990 
2991 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2992                                       SmallVectorImpl<MVT> &OutVTs,
2993                                       unsigned &NumBytes) {
2994   CallingConv::ID CC = CLI.CallConv;
2995   SmallVector<CCValAssign, 16> ArgLocs;
2996   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2997   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2998 
2999   // Get a count of how many bytes are to be pushed on the stack.
3000   NumBytes = CCInfo.getNextStackOffset();
3001 
3002   // Issue CALLSEQ_START
3003   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3004   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3005     .addImm(NumBytes).addImm(0);
3006 
3007   // Process the args.
3008   for (CCValAssign &VA : ArgLocs) {
3009     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3010     MVT ArgVT = OutVTs[VA.getValNo()];
3011 
3012     Register ArgReg = getRegForValue(ArgVal);
3013     if (!ArgReg)
3014       return false;
3015 
3016     // Handle arg promotion: SExt, ZExt, AExt.
3017     switch (VA.getLocInfo()) {
3018     case CCValAssign::Full:
3019       break;
3020     case CCValAssign::SExt: {
3021       MVT DestVT = VA.getLocVT();
3022       MVT SrcVT = ArgVT;
3023       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3024       if (!ArgReg)
3025         return false;
3026       break;
3027     }
3028     case CCValAssign::AExt:
3029     // Intentional fall-through.
3030     case CCValAssign::ZExt: {
3031       MVT DestVT = VA.getLocVT();
3032       MVT SrcVT = ArgVT;
3033       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3034       if (!ArgReg)
3035         return false;
3036       break;
3037     }
3038     default:
3039       llvm_unreachable("Unknown arg promotion!");
3040     }
3041 
3042     // Now copy/store arg to correct locations.
3043     if (VA.isRegLoc() && !VA.needsCustom()) {
3044       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3045               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3046       CLI.OutRegs.push_back(VA.getLocReg());
3047     } else if (VA.needsCustom()) {
3048       // FIXME: Handle custom args.
3049       return false;
3050     } else {
3051       assert(VA.isMemLoc() && "Assuming store on stack.");
3052 
3053       // Don't emit stores for undef values.
3054       if (isa<UndefValue>(ArgVal))
3055         continue;
3056 
3057       // Need to store on the stack.
3058       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3059 
3060       unsigned BEAlign = 0;
3061       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3062         BEAlign = 8 - ArgSize;
3063 
3064       Address Addr;
3065       Addr.setKind(Address::RegBase);
3066       Addr.setReg(AArch64::SP);
3067       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3068 
3069       Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3070       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3071           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3072           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3073 
3074       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3075         return false;
3076     }
3077   }
3078   return true;
3079 }
3080 
3081 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3082                                  unsigned NumBytes) {
3083   CallingConv::ID CC = CLI.CallConv;
3084 
3085   // Issue CALLSEQ_END
3086   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3087   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3088     .addImm(NumBytes).addImm(0);
3089 
3090   // Now the return value.
3091   if (RetVT != MVT::isVoid) {
3092     SmallVector<CCValAssign, 16> RVLocs;
3093     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3094     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3095 
3096     // Only handle a single return value.
3097     if (RVLocs.size() != 1)
3098       return false;
3099 
3100     // Copy all of the result registers out of their specified physreg.
3101     MVT CopyVT = RVLocs[0].getValVT();
3102 
3103     // TODO: Handle big-endian results
3104     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3105       return false;
3106 
3107     Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3108     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3109             TII.get(TargetOpcode::COPY), ResultReg)
3110         .addReg(RVLocs[0].getLocReg());
3111     CLI.InRegs.push_back(RVLocs[0].getLocReg());
3112 
3113     CLI.ResultReg = ResultReg;
3114     CLI.NumResultRegs = 1;
3115   }
3116 
3117   return true;
3118 }
3119 
3120 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3121   CallingConv::ID CC  = CLI.CallConv;
3122   bool IsTailCall     = CLI.IsTailCall;
3123   bool IsVarArg       = CLI.IsVarArg;
3124   const Value *Callee = CLI.Callee;
3125   MCSymbol *Symbol = CLI.Symbol;
3126 
3127   if (!Callee && !Symbol)
3128     return false;
3129 
3130   // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3131   // a bti instruction following the call.
3132   if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3133       !Subtarget->noBTIAtReturnTwice() &&
3134       MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3135     return false;
3136 
3137   // Allow SelectionDAG isel to handle tail calls.
3138   if (IsTailCall)
3139     return false;
3140 
3141   // FIXME: we could and should support this, but for now correctness at -O0 is
3142   // more important.
3143   if (Subtarget->isTargetILP32())
3144     return false;
3145 
3146   CodeModel::Model CM = TM.getCodeModel();
3147   // Only support the small-addressing and large code models.
3148   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3149     return false;
3150 
3151   // FIXME: Add large code model support for ELF.
3152   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3153     return false;
3154 
3155   // Let SDISel handle vararg functions.
3156   if (IsVarArg)
3157     return false;
3158 
3159   // FIXME: Only handle *simple* calls for now.
3160   MVT RetVT;
3161   if (CLI.RetTy->isVoidTy())
3162     RetVT = MVT::isVoid;
3163   else if (!isTypeLegal(CLI.RetTy, RetVT))
3164     return false;
3165 
3166   for (auto Flag : CLI.OutFlags)
3167     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3168         Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3169       return false;
3170 
3171   // Set up the argument vectors.
3172   SmallVector<MVT, 16> OutVTs;
3173   OutVTs.reserve(CLI.OutVals.size());
3174 
3175   for (auto *Val : CLI.OutVals) {
3176     MVT VT;
3177     if (!isTypeLegal(Val->getType(), VT) &&
3178         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3179       return false;
3180 
3181     // We don't handle vector parameters yet.
3182     if (VT.isVector() || VT.getSizeInBits() > 64)
3183       return false;
3184 
3185     OutVTs.push_back(VT);
3186   }
3187 
3188   Address Addr;
3189   if (Callee && !computeCallAddress(Callee, Addr))
3190     return false;
3191 
3192   // The weak function target may be zero; in that case we must use indirect
3193   // addressing via a stub on windows as it may be out of range for a
3194   // PC-relative jump.
3195   if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3196       Addr.getGlobalValue()->hasExternalWeakLinkage())
3197     return false;
3198 
3199   // Handle the arguments now that we've gotten them.
3200   unsigned NumBytes;
3201   if (!processCallArgs(CLI, OutVTs, NumBytes))
3202     return false;
3203 
3204   const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3205   if (RegInfo->isAnyArgRegReserved(*MF))
3206     RegInfo->emitReservedArgRegCallError(*MF);
3207 
3208   // Issue the call.
3209   MachineInstrBuilder MIB;
3210   if (Subtarget->useSmallAddressing()) {
3211     const MCInstrDesc &II =
3212         TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3213     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3214     if (Symbol)
3215       MIB.addSym(Symbol, 0);
3216     else if (Addr.getGlobalValue())
3217       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3218     else if (Addr.getReg()) {
3219       Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3220       MIB.addReg(Reg);
3221     } else
3222       return false;
3223   } else {
3224     unsigned CallReg = 0;
3225     if (Symbol) {
3226       Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3227       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3228               ADRPReg)
3229           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3230 
3231       CallReg = createResultReg(&AArch64::GPR64RegClass);
3232       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3233               TII.get(AArch64::LDRXui), CallReg)
3234           .addReg(ADRPReg)
3235           .addSym(Symbol,
3236                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3237     } else if (Addr.getGlobalValue())
3238       CallReg = materializeGV(Addr.getGlobalValue());
3239     else if (Addr.getReg())
3240       CallReg = Addr.getReg();
3241 
3242     if (!CallReg)
3243       return false;
3244 
3245     const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3246     CallReg = constrainOperandRegClass(II, CallReg, 0);
3247     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3248   }
3249 
3250   // Add implicit physical register uses to the call.
3251   for (auto Reg : CLI.OutRegs)
3252     MIB.addReg(Reg, RegState::Implicit);
3253 
3254   // Add a register mask with the call-preserved registers.
3255   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3256   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3257 
3258   CLI.Call = MIB;
3259 
3260   // Finish off the call including any return values.
3261   return finishCall(CLI, RetVT, NumBytes);
3262 }
3263 
3264 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3265   if (Alignment)
3266     return Len / Alignment <= 4;
3267   else
3268     return Len < 32;
3269 }
3270 
3271 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3272                                          uint64_t Len, unsigned Alignment) {
3273   // Make sure we don't bloat code by inlining very large memcpy's.
3274   if (!isMemCpySmall(Len, Alignment))
3275     return false;
3276 
3277   int64_t UnscaledOffset = 0;
3278   Address OrigDest = Dest;
3279   Address OrigSrc = Src;
3280 
3281   while (Len) {
3282     MVT VT;
3283     if (!Alignment || Alignment >= 8) {
3284       if (Len >= 8)
3285         VT = MVT::i64;
3286       else if (Len >= 4)
3287         VT = MVT::i32;
3288       else if (Len >= 2)
3289         VT = MVT::i16;
3290       else {
3291         VT = MVT::i8;
3292       }
3293     } else {
3294       // Bound based on alignment.
3295       if (Len >= 4 && Alignment == 4)
3296         VT = MVT::i32;
3297       else if (Len >= 2 && Alignment == 2)
3298         VT = MVT::i16;
3299       else {
3300         VT = MVT::i8;
3301       }
3302     }
3303 
3304     unsigned ResultReg = emitLoad(VT, VT, Src);
3305     if (!ResultReg)
3306       return false;
3307 
3308     if (!emitStore(VT, ResultReg, Dest))
3309       return false;
3310 
3311     int64_t Size = VT.getSizeInBits() / 8;
3312     Len -= Size;
3313     UnscaledOffset += Size;
3314 
3315     // We need to recompute the unscaled offset for each iteration.
3316     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3317     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3318   }
3319 
3320   return true;
3321 }
3322 
3323 /// Check if it is possible to fold the condition from the XALU intrinsic
3324 /// into the user. The condition code will only be updated on success.
3325 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3326                                         const Instruction *I,
3327                                         const Value *Cond) {
3328   if (!isa<ExtractValueInst>(Cond))
3329     return false;
3330 
3331   const auto *EV = cast<ExtractValueInst>(Cond);
3332   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3333     return false;
3334 
3335   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3336   MVT RetVT;
3337   const Function *Callee = II->getCalledFunction();
3338   Type *RetTy =
3339   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3340   if (!isTypeLegal(RetTy, RetVT))
3341     return false;
3342 
3343   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3344     return false;
3345 
3346   const Value *LHS = II->getArgOperand(0);
3347   const Value *RHS = II->getArgOperand(1);
3348 
3349   // Canonicalize immediate to the RHS.
3350   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3351     std::swap(LHS, RHS);
3352 
3353   // Simplify multiplies.
3354   Intrinsic::ID IID = II->getIntrinsicID();
3355   switch (IID) {
3356   default:
3357     break;
3358   case Intrinsic::smul_with_overflow:
3359     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3360       if (C->getValue() == 2)
3361         IID = Intrinsic::sadd_with_overflow;
3362     break;
3363   case Intrinsic::umul_with_overflow:
3364     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3365       if (C->getValue() == 2)
3366         IID = Intrinsic::uadd_with_overflow;
3367     break;
3368   }
3369 
3370   AArch64CC::CondCode TmpCC;
3371   switch (IID) {
3372   default:
3373     return false;
3374   case Intrinsic::sadd_with_overflow:
3375   case Intrinsic::ssub_with_overflow:
3376     TmpCC = AArch64CC::VS;
3377     break;
3378   case Intrinsic::uadd_with_overflow:
3379     TmpCC = AArch64CC::HS;
3380     break;
3381   case Intrinsic::usub_with_overflow:
3382     TmpCC = AArch64CC::LO;
3383     break;
3384   case Intrinsic::smul_with_overflow:
3385   case Intrinsic::umul_with_overflow:
3386     TmpCC = AArch64CC::NE;
3387     break;
3388   }
3389 
3390   // Check if both instructions are in the same basic block.
3391   if (!isValueAvailable(II))
3392     return false;
3393 
3394   // Make sure nothing is in the way
3395   BasicBlock::const_iterator Start(I);
3396   BasicBlock::const_iterator End(II);
3397   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3398     // We only expect extractvalue instructions between the intrinsic and the
3399     // instruction to be selected.
3400     if (!isa<ExtractValueInst>(Itr))
3401       return false;
3402 
3403     // Check that the extractvalue operand comes from the intrinsic.
3404     const auto *EVI = cast<ExtractValueInst>(Itr);
3405     if (EVI->getAggregateOperand() != II)
3406       return false;
3407   }
3408 
3409   CC = TmpCC;
3410   return true;
3411 }
3412 
3413 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3414   // FIXME: Handle more intrinsics.
3415   switch (II->getIntrinsicID()) {
3416   default: return false;
3417   case Intrinsic::frameaddress: {
3418     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3419     MFI.setFrameAddressIsTaken(true);
3420 
3421     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3422     Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3423     Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3424     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3425             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3426     // Recursively load frame address
3427     // ldr x0, [fp]
3428     // ldr x0, [x0]
3429     // ldr x0, [x0]
3430     // ...
3431     unsigned DestReg;
3432     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3433     while (Depth--) {
3434       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3435                                 SrcReg, 0);
3436       assert(DestReg && "Unexpected LDR instruction emission failure.");
3437       SrcReg = DestReg;
3438     }
3439 
3440     updateValueMap(II, SrcReg);
3441     return true;
3442   }
3443   case Intrinsic::sponentry: {
3444     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3445 
3446     // SP = FP + Fixed Object + 16
3447     int FI = MFI.CreateFixedObject(4, 0, false);
3448     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3449     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3450             TII.get(AArch64::ADDXri), ResultReg)
3451             .addFrameIndex(FI)
3452             .addImm(0)
3453             .addImm(0);
3454 
3455     updateValueMap(II, ResultReg);
3456     return true;
3457   }
3458   case Intrinsic::memcpy:
3459   case Intrinsic::memmove: {
3460     const auto *MTI = cast<MemTransferInst>(II);
3461     // Don't handle volatile.
3462     if (MTI->isVolatile())
3463       return false;
3464 
3465     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3466     // we would emit dead code because we don't currently handle memmoves.
3467     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3468     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3469       // Small memcpy's are common enough that we want to do them without a call
3470       // if possible.
3471       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3472       unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3473                                     MTI->getSourceAlignment());
3474       if (isMemCpySmall(Len, Alignment)) {
3475         Address Dest, Src;
3476         if (!computeAddress(MTI->getRawDest(), Dest) ||
3477             !computeAddress(MTI->getRawSource(), Src))
3478           return false;
3479         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3480           return true;
3481       }
3482     }
3483 
3484     if (!MTI->getLength()->getType()->isIntegerTy(64))
3485       return false;
3486 
3487     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3488       // Fast instruction selection doesn't support the special
3489       // address spaces.
3490       return false;
3491 
3492     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3493     return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3494   }
3495   case Intrinsic::memset: {
3496     const MemSetInst *MSI = cast<MemSetInst>(II);
3497     // Don't handle volatile.
3498     if (MSI->isVolatile())
3499       return false;
3500 
3501     if (!MSI->getLength()->getType()->isIntegerTy(64))
3502       return false;
3503 
3504     if (MSI->getDestAddressSpace() > 255)
3505       // Fast instruction selection doesn't support the special
3506       // address spaces.
3507       return false;
3508 
3509     return lowerCallTo(II, "memset", II->arg_size() - 1);
3510   }
3511   case Intrinsic::sin:
3512   case Intrinsic::cos:
3513   case Intrinsic::pow: {
3514     MVT RetVT;
3515     if (!isTypeLegal(II->getType(), RetVT))
3516       return false;
3517 
3518     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3519       return false;
3520 
3521     static const RTLIB::Libcall LibCallTable[3][2] = {
3522       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3523       { RTLIB::COS_F32, RTLIB::COS_F64 },
3524       { RTLIB::POW_F32, RTLIB::POW_F64 }
3525     };
3526     RTLIB::Libcall LC;
3527     bool Is64Bit = RetVT == MVT::f64;
3528     switch (II->getIntrinsicID()) {
3529     default:
3530       llvm_unreachable("Unexpected intrinsic.");
3531     case Intrinsic::sin:
3532       LC = LibCallTable[0][Is64Bit];
3533       break;
3534     case Intrinsic::cos:
3535       LC = LibCallTable[1][Is64Bit];
3536       break;
3537     case Intrinsic::pow:
3538       LC = LibCallTable[2][Is64Bit];
3539       break;
3540     }
3541 
3542     ArgListTy Args;
3543     Args.reserve(II->arg_size());
3544 
3545     // Populate the argument list.
3546     for (auto &Arg : II->args()) {
3547       ArgListEntry Entry;
3548       Entry.Val = Arg;
3549       Entry.Ty = Arg->getType();
3550       Args.push_back(Entry);
3551     }
3552 
3553     CallLoweringInfo CLI;
3554     MCContext &Ctx = MF->getContext();
3555     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3556                   TLI.getLibcallName(LC), std::move(Args));
3557     if (!lowerCallTo(CLI))
3558       return false;
3559     updateValueMap(II, CLI.ResultReg);
3560     return true;
3561   }
3562   case Intrinsic::fabs: {
3563     MVT VT;
3564     if (!isTypeLegal(II->getType(), VT))
3565       return false;
3566 
3567     unsigned Opc;
3568     switch (VT.SimpleTy) {
3569     default:
3570       return false;
3571     case MVT::f32:
3572       Opc = AArch64::FABSSr;
3573       break;
3574     case MVT::f64:
3575       Opc = AArch64::FABSDr;
3576       break;
3577     }
3578     Register SrcReg = getRegForValue(II->getOperand(0));
3579     if (!SrcReg)
3580       return false;
3581     Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3582     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3583       .addReg(SrcReg);
3584     updateValueMap(II, ResultReg);
3585     return true;
3586   }
3587   case Intrinsic::trap:
3588     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3589         .addImm(1);
3590     return true;
3591   case Intrinsic::debugtrap:
3592     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3593         .addImm(0xF000);
3594     return true;
3595 
3596   case Intrinsic::sqrt: {
3597     Type *RetTy = II->getCalledFunction()->getReturnType();
3598 
3599     MVT VT;
3600     if (!isTypeLegal(RetTy, VT))
3601       return false;
3602 
3603     Register Op0Reg = getRegForValue(II->getOperand(0));
3604     if (!Op0Reg)
3605       return false;
3606 
3607     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3608     if (!ResultReg)
3609       return false;
3610 
3611     updateValueMap(II, ResultReg);
3612     return true;
3613   }
3614   case Intrinsic::sadd_with_overflow:
3615   case Intrinsic::uadd_with_overflow:
3616   case Intrinsic::ssub_with_overflow:
3617   case Intrinsic::usub_with_overflow:
3618   case Intrinsic::smul_with_overflow:
3619   case Intrinsic::umul_with_overflow: {
3620     // This implements the basic lowering of the xalu with overflow intrinsics.
3621     const Function *Callee = II->getCalledFunction();
3622     auto *Ty = cast<StructType>(Callee->getReturnType());
3623     Type *RetTy = Ty->getTypeAtIndex(0U);
3624 
3625     MVT VT;
3626     if (!isTypeLegal(RetTy, VT))
3627       return false;
3628 
3629     if (VT != MVT::i32 && VT != MVT::i64)
3630       return false;
3631 
3632     const Value *LHS = II->getArgOperand(0);
3633     const Value *RHS = II->getArgOperand(1);
3634     // Canonicalize immediate to the RHS.
3635     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3636       std::swap(LHS, RHS);
3637 
3638     // Simplify multiplies.
3639     Intrinsic::ID IID = II->getIntrinsicID();
3640     switch (IID) {
3641     default:
3642       break;
3643     case Intrinsic::smul_with_overflow:
3644       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3645         if (C->getValue() == 2) {
3646           IID = Intrinsic::sadd_with_overflow;
3647           RHS = LHS;
3648         }
3649       break;
3650     case Intrinsic::umul_with_overflow:
3651       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3652         if (C->getValue() == 2) {
3653           IID = Intrinsic::uadd_with_overflow;
3654           RHS = LHS;
3655         }
3656       break;
3657     }
3658 
3659     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3660     AArch64CC::CondCode CC = AArch64CC::Invalid;
3661     switch (IID) {
3662     default: llvm_unreachable("Unexpected intrinsic!");
3663     case Intrinsic::sadd_with_overflow:
3664       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3665       CC = AArch64CC::VS;
3666       break;
3667     case Intrinsic::uadd_with_overflow:
3668       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3669       CC = AArch64CC::HS;
3670       break;
3671     case Intrinsic::ssub_with_overflow:
3672       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3673       CC = AArch64CC::VS;
3674       break;
3675     case Intrinsic::usub_with_overflow:
3676       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3677       CC = AArch64CC::LO;
3678       break;
3679     case Intrinsic::smul_with_overflow: {
3680       CC = AArch64CC::NE;
3681       Register LHSReg = getRegForValue(LHS);
3682       if (!LHSReg)
3683         return false;
3684 
3685       Register RHSReg = getRegForValue(RHS);
3686       if (!RHSReg)
3687         return false;
3688 
3689       if (VT == MVT::i32) {
3690         MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3691         Register MulSubReg =
3692             fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3693         // cmp xreg, wreg, sxtw
3694         emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3695                       AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3696                       /*WantResult=*/false);
3697         MulReg = MulSubReg;
3698       } else {
3699         assert(VT == MVT::i64 && "Unexpected value type.");
3700         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3701         // reused in the next instruction.
3702         MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3703         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3704         emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3705                     /*WantResult=*/false);
3706       }
3707       break;
3708     }
3709     case Intrinsic::umul_with_overflow: {
3710       CC = AArch64CC::NE;
3711       Register LHSReg = getRegForValue(LHS);
3712       if (!LHSReg)
3713         return false;
3714 
3715       Register RHSReg = getRegForValue(RHS);
3716       if (!RHSReg)
3717         return false;
3718 
3719       if (VT == MVT::i32) {
3720         MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3721         // tst xreg, #0xffffffff00000000
3722         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3723                 TII.get(AArch64::ANDSXri), AArch64::XZR)
3724             .addReg(MulReg)
3725             .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3726         MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3727       } else {
3728         assert(VT == MVT::i64 && "Unexpected value type.");
3729         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3730         // reused in the next instruction.
3731         MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3732         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3733         emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3734       }
3735       break;
3736     }
3737     }
3738 
3739     if (MulReg) {
3740       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3741       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3742               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3743     }
3744 
3745     if (!ResultReg1)
3746       return false;
3747 
3748     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3749                                   AArch64::WZR, AArch64::WZR,
3750                                   getInvertedCondCode(CC));
3751     (void)ResultReg2;
3752     assert((ResultReg1 + 1) == ResultReg2 &&
3753            "Nonconsecutive result registers.");
3754     updateValueMap(II, ResultReg1, 2);
3755     return true;
3756   }
3757   }
3758   return false;
3759 }
3760 
3761 bool AArch64FastISel::selectRet(const Instruction *I) {
3762   const ReturnInst *Ret = cast<ReturnInst>(I);
3763   const Function &F = *I->getParent()->getParent();
3764 
3765   if (!FuncInfo.CanLowerReturn)
3766     return false;
3767 
3768   if (F.isVarArg())
3769     return false;
3770 
3771   if (TLI.supportSwiftError() &&
3772       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3773     return false;
3774 
3775   if (TLI.supportSplitCSR(FuncInfo.MF))
3776     return false;
3777 
3778   // Build a list of return value registers.
3779   SmallVector<unsigned, 4> RetRegs;
3780 
3781   if (Ret->getNumOperands() > 0) {
3782     CallingConv::ID CC = F.getCallingConv();
3783     SmallVector<ISD::OutputArg, 4> Outs;
3784     GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3785 
3786     // Analyze operands of the call, assigning locations to each operand.
3787     SmallVector<CCValAssign, 16> ValLocs;
3788     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3789     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3790                                                      : RetCC_AArch64_AAPCS;
3791     CCInfo.AnalyzeReturn(Outs, RetCC);
3792 
3793     // Only handle a single return value for now.
3794     if (ValLocs.size() != 1)
3795       return false;
3796 
3797     CCValAssign &VA = ValLocs[0];
3798     const Value *RV = Ret->getOperand(0);
3799 
3800     // Don't bother handling odd stuff for now.
3801     if ((VA.getLocInfo() != CCValAssign::Full) &&
3802         (VA.getLocInfo() != CCValAssign::BCvt))
3803       return false;
3804 
3805     // Only handle register returns for now.
3806     if (!VA.isRegLoc())
3807       return false;
3808 
3809     Register Reg = getRegForValue(RV);
3810     if (Reg == 0)
3811       return false;
3812 
3813     unsigned SrcReg = Reg + VA.getValNo();
3814     Register DestReg = VA.getLocReg();
3815     // Avoid a cross-class copy. This is very unlikely.
3816     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3817       return false;
3818 
3819     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3820     if (!RVEVT.isSimple())
3821       return false;
3822 
3823     // Vectors (of > 1 lane) in big endian need tricky handling.
3824     if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3825         !Subtarget->isLittleEndian())
3826       return false;
3827 
3828     MVT RVVT = RVEVT.getSimpleVT();
3829     if (RVVT == MVT::f128)
3830       return false;
3831 
3832     MVT DestVT = VA.getValVT();
3833     // Special handling for extended integers.
3834     if (RVVT != DestVT) {
3835       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3836         return false;
3837 
3838       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3839         return false;
3840 
3841       bool IsZExt = Outs[0].Flags.isZExt();
3842       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3843       if (SrcReg == 0)
3844         return false;
3845     }
3846 
3847     // "Callee" (i.e. value producer) zero extends pointers at function
3848     // boundary.
3849     if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3850       SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3851 
3852     // Make the copy.
3853     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3854             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3855 
3856     // Add register to return instruction.
3857     RetRegs.push_back(VA.getLocReg());
3858   }
3859 
3860   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3861                                     TII.get(AArch64::RET_ReallyLR));
3862   for (unsigned RetReg : RetRegs)
3863     MIB.addReg(RetReg, RegState::Implicit);
3864   return true;
3865 }
3866 
3867 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3868   Type *DestTy = I->getType();
3869   Value *Op = I->getOperand(0);
3870   Type *SrcTy = Op->getType();
3871 
3872   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3873   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3874   if (!SrcEVT.isSimple())
3875     return false;
3876   if (!DestEVT.isSimple())
3877     return false;
3878 
3879   MVT SrcVT = SrcEVT.getSimpleVT();
3880   MVT DestVT = DestEVT.getSimpleVT();
3881 
3882   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3883       SrcVT != MVT::i8)
3884     return false;
3885   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3886       DestVT != MVT::i1)
3887     return false;
3888 
3889   Register SrcReg = getRegForValue(Op);
3890   if (!SrcReg)
3891     return false;
3892 
3893   // If we're truncating from i64 to a smaller non-legal type then generate an
3894   // AND. Otherwise, we know the high bits are undefined and a truncate only
3895   // generate a COPY. We cannot mark the source register also as result
3896   // register, because this can incorrectly transfer the kill flag onto the
3897   // source register.
3898   unsigned ResultReg;
3899   if (SrcVT == MVT::i64) {
3900     uint64_t Mask = 0;
3901     switch (DestVT.SimpleTy) {
3902     default:
3903       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3904       return false;
3905     case MVT::i1:
3906       Mask = 0x1;
3907       break;
3908     case MVT::i8:
3909       Mask = 0xff;
3910       break;
3911     case MVT::i16:
3912       Mask = 0xffff;
3913       break;
3914     }
3915     // Issue an extract_subreg to get the lower 32-bits.
3916     Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3917                                                 AArch64::sub_32);
3918     // Create the AND instruction which performs the actual truncation.
3919     ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3920     assert(ResultReg && "Unexpected AND instruction emission failure.");
3921   } else {
3922     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3923     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3924             TII.get(TargetOpcode::COPY), ResultReg)
3925         .addReg(SrcReg);
3926   }
3927 
3928   updateValueMap(I, ResultReg);
3929   return true;
3930 }
3931 
3932 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3933   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3934           DestVT == MVT::i64) &&
3935          "Unexpected value type.");
3936   // Handle i8 and i16 as i32.
3937   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3938     DestVT = MVT::i32;
3939 
3940   if (IsZExt) {
3941     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
3942     assert(ResultReg && "Unexpected AND instruction emission failure.");
3943     if (DestVT == MVT::i64) {
3944       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3945       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3946       Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3947       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3948               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3949           .addImm(0)
3950           .addReg(ResultReg)
3951           .addImm(AArch64::sub_32);
3952       ResultReg = Reg64;
3953     }
3954     return ResultReg;
3955   } else {
3956     if (DestVT == MVT::i64) {
3957       // FIXME: We're SExt i1 to i64.
3958       return 0;
3959     }
3960     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3961                             0, 0);
3962   }
3963 }
3964 
3965 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3966   unsigned Opc, ZReg;
3967   switch (RetVT.SimpleTy) {
3968   default: return 0;
3969   case MVT::i8:
3970   case MVT::i16:
3971   case MVT::i32:
3972     RetVT = MVT::i32;
3973     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3974   case MVT::i64:
3975     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3976   }
3977 
3978   const TargetRegisterClass *RC =
3979       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3980   return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
3981 }
3982 
3983 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3984   if (RetVT != MVT::i64)
3985     return 0;
3986 
3987   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3988                           Op0, Op1, AArch64::XZR);
3989 }
3990 
3991 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3992   if (RetVT != MVT::i64)
3993     return 0;
3994 
3995   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3996                           Op0, Op1, AArch64::XZR);
3997 }
3998 
3999 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4000                                      unsigned Op1Reg) {
4001   unsigned Opc = 0;
4002   bool NeedTrunc = false;
4003   uint64_t Mask = 0;
4004   switch (RetVT.SimpleTy) {
4005   default: return 0;
4006   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
4007   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4008   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4009   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4010   }
4011 
4012   const TargetRegisterClass *RC =
4013       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4014   if (NeedTrunc)
4015     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4016 
4017   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4018   if (NeedTrunc)
4019     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4020   return ResultReg;
4021 }
4022 
4023 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4024                                      uint64_t Shift, bool IsZExt) {
4025   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4026          "Unexpected source/return type pair.");
4027   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4028           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4029          "Unexpected source value type.");
4030   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4031           RetVT == MVT::i64) && "Unexpected return value type.");
4032 
4033   bool Is64Bit = (RetVT == MVT::i64);
4034   unsigned RegSize = Is64Bit ? 64 : 32;
4035   unsigned DstBits = RetVT.getSizeInBits();
4036   unsigned SrcBits = SrcVT.getSizeInBits();
4037   const TargetRegisterClass *RC =
4038       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4039 
4040   // Just emit a copy for "zero" shifts.
4041   if (Shift == 0) {
4042     if (RetVT == SrcVT) {
4043       Register ResultReg = createResultReg(RC);
4044       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4045               TII.get(TargetOpcode::COPY), ResultReg)
4046           .addReg(Op0);
4047       return ResultReg;
4048     } else
4049       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4050   }
4051 
4052   // Don't deal with undefined shifts.
4053   if (Shift >= DstBits)
4054     return 0;
4055 
4056   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4057   // {S|U}BFM Wd, Wn, #r, #s
4058   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4059 
4060   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4061   // %2 = shl i16 %1, 4
4062   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4063   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4064   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4065   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4066 
4067   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4068   // %2 = shl i16 %1, 8
4069   // Wd<32+7-24,32-24> = Wn<7:0>
4070   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4071   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4072   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4073 
4074   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4075   // %2 = shl i16 %1, 12
4076   // Wd<32+3-20,32-20> = Wn<3:0>
4077   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4078   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4079   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4080 
4081   unsigned ImmR = RegSize - Shift;
4082   // Limit the width to the length of the source type.
4083   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4084   static const unsigned OpcTable[2][2] = {
4085     {AArch64::SBFMWri, AArch64::SBFMXri},
4086     {AArch64::UBFMWri, AArch64::UBFMXri}
4087   };
4088   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4089   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4090     Register TmpReg = MRI.createVirtualRegister(RC);
4091     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4092             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4093         .addImm(0)
4094         .addReg(Op0)
4095         .addImm(AArch64::sub_32);
4096     Op0 = TmpReg;
4097   }
4098   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4099 }
4100 
4101 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4102                                      unsigned Op1Reg) {
4103   unsigned Opc = 0;
4104   bool NeedTrunc = false;
4105   uint64_t Mask = 0;
4106   switch (RetVT.SimpleTy) {
4107   default: return 0;
4108   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4109   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4110   case MVT::i32: Opc = AArch64::LSRVWr; break;
4111   case MVT::i64: Opc = AArch64::LSRVXr; break;
4112   }
4113 
4114   const TargetRegisterClass *RC =
4115       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4116   if (NeedTrunc) {
4117     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4118     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4119   }
4120   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4121   if (NeedTrunc)
4122     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4123   return ResultReg;
4124 }
4125 
4126 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4127                                      uint64_t Shift, bool IsZExt) {
4128   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4129          "Unexpected source/return type pair.");
4130   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4131           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4132          "Unexpected source value type.");
4133   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4134           RetVT == MVT::i64) && "Unexpected return value type.");
4135 
4136   bool Is64Bit = (RetVT == MVT::i64);
4137   unsigned RegSize = Is64Bit ? 64 : 32;
4138   unsigned DstBits = RetVT.getSizeInBits();
4139   unsigned SrcBits = SrcVT.getSizeInBits();
4140   const TargetRegisterClass *RC =
4141       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4142 
4143   // Just emit a copy for "zero" shifts.
4144   if (Shift == 0) {
4145     if (RetVT == SrcVT) {
4146       Register ResultReg = createResultReg(RC);
4147       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4148               TII.get(TargetOpcode::COPY), ResultReg)
4149       .addReg(Op0);
4150       return ResultReg;
4151     } else
4152       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4153   }
4154 
4155   // Don't deal with undefined shifts.
4156   if (Shift >= DstBits)
4157     return 0;
4158 
4159   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4160   // {S|U}BFM Wd, Wn, #r, #s
4161   // Wd<s-r:0> = Wn<s:r> when r <= s
4162 
4163   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4164   // %2 = lshr i16 %1, 4
4165   // Wd<7-4:0> = Wn<7:4>
4166   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4167   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4168   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4169 
4170   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4171   // %2 = lshr i16 %1, 8
4172   // Wd<7-7,0> = Wn<7:7>
4173   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4174   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4175   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4176 
4177   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4178   // %2 = lshr i16 %1, 12
4179   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4180   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4181   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4182   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4183 
4184   if (Shift >= SrcBits && IsZExt)
4185     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4186 
4187   // It is not possible to fold a sign-extend into the LShr instruction. In this
4188   // case emit a sign-extend.
4189   if (!IsZExt) {
4190     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4191     if (!Op0)
4192       return 0;
4193     SrcVT = RetVT;
4194     SrcBits = SrcVT.getSizeInBits();
4195     IsZExt = true;
4196   }
4197 
4198   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4199   unsigned ImmS = SrcBits - 1;
4200   static const unsigned OpcTable[2][2] = {
4201     {AArch64::SBFMWri, AArch64::SBFMXri},
4202     {AArch64::UBFMWri, AArch64::UBFMXri}
4203   };
4204   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4205   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4206     Register TmpReg = MRI.createVirtualRegister(RC);
4207     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4208             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4209         .addImm(0)
4210         .addReg(Op0)
4211         .addImm(AArch64::sub_32);
4212     Op0 = TmpReg;
4213   }
4214   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4215 }
4216 
4217 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4218                                      unsigned Op1Reg) {
4219   unsigned Opc = 0;
4220   bool NeedTrunc = false;
4221   uint64_t Mask = 0;
4222   switch (RetVT.SimpleTy) {
4223   default: return 0;
4224   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4225   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4226   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4227   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4228   }
4229 
4230   const TargetRegisterClass *RC =
4231       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4232   if (NeedTrunc) {
4233     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4234     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4235   }
4236   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4237   if (NeedTrunc)
4238     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4239   return ResultReg;
4240 }
4241 
4242 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4243                                      uint64_t Shift, bool IsZExt) {
4244   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4245          "Unexpected source/return type pair.");
4246   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4247           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4248          "Unexpected source value type.");
4249   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4250           RetVT == MVT::i64) && "Unexpected return value type.");
4251 
4252   bool Is64Bit = (RetVT == MVT::i64);
4253   unsigned RegSize = Is64Bit ? 64 : 32;
4254   unsigned DstBits = RetVT.getSizeInBits();
4255   unsigned SrcBits = SrcVT.getSizeInBits();
4256   const TargetRegisterClass *RC =
4257       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4258 
4259   // Just emit a copy for "zero" shifts.
4260   if (Shift == 0) {
4261     if (RetVT == SrcVT) {
4262       Register ResultReg = createResultReg(RC);
4263       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4264               TII.get(TargetOpcode::COPY), ResultReg)
4265       .addReg(Op0);
4266       return ResultReg;
4267     } else
4268       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4269   }
4270 
4271   // Don't deal with undefined shifts.
4272   if (Shift >= DstBits)
4273     return 0;
4274 
4275   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4276   // {S|U}BFM Wd, Wn, #r, #s
4277   // Wd<s-r:0> = Wn<s:r> when r <= s
4278 
4279   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4280   // %2 = ashr i16 %1, 4
4281   // Wd<7-4:0> = Wn<7:4>
4282   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4283   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4284   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4285 
4286   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4287   // %2 = ashr i16 %1, 8
4288   // Wd<7-7,0> = Wn<7:7>
4289   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4290   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4291   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4292 
4293   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4294   // %2 = ashr i16 %1, 12
4295   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4296   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4297   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4298   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4299 
4300   if (Shift >= SrcBits && IsZExt)
4301     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4302 
4303   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4304   unsigned ImmS = SrcBits - 1;
4305   static const unsigned OpcTable[2][2] = {
4306     {AArch64::SBFMWri, AArch64::SBFMXri},
4307     {AArch64::UBFMWri, AArch64::UBFMXri}
4308   };
4309   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4310   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4311     Register TmpReg = MRI.createVirtualRegister(RC);
4312     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4313             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4314         .addImm(0)
4315         .addReg(Op0)
4316         .addImm(AArch64::sub_32);
4317     Op0 = TmpReg;
4318   }
4319   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4320 }
4321 
4322 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4323                                      bool IsZExt) {
4324   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4325 
4326   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4327   // DestVT are odd things, so test to make sure that they are both types we can
4328   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4329   // bail out to SelectionDAG.
4330   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4331        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4332       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
4333        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
4334     return 0;
4335 
4336   unsigned Opc;
4337   unsigned Imm = 0;
4338 
4339   switch (SrcVT.SimpleTy) {
4340   default:
4341     return 0;
4342   case MVT::i1:
4343     return emiti1Ext(SrcReg, DestVT, IsZExt);
4344   case MVT::i8:
4345     if (DestVT == MVT::i64)
4346       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4347     else
4348       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4349     Imm = 7;
4350     break;
4351   case MVT::i16:
4352     if (DestVT == MVT::i64)
4353       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4354     else
4355       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4356     Imm = 15;
4357     break;
4358   case MVT::i32:
4359     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4360     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4361     Imm = 31;
4362     break;
4363   }
4364 
4365   // Handle i8 and i16 as i32.
4366   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4367     DestVT = MVT::i32;
4368   else if (DestVT == MVT::i64) {
4369     Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4370     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4371             TII.get(AArch64::SUBREG_TO_REG), Src64)
4372         .addImm(0)
4373         .addReg(SrcReg)
4374         .addImm(AArch64::sub_32);
4375     SrcReg = Src64;
4376   }
4377 
4378   const TargetRegisterClass *RC =
4379       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4380   return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4381 }
4382 
4383 static bool isZExtLoad(const MachineInstr *LI) {
4384   switch (LI->getOpcode()) {
4385   default:
4386     return false;
4387   case AArch64::LDURBBi:
4388   case AArch64::LDURHHi:
4389   case AArch64::LDURWi:
4390   case AArch64::LDRBBui:
4391   case AArch64::LDRHHui:
4392   case AArch64::LDRWui:
4393   case AArch64::LDRBBroX:
4394   case AArch64::LDRHHroX:
4395   case AArch64::LDRWroX:
4396   case AArch64::LDRBBroW:
4397   case AArch64::LDRHHroW:
4398   case AArch64::LDRWroW:
4399     return true;
4400   }
4401 }
4402 
4403 static bool isSExtLoad(const MachineInstr *LI) {
4404   switch (LI->getOpcode()) {
4405   default:
4406     return false;
4407   case AArch64::LDURSBWi:
4408   case AArch64::LDURSHWi:
4409   case AArch64::LDURSBXi:
4410   case AArch64::LDURSHXi:
4411   case AArch64::LDURSWi:
4412   case AArch64::LDRSBWui:
4413   case AArch64::LDRSHWui:
4414   case AArch64::LDRSBXui:
4415   case AArch64::LDRSHXui:
4416   case AArch64::LDRSWui:
4417   case AArch64::LDRSBWroX:
4418   case AArch64::LDRSHWroX:
4419   case AArch64::LDRSBXroX:
4420   case AArch64::LDRSHXroX:
4421   case AArch64::LDRSWroX:
4422   case AArch64::LDRSBWroW:
4423   case AArch64::LDRSHWroW:
4424   case AArch64::LDRSBXroW:
4425   case AArch64::LDRSHXroW:
4426   case AArch64::LDRSWroW:
4427     return true;
4428   }
4429 }
4430 
4431 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4432                                          MVT SrcVT) {
4433   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4434   if (!LI || !LI->hasOneUse())
4435     return false;
4436 
4437   // Check if the load instruction has already been selected.
4438   Register Reg = lookUpRegForValue(LI);
4439   if (!Reg)
4440     return false;
4441 
4442   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4443   if (!MI)
4444     return false;
4445 
4446   // Check if the correct load instruction has been emitted - SelectionDAG might
4447   // have emitted a zero-extending load, but we need a sign-extending load.
4448   bool IsZExt = isa<ZExtInst>(I);
4449   const auto *LoadMI = MI;
4450   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4451       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4452     Register LoadReg = MI->getOperand(1).getReg();
4453     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4454     assert(LoadMI && "Expected valid instruction");
4455   }
4456   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4457     return false;
4458 
4459   // Nothing to be done.
4460   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4461     updateValueMap(I, Reg);
4462     return true;
4463   }
4464 
4465   if (IsZExt) {
4466     Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4467     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4468             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4469         .addImm(0)
4470         .addReg(Reg, getKillRegState(true))
4471         .addImm(AArch64::sub_32);
4472     Reg = Reg64;
4473   } else {
4474     assert((MI->getOpcode() == TargetOpcode::COPY &&
4475             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4476            "Expected copy instruction");
4477     Reg = MI->getOperand(1).getReg();
4478     MachineBasicBlock::iterator I(MI);
4479     removeDeadCode(I, std::next(I));
4480   }
4481   updateValueMap(I, Reg);
4482   return true;
4483 }
4484 
4485 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4486   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4487          "Unexpected integer extend instruction.");
4488   MVT RetVT;
4489   MVT SrcVT;
4490   if (!isTypeSupported(I->getType(), RetVT))
4491     return false;
4492 
4493   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4494     return false;
4495 
4496   // Try to optimize already sign-/zero-extended values from load instructions.
4497   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4498     return true;
4499 
4500   Register SrcReg = getRegForValue(I->getOperand(0));
4501   if (!SrcReg)
4502     return false;
4503 
4504   // Try to optimize already sign-/zero-extended values from function arguments.
4505   bool IsZExt = isa<ZExtInst>(I);
4506   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4507     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4508       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4509         Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4510         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4511                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4512             .addImm(0)
4513             .addReg(SrcReg)
4514             .addImm(AArch64::sub_32);
4515         SrcReg = ResultReg;
4516       }
4517 
4518       updateValueMap(I, SrcReg);
4519       return true;
4520     }
4521   }
4522 
4523   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4524   if (!ResultReg)
4525     return false;
4526 
4527   updateValueMap(I, ResultReg);
4528   return true;
4529 }
4530 
4531 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4532   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4533   if (!DestEVT.isSimple())
4534     return false;
4535 
4536   MVT DestVT = DestEVT.getSimpleVT();
4537   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4538     return false;
4539 
4540   unsigned DivOpc;
4541   bool Is64bit = (DestVT == MVT::i64);
4542   switch (ISDOpcode) {
4543   default:
4544     return false;
4545   case ISD::SREM:
4546     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4547     break;
4548   case ISD::UREM:
4549     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4550     break;
4551   }
4552   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4553   Register Src0Reg = getRegForValue(I->getOperand(0));
4554   if (!Src0Reg)
4555     return false;
4556 
4557   Register Src1Reg = getRegForValue(I->getOperand(1));
4558   if (!Src1Reg)
4559     return false;
4560 
4561   const TargetRegisterClass *RC =
4562       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4563   Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4564   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4565   // The remainder is computed as numerator - (quotient * denominator) using the
4566   // MSUB instruction.
4567   Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4568   updateValueMap(I, ResultReg);
4569   return true;
4570 }
4571 
4572 bool AArch64FastISel::selectMul(const Instruction *I) {
4573   MVT VT;
4574   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4575     return false;
4576 
4577   if (VT.isVector())
4578     return selectBinaryOp(I, ISD::MUL);
4579 
4580   const Value *Src0 = I->getOperand(0);
4581   const Value *Src1 = I->getOperand(1);
4582   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4583     if (C->getValue().isPowerOf2())
4584       std::swap(Src0, Src1);
4585 
4586   // Try to simplify to a shift instruction.
4587   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4588     if (C->getValue().isPowerOf2()) {
4589       uint64_t ShiftVal = C->getValue().logBase2();
4590       MVT SrcVT = VT;
4591       bool IsZExt = true;
4592       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4593         if (!isIntExtFree(ZExt)) {
4594           MVT VT;
4595           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4596             SrcVT = VT;
4597             IsZExt = true;
4598             Src0 = ZExt->getOperand(0);
4599           }
4600         }
4601       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4602         if (!isIntExtFree(SExt)) {
4603           MVT VT;
4604           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4605             SrcVT = VT;
4606             IsZExt = false;
4607             Src0 = SExt->getOperand(0);
4608           }
4609         }
4610       }
4611 
4612       Register Src0Reg = getRegForValue(Src0);
4613       if (!Src0Reg)
4614         return false;
4615 
4616       unsigned ResultReg =
4617           emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4618 
4619       if (ResultReg) {
4620         updateValueMap(I, ResultReg);
4621         return true;
4622       }
4623     }
4624 
4625   Register Src0Reg = getRegForValue(I->getOperand(0));
4626   if (!Src0Reg)
4627     return false;
4628 
4629   Register Src1Reg = getRegForValue(I->getOperand(1));
4630   if (!Src1Reg)
4631     return false;
4632 
4633   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4634 
4635   if (!ResultReg)
4636     return false;
4637 
4638   updateValueMap(I, ResultReg);
4639   return true;
4640 }
4641 
4642 bool AArch64FastISel::selectShift(const Instruction *I) {
4643   MVT RetVT;
4644   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4645     return false;
4646 
4647   if (RetVT.isVector())
4648     return selectOperator(I, I->getOpcode());
4649 
4650   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4651     unsigned ResultReg = 0;
4652     uint64_t ShiftVal = C->getZExtValue();
4653     MVT SrcVT = RetVT;
4654     bool IsZExt = I->getOpcode() != Instruction::AShr;
4655     const Value *Op0 = I->getOperand(0);
4656     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4657       if (!isIntExtFree(ZExt)) {
4658         MVT TmpVT;
4659         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4660           SrcVT = TmpVT;
4661           IsZExt = true;
4662           Op0 = ZExt->getOperand(0);
4663         }
4664       }
4665     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4666       if (!isIntExtFree(SExt)) {
4667         MVT TmpVT;
4668         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4669           SrcVT = TmpVT;
4670           IsZExt = false;
4671           Op0 = SExt->getOperand(0);
4672         }
4673       }
4674     }
4675 
4676     Register Op0Reg = getRegForValue(Op0);
4677     if (!Op0Reg)
4678       return false;
4679 
4680     switch (I->getOpcode()) {
4681     default: llvm_unreachable("Unexpected instruction.");
4682     case Instruction::Shl:
4683       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4684       break;
4685     case Instruction::AShr:
4686       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4687       break;
4688     case Instruction::LShr:
4689       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4690       break;
4691     }
4692     if (!ResultReg)
4693       return false;
4694 
4695     updateValueMap(I, ResultReg);
4696     return true;
4697   }
4698 
4699   Register Op0Reg = getRegForValue(I->getOperand(0));
4700   if (!Op0Reg)
4701     return false;
4702 
4703   Register Op1Reg = getRegForValue(I->getOperand(1));
4704   if (!Op1Reg)
4705     return false;
4706 
4707   unsigned ResultReg = 0;
4708   switch (I->getOpcode()) {
4709   default: llvm_unreachable("Unexpected instruction.");
4710   case Instruction::Shl:
4711     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4712     break;
4713   case Instruction::AShr:
4714     ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4715     break;
4716   case Instruction::LShr:
4717     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4718     break;
4719   }
4720 
4721   if (!ResultReg)
4722     return false;
4723 
4724   updateValueMap(I, ResultReg);
4725   return true;
4726 }
4727 
4728 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4729   MVT RetVT, SrcVT;
4730 
4731   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4732     return false;
4733   if (!isTypeLegal(I->getType(), RetVT))
4734     return false;
4735 
4736   unsigned Opc;
4737   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4738     Opc = AArch64::FMOVWSr;
4739   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4740     Opc = AArch64::FMOVXDr;
4741   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4742     Opc = AArch64::FMOVSWr;
4743   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4744     Opc = AArch64::FMOVDXr;
4745   else
4746     return false;
4747 
4748   const TargetRegisterClass *RC = nullptr;
4749   switch (RetVT.SimpleTy) {
4750   default: llvm_unreachable("Unexpected value type.");
4751   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4752   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4753   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4754   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4755   }
4756   Register Op0Reg = getRegForValue(I->getOperand(0));
4757   if (!Op0Reg)
4758     return false;
4759 
4760   Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4761   if (!ResultReg)
4762     return false;
4763 
4764   updateValueMap(I, ResultReg);
4765   return true;
4766 }
4767 
4768 bool AArch64FastISel::selectFRem(const Instruction *I) {
4769   MVT RetVT;
4770   if (!isTypeLegal(I->getType(), RetVT))
4771     return false;
4772 
4773   RTLIB::Libcall LC;
4774   switch (RetVT.SimpleTy) {
4775   default:
4776     return false;
4777   case MVT::f32:
4778     LC = RTLIB::REM_F32;
4779     break;
4780   case MVT::f64:
4781     LC = RTLIB::REM_F64;
4782     break;
4783   }
4784 
4785   ArgListTy Args;
4786   Args.reserve(I->getNumOperands());
4787 
4788   // Populate the argument list.
4789   for (auto &Arg : I->operands()) {
4790     ArgListEntry Entry;
4791     Entry.Val = Arg;
4792     Entry.Ty = Arg->getType();
4793     Args.push_back(Entry);
4794   }
4795 
4796   CallLoweringInfo CLI;
4797   MCContext &Ctx = MF->getContext();
4798   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4799                 TLI.getLibcallName(LC), std::move(Args));
4800   if (!lowerCallTo(CLI))
4801     return false;
4802   updateValueMap(I, CLI.ResultReg);
4803   return true;
4804 }
4805 
4806 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4807   MVT VT;
4808   if (!isTypeLegal(I->getType(), VT))
4809     return false;
4810 
4811   if (!isa<ConstantInt>(I->getOperand(1)))
4812     return selectBinaryOp(I, ISD::SDIV);
4813 
4814   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4815   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4816       !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4817     return selectBinaryOp(I, ISD::SDIV);
4818 
4819   unsigned Lg2 = C.countTrailingZeros();
4820   Register Src0Reg = getRegForValue(I->getOperand(0));
4821   if (!Src0Reg)
4822     return false;
4823 
4824   if (cast<BinaryOperator>(I)->isExact()) {
4825     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4826     if (!ResultReg)
4827       return false;
4828     updateValueMap(I, ResultReg);
4829     return true;
4830   }
4831 
4832   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4833   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4834   if (!AddReg)
4835     return false;
4836 
4837   // (Src0 < 0) ? Pow2 - 1 : 0;
4838   if (!emitICmp_ri(VT, Src0Reg, 0))
4839     return false;
4840 
4841   unsigned SelectOpc;
4842   const TargetRegisterClass *RC;
4843   if (VT == MVT::i64) {
4844     SelectOpc = AArch64::CSELXr;
4845     RC = &AArch64::GPR64RegClass;
4846   } else {
4847     SelectOpc = AArch64::CSELWr;
4848     RC = &AArch64::GPR32RegClass;
4849   }
4850   Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4851                                         AArch64CC::LT);
4852   if (!SelectReg)
4853     return false;
4854 
4855   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4856   // negate the result.
4857   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4858   unsigned ResultReg;
4859   if (C.isNegative())
4860     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4861                               AArch64_AM::ASR, Lg2);
4862   else
4863     ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4864 
4865   if (!ResultReg)
4866     return false;
4867 
4868   updateValueMap(I, ResultReg);
4869   return true;
4870 }
4871 
4872 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4873 /// have to duplicate it for AArch64, because otherwise we would fail during the
4874 /// sign-extend emission.
4875 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4876   Register IdxN = getRegForValue(Idx);
4877   if (IdxN == 0)
4878     // Unhandled operand. Halt "fast" selection and bail.
4879     return 0;
4880 
4881   // If the index is smaller or larger than intptr_t, truncate or extend it.
4882   MVT PtrVT = TLI.getPointerTy(DL);
4883   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4884   if (IdxVT.bitsLT(PtrVT)) {
4885     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4886   } else if (IdxVT.bitsGT(PtrVT))
4887     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4888   return IdxN;
4889 }
4890 
4891 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4892 /// duplicate it for AArch64, because otherwise we would bail out even for
4893 /// simple cases. This is because the standard fastEmit functions don't cover
4894 /// MUL at all and ADD is lowered very inefficientily.
4895 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4896   if (Subtarget->isTargetILP32())
4897     return false;
4898 
4899   Register N = getRegForValue(I->getOperand(0));
4900   if (!N)
4901     return false;
4902 
4903   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4904   // into a single N = N + TotalOffset.
4905   uint64_t TotalOffs = 0;
4906   MVT VT = TLI.getPointerTy(DL);
4907   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4908        GTI != E; ++GTI) {
4909     const Value *Idx = GTI.getOperand();
4910     if (auto *StTy = GTI.getStructTypeOrNull()) {
4911       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4912       // N = N + Offset
4913       if (Field)
4914         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4915     } else {
4916       Type *Ty = GTI.getIndexedType();
4917 
4918       // If this is a constant subscript, handle it quickly.
4919       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4920         if (CI->isZero())
4921           continue;
4922         // N = N + Offset
4923         TotalOffs +=
4924             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4925         continue;
4926       }
4927       if (TotalOffs) {
4928         N = emitAdd_ri_(VT, N, TotalOffs);
4929         if (!N)
4930           return false;
4931         TotalOffs = 0;
4932       }
4933 
4934       // N = N + Idx * ElementSize;
4935       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4936       unsigned IdxN = getRegForGEPIndex(Idx);
4937       if (!IdxN)
4938         return false;
4939 
4940       if (ElementSize != 1) {
4941         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4942         if (!C)
4943           return false;
4944         IdxN = emitMul_rr(VT, IdxN, C);
4945         if (!IdxN)
4946           return false;
4947       }
4948       N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
4949       if (!N)
4950         return false;
4951     }
4952   }
4953   if (TotalOffs) {
4954     N = emitAdd_ri_(VT, N, TotalOffs);
4955     if (!N)
4956       return false;
4957   }
4958   updateValueMap(I, N);
4959   return true;
4960 }
4961 
4962 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4963   assert(TM.getOptLevel() == CodeGenOpt::None &&
4964          "cmpxchg survived AtomicExpand at optlevel > -O0");
4965 
4966   auto *RetPairTy = cast<StructType>(I->getType());
4967   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
4968   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
4969          "cmpxchg has a non-i1 status result");
4970 
4971   MVT VT;
4972   if (!isTypeLegal(RetTy, VT))
4973     return false;
4974 
4975   const TargetRegisterClass *ResRC;
4976   unsigned Opc, CmpOpc;
4977   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
4978   // extractvalue selection doesn't support that.
4979   if (VT == MVT::i32) {
4980     Opc = AArch64::CMP_SWAP_32;
4981     CmpOpc = AArch64::SUBSWrs;
4982     ResRC = &AArch64::GPR32RegClass;
4983   } else if (VT == MVT::i64) {
4984     Opc = AArch64::CMP_SWAP_64;
4985     CmpOpc = AArch64::SUBSXrs;
4986     ResRC = &AArch64::GPR64RegClass;
4987   } else {
4988     return false;
4989   }
4990 
4991   const MCInstrDesc &II = TII.get(Opc);
4992 
4993   const Register AddrReg = constrainOperandRegClass(
4994       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
4995   const Register DesiredReg = constrainOperandRegClass(
4996       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
4997   const Register NewReg = constrainOperandRegClass(
4998       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
4999 
5000   const Register ResultReg1 = createResultReg(ResRC);
5001   const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5002   const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5003 
5004   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5005   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5006       .addDef(ResultReg1)
5007       .addDef(ScratchReg)
5008       .addUse(AddrReg)
5009       .addUse(DesiredReg)
5010       .addUse(NewReg);
5011 
5012   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5013       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5014       .addUse(ResultReg1)
5015       .addUse(DesiredReg)
5016       .addImm(0);
5017 
5018   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5019       .addDef(ResultReg2)
5020       .addUse(AArch64::WZR)
5021       .addUse(AArch64::WZR)
5022       .addImm(AArch64CC::NE);
5023 
5024   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5025   updateValueMap(I, ResultReg1, 2);
5026   return true;
5027 }
5028 
5029 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5030   switch (I->getOpcode()) {
5031   default:
5032     break;
5033   case Instruction::Add:
5034   case Instruction::Sub:
5035     return selectAddSub(I);
5036   case Instruction::Mul:
5037     return selectMul(I);
5038   case Instruction::SDiv:
5039     return selectSDiv(I);
5040   case Instruction::SRem:
5041     if (!selectBinaryOp(I, ISD::SREM))
5042       return selectRem(I, ISD::SREM);
5043     return true;
5044   case Instruction::URem:
5045     if (!selectBinaryOp(I, ISD::UREM))
5046       return selectRem(I, ISD::UREM);
5047     return true;
5048   case Instruction::Shl:
5049   case Instruction::LShr:
5050   case Instruction::AShr:
5051     return selectShift(I);
5052   case Instruction::And:
5053   case Instruction::Or:
5054   case Instruction::Xor:
5055     return selectLogicalOp(I);
5056   case Instruction::Br:
5057     return selectBranch(I);
5058   case Instruction::IndirectBr:
5059     return selectIndirectBr(I);
5060   case Instruction::BitCast:
5061     if (!FastISel::selectBitCast(I))
5062       return selectBitCast(I);
5063     return true;
5064   case Instruction::FPToSI:
5065     if (!selectCast(I, ISD::FP_TO_SINT))
5066       return selectFPToInt(I, /*Signed=*/true);
5067     return true;
5068   case Instruction::FPToUI:
5069     return selectFPToInt(I, /*Signed=*/false);
5070   case Instruction::ZExt:
5071   case Instruction::SExt:
5072     return selectIntExt(I);
5073   case Instruction::Trunc:
5074     if (!selectCast(I, ISD::TRUNCATE))
5075       return selectTrunc(I);
5076     return true;
5077   case Instruction::FPExt:
5078     return selectFPExt(I);
5079   case Instruction::FPTrunc:
5080     return selectFPTrunc(I);
5081   case Instruction::SIToFP:
5082     if (!selectCast(I, ISD::SINT_TO_FP))
5083       return selectIntToFP(I, /*Signed=*/true);
5084     return true;
5085   case Instruction::UIToFP:
5086     return selectIntToFP(I, /*Signed=*/false);
5087   case Instruction::Load:
5088     return selectLoad(I);
5089   case Instruction::Store:
5090     return selectStore(I);
5091   case Instruction::FCmp:
5092   case Instruction::ICmp:
5093     return selectCmp(I);
5094   case Instruction::Select:
5095     return selectSelect(I);
5096   case Instruction::Ret:
5097     return selectRet(I);
5098   case Instruction::FRem:
5099     return selectFRem(I);
5100   case Instruction::GetElementPtr:
5101     return selectGetElementPtr(I);
5102   case Instruction::AtomicCmpXchg:
5103     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5104   }
5105 
5106   // fall-back to target-independent instruction selection.
5107   return selectOperator(I, I->getOpcode());
5108 }
5109 
5110 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5111                                         const TargetLibraryInfo *LibInfo) {
5112   return new AArch64FastISel(FuncInfo, LibInfo);
5113 }
5114