xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "MCTargetDesc/AArch64AddressingModes.h"
20 #include "Utils/AArch64BaseInfo.h"
21 #include "llvm/ADT/APFloat.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/Analysis/BranchProbabilityInfo.h"
26 #include "llvm/CodeGen/CallingConvLower.h"
27 #include "llvm/CodeGen/FastISel.h"
28 #include "llvm/CodeGen/FunctionLoweringInfo.h"
29 #include "llvm/CodeGen/ISDOpcodes.h"
30 #include "llvm/CodeGen/MachineBasicBlock.h"
31 #include "llvm/CodeGen/MachineConstantPool.h"
32 #include "llvm/CodeGen/MachineFrameInfo.h"
33 #include "llvm/CodeGen/MachineInstr.h"
34 #include "llvm/CodeGen/MachineInstrBuilder.h"
35 #include "llvm/CodeGen/MachineMemOperand.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/CodeGen/RuntimeLibcalls.h"
38 #include "llvm/CodeGen/ValueTypes.h"
39 #include "llvm/IR/Argument.h"
40 #include "llvm/IR/Attributes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/Constant.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DataLayout.h"
46 #include "llvm/IR/DerivedTypes.h"
47 #include "llvm/IR/Function.h"
48 #include "llvm/IR/GetElementPtrTypeIterator.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/Intrinsics.h"
55 #include "llvm/IR/Operator.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/User.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/MC/MCInstrDesc.h"
60 #include "llvm/MC/MCRegisterInfo.h"
61 #include "llvm/MC/MCSymbol.h"
62 #include "llvm/Support/AtomicOrdering.h"
63 #include "llvm/Support/Casting.h"
64 #include "llvm/Support/CodeGen.h"
65 #include "llvm/Support/Compiler.h"
66 #include "llvm/Support/ErrorHandling.h"
67 #include "llvm/Support/MachineValueType.h"
68 #include "llvm/Support/MathExtras.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <iterator>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 namespace {
78 
79 class AArch64FastISel final : public FastISel {
80   class Address {
81   public:
82     using BaseKind = enum {
83       RegBase,
84       FrameIndexBase
85     };
86 
87   private:
88     BaseKind Kind = RegBase;
89     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
90     union {
91       unsigned Reg;
92       int FI;
93     } Base;
94     unsigned OffsetReg = 0;
95     unsigned Shift = 0;
96     int64_t Offset = 0;
97     const GlobalValue *GV = nullptr;
98 
99   public:
100     Address() { Base.Reg = 0; }
101 
102     void setKind(BaseKind K) { Kind = K; }
103     BaseKind getKind() const { return Kind; }
104     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
105     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
106     bool isRegBase() const { return Kind == RegBase; }
107     bool isFIBase() const { return Kind == FrameIndexBase; }
108 
109     void setReg(unsigned Reg) {
110       assert(isRegBase() && "Invalid base register access!");
111       Base.Reg = Reg;
112     }
113 
114     unsigned getReg() const {
115       assert(isRegBase() && "Invalid base register access!");
116       return Base.Reg;
117     }
118 
119     void setOffsetReg(unsigned Reg) {
120       OffsetReg = Reg;
121     }
122 
123     unsigned getOffsetReg() const {
124       return OffsetReg;
125     }
126 
127     void setFI(unsigned FI) {
128       assert(isFIBase() && "Invalid base frame index  access!");
129       Base.FI = FI;
130     }
131 
132     unsigned getFI() const {
133       assert(isFIBase() && "Invalid base frame index access!");
134       return Base.FI;
135     }
136 
137     void setOffset(int64_t O) { Offset = O; }
138     int64_t getOffset() { return Offset; }
139     void setShift(unsigned S) { Shift = S; }
140     unsigned getShift() { return Shift; }
141 
142     void setGlobalValue(const GlobalValue *G) { GV = G; }
143     const GlobalValue *getGlobalValue() { return GV; }
144   };
145 
146   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
147   /// make the right decision when generating code for different targets.
148   const AArch64Subtarget *Subtarget;
149   LLVMContext *Context;
150 
151   bool fastLowerArguments() override;
152   bool fastLowerCall(CallLoweringInfo &CLI) override;
153   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
154 
155 private:
156   // Selection routines.
157   bool selectAddSub(const Instruction *I);
158   bool selectLogicalOp(const Instruction *I);
159   bool selectLoad(const Instruction *I);
160   bool selectStore(const Instruction *I);
161   bool selectBranch(const Instruction *I);
162   bool selectIndirectBr(const Instruction *I);
163   bool selectCmp(const Instruction *I);
164   bool selectSelect(const Instruction *I);
165   bool selectFPExt(const Instruction *I);
166   bool selectFPTrunc(const Instruction *I);
167   bool selectFPToInt(const Instruction *I, bool Signed);
168   bool selectIntToFP(const Instruction *I, bool Signed);
169   bool selectRem(const Instruction *I, unsigned ISDOpcode);
170   bool selectRet(const Instruction *I);
171   bool selectTrunc(const Instruction *I);
172   bool selectIntExt(const Instruction *I);
173   bool selectMul(const Instruction *I);
174   bool selectShift(const Instruction *I);
175   bool selectBitCast(const Instruction *I);
176   bool selectFRem(const Instruction *I);
177   bool selectSDiv(const Instruction *I);
178   bool selectGetElementPtr(const Instruction *I);
179   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
180 
181   // Utility helper routines.
182   bool isTypeLegal(Type *Ty, MVT &VT);
183   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
184   bool isValueAvailable(const Value *V) const;
185   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
186   bool computeCallAddress(const Value *V, Address &Addr);
187   bool simplifyAddress(Address &Addr, MVT VT);
188   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
189                             MachineMemOperand::Flags Flags,
190                             unsigned ScaleFactor, MachineMemOperand *MMO);
191   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
192   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
193                           unsigned Alignment);
194   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
195                          const Value *Cond);
196   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
197   bool optimizeSelect(const SelectInst *SI);
198   unsigned getRegForGEPIndex(const Value *Idx);
199 
200   // Emit helper routines.
201   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
202                       const Value *RHS, bool SetFlags = false,
203                       bool WantResult = true,  bool IsZExt = false);
204   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
205                          unsigned RHSReg, bool SetFlags = false,
206                          bool WantResult = true);
207   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
208                          uint64_t Imm, bool SetFlags = false,
209                          bool WantResult = true);
210   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
211                          unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
212                          uint64_t ShiftImm, bool SetFlags = false,
213                          bool WantResult = true);
214   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
215                          unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
216                          uint64_t ShiftImm, bool SetFlags = false,
217                          bool WantResult = true);
218 
219   // Emit functions.
220   bool emitCompareAndBranch(const BranchInst *BI);
221   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
222   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
223   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
224   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
225   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
226                     MachineMemOperand *MMO = nullptr);
227   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
228                  MachineMemOperand *MMO = nullptr);
229   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
230                         MachineMemOperand *MMO = nullptr);
231   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
232   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
233   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
234                    bool SetFlags = false, bool WantResult = true,
235                    bool IsZExt = false);
236   unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
237   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
238                    bool SetFlags = false, bool WantResult = true,
239                    bool IsZExt = false);
240   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
241                        bool WantResult = true);
242   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
243                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
244                        bool WantResult = true);
245   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
246                          const Value *RHS);
247   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
248                             uint64_t Imm);
249   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
250                             unsigned RHSReg, uint64_t ShiftImm);
251   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
252   unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
253   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
254   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
256   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
257                       bool IsZExt = true);
258   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
259   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
260                       bool IsZExt = true);
261   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
262   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
263                       bool IsZExt = false);
264 
265   unsigned materializeInt(const ConstantInt *CI, MVT VT);
266   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
267   unsigned materializeGV(const GlobalValue *GV);
268 
269   // Call handling routines.
270 private:
271   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
272   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
273                        unsigned &NumBytes);
274   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
275 
276 public:
277   // Backend specific FastISel code.
278   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
279   unsigned fastMaterializeConstant(const Constant *C) override;
280   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
281 
282   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
283                            const TargetLibraryInfo *LibInfo)
284       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
285     Subtarget =
286         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
287     Context = &FuncInfo.Fn->getContext();
288   }
289 
290   bool fastSelectInstruction(const Instruction *I) override;
291 
292 #include "AArch64GenFastISel.inc"
293 };
294 
295 } // end anonymous namespace
296 
297 /// Check if the sign-/zero-extend will be a noop.
298 static bool isIntExtFree(const Instruction *I) {
299   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
300          "Unexpected integer extend instruction.");
301   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
302          "Unexpected value type.");
303   bool IsZExt = isa<ZExtInst>(I);
304 
305   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
306     if (LI->hasOneUse())
307       return true;
308 
309   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
310     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
311       return true;
312 
313   return false;
314 }
315 
316 /// Determine the implicit scale factor that is applied by a memory
317 /// operation for a given value type.
318 static unsigned getImplicitScaleFactor(MVT VT) {
319   switch (VT.SimpleTy) {
320   default:
321     return 0;    // invalid
322   case MVT::i1:  // fall-through
323   case MVT::i8:
324     return 1;
325   case MVT::i16:
326     return 2;
327   case MVT::i32: // fall-through
328   case MVT::f32:
329     return 4;
330   case MVT::i64: // fall-through
331   case MVT::f64:
332     return 8;
333   }
334 }
335 
336 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
337   if (CC == CallingConv::WebKit_JS)
338     return CC_AArch64_WebKit_JS;
339   if (CC == CallingConv::GHC)
340     return CC_AArch64_GHC;
341   if (CC == CallingConv::CFGuard_Check)
342     return CC_AArch64_Win64_CFGuard_Check;
343   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
344 }
345 
346 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
347   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
348          "Alloca should always return a pointer.");
349 
350   // Don't handle dynamic allocas.
351   if (!FuncInfo.StaticAllocaMap.count(AI))
352     return 0;
353 
354   DenseMap<const AllocaInst *, int>::iterator SI =
355       FuncInfo.StaticAllocaMap.find(AI);
356 
357   if (SI != FuncInfo.StaticAllocaMap.end()) {
358     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
359     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
360             ResultReg)
361         .addFrameIndex(SI->second)
362         .addImm(0)
363         .addImm(0);
364     return ResultReg;
365   }
366 
367   return 0;
368 }
369 
370 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
371   if (VT > MVT::i64)
372     return 0;
373 
374   if (!CI->isZero())
375     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
376 
377   // Create a copy from the zero register to materialize a "0" value.
378   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
379                                                    : &AArch64::GPR32RegClass;
380   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
381   unsigned ResultReg = createResultReg(RC);
382   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
383           ResultReg).addReg(ZeroReg, getKillRegState(true));
384   return ResultReg;
385 }
386 
387 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
388   // Positive zero (+0.0) has to be materialized with a fmov from the zero
389   // register, because the immediate version of fmov cannot encode zero.
390   if (CFP->isNullValue())
391     return fastMaterializeFloatZero(CFP);
392 
393   if (VT != MVT::f32 && VT != MVT::f64)
394     return 0;
395 
396   const APFloat Val = CFP->getValueAPF();
397   bool Is64Bit = (VT == MVT::f64);
398   // This checks to see if we can use FMOV instructions to materialize
399   // a constant, otherwise we have to materialize via the constant pool.
400   int Imm =
401       Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
402   if (Imm != -1) {
403     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
404     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
405   }
406 
407   // For the large code model materialize the FP constant in code.
408   if (TM.getCodeModel() == CodeModel::Large) {
409     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
410     const TargetRegisterClass *RC = Is64Bit ?
411         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
412 
413     unsigned TmpReg = createResultReg(RC);
414     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
415         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
416 
417     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
418     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
419             TII.get(TargetOpcode::COPY), ResultReg)
420         .addReg(TmpReg, getKillRegState(true));
421 
422     return ResultReg;
423   }
424 
425   // Materialize via constant pool.  MachineConstantPool wants an explicit
426   // alignment.
427   Align Alignment = DL.getPrefTypeAlign(CFP->getType());
428 
429   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
430   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
431   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
432           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
433 
434   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
435   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
436   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
437       .addReg(ADRPReg)
438       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
439   return ResultReg;
440 }
441 
442 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
443   // We can't handle thread-local variables quickly yet.
444   if (GV->isThreadLocal())
445     return 0;
446 
447   // MachO still uses GOT for large code-model accesses, but ELF requires
448   // movz/movk sequences, which FastISel doesn't handle yet.
449   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
450     return 0;
451 
452   unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
453 
454   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
455   if (!DestEVT.isSimple())
456     return 0;
457 
458   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
459   unsigned ResultReg;
460 
461   if (OpFlags & AArch64II::MO_GOT) {
462     // ADRP + LDRX
463     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
464             ADRPReg)
465         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
466 
467     unsigned LdrOpc;
468     if (Subtarget->isTargetILP32()) {
469       ResultReg = createResultReg(&AArch64::GPR32RegClass);
470       LdrOpc = AArch64::LDRWui;
471     } else {
472       ResultReg = createResultReg(&AArch64::GPR64RegClass);
473       LdrOpc = AArch64::LDRXui;
474     }
475     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
476             ResultReg)
477       .addReg(ADRPReg)
478       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
479                         AArch64II::MO_NC | OpFlags);
480     if (!Subtarget->isTargetILP32())
481       return ResultReg;
482 
483     // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
484     // so we must extend the result on ILP32.
485     unsigned Result64 = createResultReg(&AArch64::GPR64RegClass);
486     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
487             TII.get(TargetOpcode::SUBREG_TO_REG))
488         .addDef(Result64)
489         .addImm(0)
490         .addReg(ResultReg, RegState::Kill)
491         .addImm(AArch64::sub_32);
492     return Result64;
493   } else {
494     // ADRP + ADDX
495     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
496             ADRPReg)
497         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
498 
499     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
500     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
501             ResultReg)
502         .addReg(ADRPReg)
503         .addGlobalAddress(GV, 0,
504                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
505         .addImm(0);
506   }
507   return ResultReg;
508 }
509 
510 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
511   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
512 
513   // Only handle simple types.
514   if (!CEVT.isSimple())
515     return 0;
516   MVT VT = CEVT.getSimpleVT();
517   // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
518   // 'null' pointers need to have a somewhat special treatment.
519   if (isa<ConstantPointerNull>(C)) {
520     assert(VT == MVT::i64 && "Expected 64-bit pointers");
521     return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
522   }
523 
524   if (const auto *CI = dyn_cast<ConstantInt>(C))
525     return materializeInt(CI, VT);
526   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
527     return materializeFP(CFP, VT);
528   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
529     return materializeGV(GV);
530 
531   return 0;
532 }
533 
534 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
535   assert(CFP->isNullValue() &&
536          "Floating-point constant is not a positive zero.");
537   MVT VT;
538   if (!isTypeLegal(CFP->getType(), VT))
539     return 0;
540 
541   if (VT != MVT::f32 && VT != MVT::f64)
542     return 0;
543 
544   bool Is64Bit = (VT == MVT::f64);
545   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
546   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
547   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
548 }
549 
550 /// Check if the multiply is by a power-of-2 constant.
551 static bool isMulPowOf2(const Value *I) {
552   if (const auto *MI = dyn_cast<MulOperator>(I)) {
553     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
554       if (C->getValue().isPowerOf2())
555         return true;
556     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
557       if (C->getValue().isPowerOf2())
558         return true;
559   }
560   return false;
561 }
562 
563 // Computes the address to get to an object.
564 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
565 {
566   const User *U = nullptr;
567   unsigned Opcode = Instruction::UserOp1;
568   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
569     // Don't walk into other basic blocks unless the object is an alloca from
570     // another block, otherwise it may not have a virtual register assigned.
571     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
572         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
573       Opcode = I->getOpcode();
574       U = I;
575     }
576   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
577     Opcode = C->getOpcode();
578     U = C;
579   }
580 
581   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
582     if (Ty->getAddressSpace() > 255)
583       // Fast instruction selection doesn't support the special
584       // address spaces.
585       return false;
586 
587   switch (Opcode) {
588   default:
589     break;
590   case Instruction::BitCast:
591     // Look through bitcasts.
592     return computeAddress(U->getOperand(0), Addr, Ty);
593 
594   case Instruction::IntToPtr:
595     // Look past no-op inttoptrs.
596     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
597         TLI.getPointerTy(DL))
598       return computeAddress(U->getOperand(0), Addr, Ty);
599     break;
600 
601   case Instruction::PtrToInt:
602     // Look past no-op ptrtoints.
603     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
604       return computeAddress(U->getOperand(0), Addr, Ty);
605     break;
606 
607   case Instruction::GetElementPtr: {
608     Address SavedAddr = Addr;
609     uint64_t TmpOffset = Addr.getOffset();
610 
611     // Iterate through the GEP folding the constants into offsets where
612     // we can.
613     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
614          GTI != E; ++GTI) {
615       const Value *Op = GTI.getOperand();
616       if (StructType *STy = GTI.getStructTypeOrNull()) {
617         const StructLayout *SL = DL.getStructLayout(STy);
618         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
619         TmpOffset += SL->getElementOffset(Idx);
620       } else {
621         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
622         while (true) {
623           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
624             // Constant-offset addressing.
625             TmpOffset += CI->getSExtValue() * S;
626             break;
627           }
628           if (canFoldAddIntoGEP(U, Op)) {
629             // A compatible add with a constant operand. Fold the constant.
630             ConstantInt *CI =
631                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
632             TmpOffset += CI->getSExtValue() * S;
633             // Iterate on the other operand.
634             Op = cast<AddOperator>(Op)->getOperand(0);
635             continue;
636           }
637           // Unsupported
638           goto unsupported_gep;
639         }
640       }
641     }
642 
643     // Try to grab the base operand now.
644     Addr.setOffset(TmpOffset);
645     if (computeAddress(U->getOperand(0), Addr, Ty))
646       return true;
647 
648     // We failed, restore everything and try the other options.
649     Addr = SavedAddr;
650 
651   unsupported_gep:
652     break;
653   }
654   case Instruction::Alloca: {
655     const AllocaInst *AI = cast<AllocaInst>(Obj);
656     DenseMap<const AllocaInst *, int>::iterator SI =
657         FuncInfo.StaticAllocaMap.find(AI);
658     if (SI != FuncInfo.StaticAllocaMap.end()) {
659       Addr.setKind(Address::FrameIndexBase);
660       Addr.setFI(SI->second);
661       return true;
662     }
663     break;
664   }
665   case Instruction::Add: {
666     // Adds of constants are common and easy enough.
667     const Value *LHS = U->getOperand(0);
668     const Value *RHS = U->getOperand(1);
669 
670     if (isa<ConstantInt>(LHS))
671       std::swap(LHS, RHS);
672 
673     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
674       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
675       return computeAddress(LHS, Addr, Ty);
676     }
677 
678     Address Backup = Addr;
679     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
680       return true;
681     Addr = Backup;
682 
683     break;
684   }
685   case Instruction::Sub: {
686     // Subs of constants are common and easy enough.
687     const Value *LHS = U->getOperand(0);
688     const Value *RHS = U->getOperand(1);
689 
690     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
691       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
692       return computeAddress(LHS, Addr, Ty);
693     }
694     break;
695   }
696   case Instruction::Shl: {
697     if (Addr.getOffsetReg())
698       break;
699 
700     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
701     if (!CI)
702       break;
703 
704     unsigned Val = CI->getZExtValue();
705     if (Val < 1 || Val > 3)
706       break;
707 
708     uint64_t NumBytes = 0;
709     if (Ty && Ty->isSized()) {
710       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
711       NumBytes = NumBits / 8;
712       if (!isPowerOf2_64(NumBits))
713         NumBytes = 0;
714     }
715 
716     if (NumBytes != (1ULL << Val))
717       break;
718 
719     Addr.setShift(Val);
720     Addr.setExtendType(AArch64_AM::LSL);
721 
722     const Value *Src = U->getOperand(0);
723     if (const auto *I = dyn_cast<Instruction>(Src)) {
724       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
725         // Fold the zext or sext when it won't become a noop.
726         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
727           if (!isIntExtFree(ZE) &&
728               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
729             Addr.setExtendType(AArch64_AM::UXTW);
730             Src = ZE->getOperand(0);
731           }
732         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
733           if (!isIntExtFree(SE) &&
734               SE->getOperand(0)->getType()->isIntegerTy(32)) {
735             Addr.setExtendType(AArch64_AM::SXTW);
736             Src = SE->getOperand(0);
737           }
738         }
739       }
740     }
741 
742     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
743       if (AI->getOpcode() == Instruction::And) {
744         const Value *LHS = AI->getOperand(0);
745         const Value *RHS = AI->getOperand(1);
746 
747         if (const auto *C = dyn_cast<ConstantInt>(LHS))
748           if (C->getValue() == 0xffffffff)
749             std::swap(LHS, RHS);
750 
751         if (const auto *C = dyn_cast<ConstantInt>(RHS))
752           if (C->getValue() == 0xffffffff) {
753             Addr.setExtendType(AArch64_AM::UXTW);
754             unsigned Reg = getRegForValue(LHS);
755             if (!Reg)
756               return false;
757             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
758             Addr.setOffsetReg(Reg);
759             return true;
760           }
761       }
762 
763     unsigned Reg = getRegForValue(Src);
764     if (!Reg)
765       return false;
766     Addr.setOffsetReg(Reg);
767     return true;
768   }
769   case Instruction::Mul: {
770     if (Addr.getOffsetReg())
771       break;
772 
773     if (!isMulPowOf2(U))
774       break;
775 
776     const Value *LHS = U->getOperand(0);
777     const Value *RHS = U->getOperand(1);
778 
779     // Canonicalize power-of-2 value to the RHS.
780     if (const auto *C = dyn_cast<ConstantInt>(LHS))
781       if (C->getValue().isPowerOf2())
782         std::swap(LHS, RHS);
783 
784     assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt.");
785     const auto *C = cast<ConstantInt>(RHS);
786     unsigned Val = C->getValue().logBase2();
787     if (Val < 1 || Val > 3)
788       break;
789 
790     uint64_t NumBytes = 0;
791     if (Ty && Ty->isSized()) {
792       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
793       NumBytes = NumBits / 8;
794       if (!isPowerOf2_64(NumBits))
795         NumBytes = 0;
796     }
797 
798     if (NumBytes != (1ULL << Val))
799       break;
800 
801     Addr.setShift(Val);
802     Addr.setExtendType(AArch64_AM::LSL);
803 
804     const Value *Src = LHS;
805     if (const auto *I = dyn_cast<Instruction>(Src)) {
806       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
807         // Fold the zext or sext when it won't become a noop.
808         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
809           if (!isIntExtFree(ZE) &&
810               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
811             Addr.setExtendType(AArch64_AM::UXTW);
812             Src = ZE->getOperand(0);
813           }
814         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
815           if (!isIntExtFree(SE) &&
816               SE->getOperand(0)->getType()->isIntegerTy(32)) {
817             Addr.setExtendType(AArch64_AM::SXTW);
818             Src = SE->getOperand(0);
819           }
820         }
821       }
822     }
823 
824     unsigned Reg = getRegForValue(Src);
825     if (!Reg)
826       return false;
827     Addr.setOffsetReg(Reg);
828     return true;
829   }
830   case Instruction::And: {
831     if (Addr.getOffsetReg())
832       break;
833 
834     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
835       break;
836 
837     const Value *LHS = U->getOperand(0);
838     const Value *RHS = U->getOperand(1);
839 
840     if (const auto *C = dyn_cast<ConstantInt>(LHS))
841       if (C->getValue() == 0xffffffff)
842         std::swap(LHS, RHS);
843 
844     if (const auto *C = dyn_cast<ConstantInt>(RHS))
845       if (C->getValue() == 0xffffffff) {
846         Addr.setShift(0);
847         Addr.setExtendType(AArch64_AM::LSL);
848         Addr.setExtendType(AArch64_AM::UXTW);
849 
850         unsigned Reg = getRegForValue(LHS);
851         if (!Reg)
852           return false;
853         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
854         Addr.setOffsetReg(Reg);
855         return true;
856       }
857     break;
858   }
859   case Instruction::SExt:
860   case Instruction::ZExt: {
861     if (!Addr.getReg() || Addr.getOffsetReg())
862       break;
863 
864     const Value *Src = nullptr;
865     // Fold the zext or sext when it won't become a noop.
866     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
867       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
868         Addr.setExtendType(AArch64_AM::UXTW);
869         Src = ZE->getOperand(0);
870       }
871     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
872       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
873         Addr.setExtendType(AArch64_AM::SXTW);
874         Src = SE->getOperand(0);
875       }
876     }
877 
878     if (!Src)
879       break;
880 
881     Addr.setShift(0);
882     unsigned Reg = getRegForValue(Src);
883     if (!Reg)
884       return false;
885     Addr.setOffsetReg(Reg);
886     return true;
887   }
888   } // end switch
889 
890   if (Addr.isRegBase() && !Addr.getReg()) {
891     unsigned Reg = getRegForValue(Obj);
892     if (!Reg)
893       return false;
894     Addr.setReg(Reg);
895     return true;
896   }
897 
898   if (!Addr.getOffsetReg()) {
899     unsigned Reg = getRegForValue(Obj);
900     if (!Reg)
901       return false;
902     Addr.setOffsetReg(Reg);
903     return true;
904   }
905 
906   return false;
907 }
908 
909 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
910   const User *U = nullptr;
911   unsigned Opcode = Instruction::UserOp1;
912   bool InMBB = true;
913 
914   if (const auto *I = dyn_cast<Instruction>(V)) {
915     Opcode = I->getOpcode();
916     U = I;
917     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
918   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
919     Opcode = C->getOpcode();
920     U = C;
921   }
922 
923   switch (Opcode) {
924   default: break;
925   case Instruction::BitCast:
926     // Look past bitcasts if its operand is in the same BB.
927     if (InMBB)
928       return computeCallAddress(U->getOperand(0), Addr);
929     break;
930   case Instruction::IntToPtr:
931     // Look past no-op inttoptrs if its operand is in the same BB.
932     if (InMBB &&
933         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
934             TLI.getPointerTy(DL))
935       return computeCallAddress(U->getOperand(0), Addr);
936     break;
937   case Instruction::PtrToInt:
938     // Look past no-op ptrtoints if its operand is in the same BB.
939     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
940       return computeCallAddress(U->getOperand(0), Addr);
941     break;
942   }
943 
944   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
945     Addr.setGlobalValue(GV);
946     return true;
947   }
948 
949   // If all else fails, try to materialize the value in a register.
950   if (!Addr.getGlobalValue()) {
951     Addr.setReg(getRegForValue(V));
952     return Addr.getReg() != 0;
953   }
954 
955   return false;
956 }
957 
958 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
959   EVT evt = TLI.getValueType(DL, Ty, true);
960 
961   if (Subtarget->isTargetILP32() && Ty->isPointerTy())
962     return false;
963 
964   // Only handle simple types.
965   if (evt == MVT::Other || !evt.isSimple())
966     return false;
967   VT = evt.getSimpleVT();
968 
969   // This is a legal type, but it's not something we handle in fast-isel.
970   if (VT == MVT::f128)
971     return false;
972 
973   // Handle all other legal types, i.e. a register that will directly hold this
974   // value.
975   return TLI.isTypeLegal(VT);
976 }
977 
978 /// Determine if the value type is supported by FastISel.
979 ///
980 /// FastISel for AArch64 can handle more value types than are legal. This adds
981 /// simple value type such as i1, i8, and i16.
982 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
983   if (Ty->isVectorTy() && !IsVectorAllowed)
984     return false;
985 
986   if (isTypeLegal(Ty, VT))
987     return true;
988 
989   // If this is a type than can be sign or zero-extended to a basic operation
990   // go ahead and accept it now.
991   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
992     return true;
993 
994   return false;
995 }
996 
997 bool AArch64FastISel::isValueAvailable(const Value *V) const {
998   if (!isa<Instruction>(V))
999     return true;
1000 
1001   const auto *I = cast<Instruction>(V);
1002   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1003 }
1004 
1005 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1006   if (Subtarget->isTargetILP32())
1007     return false;
1008 
1009   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1010   if (!ScaleFactor)
1011     return false;
1012 
1013   bool ImmediateOffsetNeedsLowering = false;
1014   bool RegisterOffsetNeedsLowering = false;
1015   int64_t Offset = Addr.getOffset();
1016   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1017     ImmediateOffsetNeedsLowering = true;
1018   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1019            !isUInt<12>(Offset / ScaleFactor))
1020     ImmediateOffsetNeedsLowering = true;
1021 
1022   // Cannot encode an offset register and an immediate offset in the same
1023   // instruction. Fold the immediate offset into the load/store instruction and
1024   // emit an additional add to take care of the offset register.
1025   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1026     RegisterOffsetNeedsLowering = true;
1027 
1028   // Cannot encode zero register as base.
1029   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1030     RegisterOffsetNeedsLowering = true;
1031 
1032   // If this is a stack pointer and the offset needs to be simplified then put
1033   // the alloca address into a register, set the base type back to register and
1034   // continue. This should almost never happen.
1035   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1036   {
1037     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1038     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1039             ResultReg)
1040       .addFrameIndex(Addr.getFI())
1041       .addImm(0)
1042       .addImm(0);
1043     Addr.setKind(Address::RegBase);
1044     Addr.setReg(ResultReg);
1045   }
1046 
1047   if (RegisterOffsetNeedsLowering) {
1048     unsigned ResultReg = 0;
1049     if (Addr.getReg()) {
1050       if (Addr.getExtendType() == AArch64_AM::SXTW ||
1051           Addr.getExtendType() == AArch64_AM::UXTW   )
1052         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1053                                   Addr.getOffsetReg(), Addr.getExtendType(),
1054                                   Addr.getShift());
1055       else
1056         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1057                                   Addr.getOffsetReg(), AArch64_AM::LSL,
1058                                   Addr.getShift());
1059     } else {
1060       if (Addr.getExtendType() == AArch64_AM::UXTW)
1061         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1062                                Addr.getShift(), /*IsZExt=*/true);
1063       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1064         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1065                                Addr.getShift(), /*IsZExt=*/false);
1066       else
1067         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1068                                Addr.getShift());
1069     }
1070     if (!ResultReg)
1071       return false;
1072 
1073     Addr.setReg(ResultReg);
1074     Addr.setOffsetReg(0);
1075     Addr.setShift(0);
1076     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1077   }
1078 
1079   // Since the offset is too large for the load/store instruction get the
1080   // reg+offset into a register.
1081   if (ImmediateOffsetNeedsLowering) {
1082     unsigned ResultReg;
1083     if (Addr.getReg())
1084       // Try to fold the immediate into the add instruction.
1085       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1086     else
1087       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1088 
1089     if (!ResultReg)
1090       return false;
1091     Addr.setReg(ResultReg);
1092     Addr.setOffset(0);
1093   }
1094   return true;
1095 }
1096 
1097 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1098                                            const MachineInstrBuilder &MIB,
1099                                            MachineMemOperand::Flags Flags,
1100                                            unsigned ScaleFactor,
1101                                            MachineMemOperand *MMO) {
1102   int64_t Offset = Addr.getOffset() / ScaleFactor;
1103   // Frame base works a bit differently. Handle it separately.
1104   if (Addr.isFIBase()) {
1105     int FI = Addr.getFI();
1106     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1107     // and alignment should be based on the VT.
1108     MMO = FuncInfo.MF->getMachineMemOperand(
1109         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1110         MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1111     // Now add the rest of the operands.
1112     MIB.addFrameIndex(FI).addImm(Offset);
1113   } else {
1114     assert(Addr.isRegBase() && "Unexpected address kind.");
1115     const MCInstrDesc &II = MIB->getDesc();
1116     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1117     Addr.setReg(
1118       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1119     Addr.setOffsetReg(
1120       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1121     if (Addr.getOffsetReg()) {
1122       assert(Addr.getOffset() == 0 && "Unexpected offset");
1123       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1124                       Addr.getExtendType() == AArch64_AM::SXTX;
1125       MIB.addReg(Addr.getReg());
1126       MIB.addReg(Addr.getOffsetReg());
1127       MIB.addImm(IsSigned);
1128       MIB.addImm(Addr.getShift() != 0);
1129     } else
1130       MIB.addReg(Addr.getReg()).addImm(Offset);
1131   }
1132 
1133   if (MMO)
1134     MIB.addMemOperand(MMO);
1135 }
1136 
1137 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1138                                      const Value *RHS, bool SetFlags,
1139                                      bool WantResult,  bool IsZExt) {
1140   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1141   bool NeedExtend = false;
1142   switch (RetVT.SimpleTy) {
1143   default:
1144     return 0;
1145   case MVT::i1:
1146     NeedExtend = true;
1147     break;
1148   case MVT::i8:
1149     NeedExtend = true;
1150     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1151     break;
1152   case MVT::i16:
1153     NeedExtend = true;
1154     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1155     break;
1156   case MVT::i32:  // fall-through
1157   case MVT::i64:
1158     break;
1159   }
1160   MVT SrcVT = RetVT;
1161   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1162 
1163   // Canonicalize immediates to the RHS first.
1164   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1165     std::swap(LHS, RHS);
1166 
1167   // Canonicalize mul by power of 2 to the RHS.
1168   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1169     if (isMulPowOf2(LHS))
1170       std::swap(LHS, RHS);
1171 
1172   // Canonicalize shift immediate to the RHS.
1173   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1174     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1175       if (isa<ConstantInt>(SI->getOperand(1)))
1176         if (SI->getOpcode() == Instruction::Shl  ||
1177             SI->getOpcode() == Instruction::LShr ||
1178             SI->getOpcode() == Instruction::AShr   )
1179           std::swap(LHS, RHS);
1180 
1181   unsigned LHSReg = getRegForValue(LHS);
1182   if (!LHSReg)
1183     return 0;
1184 
1185   if (NeedExtend)
1186     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1187 
1188   unsigned ResultReg = 0;
1189   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1190     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1191     if (C->isNegative())
1192       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1193                                 WantResult);
1194     else
1195       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1196                                 WantResult);
1197   } else if (const auto *C = dyn_cast<Constant>(RHS))
1198     if (C->isNullValue())
1199       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1200 
1201   if (ResultReg)
1202     return ResultReg;
1203 
1204   // Only extend the RHS within the instruction if there is a valid extend type.
1205   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1206       isValueAvailable(RHS)) {
1207     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1208       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1209         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1210           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1211           if (!RHSReg)
1212             return 0;
1213           return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
1214                                C->getZExtValue(), SetFlags, WantResult);
1215         }
1216     unsigned RHSReg = getRegForValue(RHS);
1217     if (!RHSReg)
1218       return 0;
1219     return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1220                          SetFlags, WantResult);
1221   }
1222 
1223   // Check if the mul can be folded into the instruction.
1224   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1225     if (isMulPowOf2(RHS)) {
1226       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1227       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1228 
1229       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1230         if (C->getValue().isPowerOf2())
1231           std::swap(MulLHS, MulRHS);
1232 
1233       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1234       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1235       unsigned RHSReg = getRegForValue(MulLHS);
1236       if (!RHSReg)
1237         return 0;
1238       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1239                                 ShiftVal, SetFlags, WantResult);
1240       if (ResultReg)
1241         return ResultReg;
1242     }
1243   }
1244 
1245   // Check if the shift can be folded into the instruction.
1246   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1247     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1248       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1249         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1250         switch (SI->getOpcode()) {
1251         default: break;
1252         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1253         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1254         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1255         }
1256         uint64_t ShiftVal = C->getZExtValue();
1257         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1258           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1259           if (!RHSReg)
1260             return 0;
1261           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1262                                     ShiftVal, SetFlags, WantResult);
1263           if (ResultReg)
1264             return ResultReg;
1265         }
1266       }
1267     }
1268   }
1269 
1270   unsigned RHSReg = getRegForValue(RHS);
1271   if (!RHSReg)
1272     return 0;
1273 
1274   if (NeedExtend)
1275     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1276 
1277   return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1278 }
1279 
1280 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1281                                         unsigned RHSReg, bool SetFlags,
1282                                         bool WantResult) {
1283   assert(LHSReg && RHSReg && "Invalid register number.");
1284 
1285   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1286       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1287     return 0;
1288 
1289   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1290     return 0;
1291 
1292   static const unsigned OpcTable[2][2][2] = {
1293     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1294       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1295     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1296       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1297   };
1298   bool Is64Bit = RetVT == MVT::i64;
1299   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1300   const TargetRegisterClass *RC =
1301       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1302   unsigned ResultReg;
1303   if (WantResult)
1304     ResultReg = createResultReg(RC);
1305   else
1306     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1307 
1308   const MCInstrDesc &II = TII.get(Opc);
1309   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1310   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1311   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1312       .addReg(LHSReg)
1313       .addReg(RHSReg);
1314   return ResultReg;
1315 }
1316 
1317 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1318                                         uint64_t Imm, bool SetFlags,
1319                                         bool WantResult) {
1320   assert(LHSReg && "Invalid register number.");
1321 
1322   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1323     return 0;
1324 
1325   unsigned ShiftImm;
1326   if (isUInt<12>(Imm))
1327     ShiftImm = 0;
1328   else if ((Imm & 0xfff000) == Imm) {
1329     ShiftImm = 12;
1330     Imm >>= 12;
1331   } else
1332     return 0;
1333 
1334   static const unsigned OpcTable[2][2][2] = {
1335     { { AArch64::SUBWri,  AArch64::SUBXri  },
1336       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1337     { { AArch64::SUBSWri, AArch64::SUBSXri },
1338       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1339   };
1340   bool Is64Bit = RetVT == MVT::i64;
1341   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1342   const TargetRegisterClass *RC;
1343   if (SetFlags)
1344     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1345   else
1346     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1347   unsigned ResultReg;
1348   if (WantResult)
1349     ResultReg = createResultReg(RC);
1350   else
1351     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1352 
1353   const MCInstrDesc &II = TII.get(Opc);
1354   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1355   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1356       .addReg(LHSReg)
1357       .addImm(Imm)
1358       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1359   return ResultReg;
1360 }
1361 
1362 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1363                                         unsigned RHSReg,
1364                                         AArch64_AM::ShiftExtendType ShiftType,
1365                                         uint64_t ShiftImm, bool SetFlags,
1366                                         bool WantResult) {
1367   assert(LHSReg && RHSReg && "Invalid register number.");
1368   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1369          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1370 
1371   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1372     return 0;
1373 
1374   // Don't deal with undefined shifts.
1375   if (ShiftImm >= RetVT.getSizeInBits())
1376     return 0;
1377 
1378   static const unsigned OpcTable[2][2][2] = {
1379     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1380       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1381     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1382       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1383   };
1384   bool Is64Bit = RetVT == MVT::i64;
1385   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1386   const TargetRegisterClass *RC =
1387       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1388   unsigned ResultReg;
1389   if (WantResult)
1390     ResultReg = createResultReg(RC);
1391   else
1392     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1393 
1394   const MCInstrDesc &II = TII.get(Opc);
1395   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1396   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1397   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1398       .addReg(LHSReg)
1399       .addReg(RHSReg)
1400       .addImm(getShifterImm(ShiftType, ShiftImm));
1401   return ResultReg;
1402 }
1403 
1404 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1405                                         unsigned RHSReg,
1406                                         AArch64_AM::ShiftExtendType ExtType,
1407                                         uint64_t ShiftImm, bool SetFlags,
1408                                         bool WantResult) {
1409   assert(LHSReg && RHSReg && "Invalid register number.");
1410   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1411          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1412 
1413   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1414     return 0;
1415 
1416   if (ShiftImm >= 4)
1417     return 0;
1418 
1419   static const unsigned OpcTable[2][2][2] = {
1420     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1421       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1422     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1423       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1424   };
1425   bool Is64Bit = RetVT == MVT::i64;
1426   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1427   const TargetRegisterClass *RC = nullptr;
1428   if (SetFlags)
1429     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1430   else
1431     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1432   unsigned ResultReg;
1433   if (WantResult)
1434     ResultReg = createResultReg(RC);
1435   else
1436     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1437 
1438   const MCInstrDesc &II = TII.get(Opc);
1439   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1440   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1441   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1442       .addReg(LHSReg)
1443       .addReg(RHSReg)
1444       .addImm(getArithExtendImm(ExtType, ShiftImm));
1445   return ResultReg;
1446 }
1447 
1448 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1449   Type *Ty = LHS->getType();
1450   EVT EVT = TLI.getValueType(DL, Ty, true);
1451   if (!EVT.isSimple())
1452     return false;
1453   MVT VT = EVT.getSimpleVT();
1454 
1455   switch (VT.SimpleTy) {
1456   default:
1457     return false;
1458   case MVT::i1:
1459   case MVT::i8:
1460   case MVT::i16:
1461   case MVT::i32:
1462   case MVT::i64:
1463     return emitICmp(VT, LHS, RHS, IsZExt);
1464   case MVT::f32:
1465   case MVT::f64:
1466     return emitFCmp(VT, LHS, RHS);
1467   }
1468 }
1469 
1470 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1471                                bool IsZExt) {
1472   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1473                  IsZExt) != 0;
1474 }
1475 
1476 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1477   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1478                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1479 }
1480 
1481 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1482   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1483     return false;
1484 
1485   // Check to see if the 2nd operand is a constant that we can encode directly
1486   // in the compare.
1487   bool UseImm = false;
1488   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1489     if (CFP->isZero() && !CFP->isNegative())
1490       UseImm = true;
1491 
1492   unsigned LHSReg = getRegForValue(LHS);
1493   if (!LHSReg)
1494     return false;
1495 
1496   if (UseImm) {
1497     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1498     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1499         .addReg(LHSReg);
1500     return true;
1501   }
1502 
1503   unsigned RHSReg = getRegForValue(RHS);
1504   if (!RHSReg)
1505     return false;
1506 
1507   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1508   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1509       .addReg(LHSReg)
1510       .addReg(RHSReg);
1511   return true;
1512 }
1513 
1514 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1515                                   bool SetFlags, bool WantResult, bool IsZExt) {
1516   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1517                     IsZExt);
1518 }
1519 
1520 /// This method is a wrapper to simplify add emission.
1521 ///
1522 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1523 /// that fails, then try to materialize the immediate into a register and use
1524 /// emitAddSub_rr instead.
1525 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1526   unsigned ResultReg;
1527   if (Imm < 0)
1528     ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1529   else
1530     ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1531 
1532   if (ResultReg)
1533     return ResultReg;
1534 
1535   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1536   if (!CReg)
1537     return 0;
1538 
1539   ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1540   return ResultReg;
1541 }
1542 
1543 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1544                                   bool SetFlags, bool WantResult, bool IsZExt) {
1545   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1546                     IsZExt);
1547 }
1548 
1549 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1550                                       unsigned RHSReg, bool WantResult) {
1551   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1552                        /*SetFlags=*/true, WantResult);
1553 }
1554 
1555 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1556                                       unsigned RHSReg,
1557                                       AArch64_AM::ShiftExtendType ShiftType,
1558                                       uint64_t ShiftImm, bool WantResult) {
1559   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1560                        ShiftImm, /*SetFlags=*/true, WantResult);
1561 }
1562 
1563 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1564                                         const Value *LHS, const Value *RHS) {
1565   // Canonicalize immediates to the RHS first.
1566   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1567     std::swap(LHS, RHS);
1568 
1569   // Canonicalize mul by power-of-2 to the RHS.
1570   if (LHS->hasOneUse() && isValueAvailable(LHS))
1571     if (isMulPowOf2(LHS))
1572       std::swap(LHS, RHS);
1573 
1574   // Canonicalize shift immediate to the RHS.
1575   if (LHS->hasOneUse() && isValueAvailable(LHS))
1576     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1577       if (isa<ConstantInt>(SI->getOperand(1)))
1578         std::swap(LHS, RHS);
1579 
1580   unsigned LHSReg = getRegForValue(LHS);
1581   if (!LHSReg)
1582     return 0;
1583 
1584   unsigned ResultReg = 0;
1585   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1586     uint64_t Imm = C->getZExtValue();
1587     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1588   }
1589   if (ResultReg)
1590     return ResultReg;
1591 
1592   // Check if the mul can be folded into the instruction.
1593   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1594     if (isMulPowOf2(RHS)) {
1595       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1596       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1597 
1598       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1599         if (C->getValue().isPowerOf2())
1600           std::swap(MulLHS, MulRHS);
1601 
1602       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1603       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1604 
1605       unsigned RHSReg = getRegForValue(MulLHS);
1606       if (!RHSReg)
1607         return 0;
1608       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1609       if (ResultReg)
1610         return ResultReg;
1611     }
1612   }
1613 
1614   // Check if the shift can be folded into the instruction.
1615   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1616     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1617       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1618         uint64_t ShiftVal = C->getZExtValue();
1619         unsigned RHSReg = getRegForValue(SI->getOperand(0));
1620         if (!RHSReg)
1621           return 0;
1622         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1623         if (ResultReg)
1624           return ResultReg;
1625       }
1626   }
1627 
1628   unsigned RHSReg = getRegForValue(RHS);
1629   if (!RHSReg)
1630     return 0;
1631 
1632   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1633   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1634   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1635     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1636     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1637   }
1638   return ResultReg;
1639 }
1640 
1641 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1642                                            unsigned LHSReg, uint64_t Imm) {
1643   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1644                 "ISD nodes are not consecutive!");
1645   static const unsigned OpcTable[3][2] = {
1646     { AArch64::ANDWri, AArch64::ANDXri },
1647     { AArch64::ORRWri, AArch64::ORRXri },
1648     { AArch64::EORWri, AArch64::EORXri }
1649   };
1650   const TargetRegisterClass *RC;
1651   unsigned Opc;
1652   unsigned RegSize;
1653   switch (RetVT.SimpleTy) {
1654   default:
1655     return 0;
1656   case MVT::i1:
1657   case MVT::i8:
1658   case MVT::i16:
1659   case MVT::i32: {
1660     unsigned Idx = ISDOpc - ISD::AND;
1661     Opc = OpcTable[Idx][0];
1662     RC = &AArch64::GPR32spRegClass;
1663     RegSize = 32;
1664     break;
1665   }
1666   case MVT::i64:
1667     Opc = OpcTable[ISDOpc - ISD::AND][1];
1668     RC = &AArch64::GPR64spRegClass;
1669     RegSize = 64;
1670     break;
1671   }
1672 
1673   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1674     return 0;
1675 
1676   unsigned ResultReg =
1677       fastEmitInst_ri(Opc, RC, LHSReg,
1678                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1679   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1680     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1681     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1682   }
1683   return ResultReg;
1684 }
1685 
1686 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1687                                            unsigned LHSReg, unsigned RHSReg,
1688                                            uint64_t ShiftImm) {
1689   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1690                 "ISD nodes are not consecutive!");
1691   static const unsigned OpcTable[3][2] = {
1692     { AArch64::ANDWrs, AArch64::ANDXrs },
1693     { AArch64::ORRWrs, AArch64::ORRXrs },
1694     { AArch64::EORWrs, AArch64::EORXrs }
1695   };
1696 
1697   // Don't deal with undefined shifts.
1698   if (ShiftImm >= RetVT.getSizeInBits())
1699     return 0;
1700 
1701   const TargetRegisterClass *RC;
1702   unsigned Opc;
1703   switch (RetVT.SimpleTy) {
1704   default:
1705     return 0;
1706   case MVT::i1:
1707   case MVT::i8:
1708   case MVT::i16:
1709   case MVT::i32:
1710     Opc = OpcTable[ISDOpc - ISD::AND][0];
1711     RC = &AArch64::GPR32RegClass;
1712     break;
1713   case MVT::i64:
1714     Opc = OpcTable[ISDOpc - ISD::AND][1];
1715     RC = &AArch64::GPR64RegClass;
1716     break;
1717   }
1718   unsigned ResultReg =
1719       fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1720                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1721   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1722     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1723     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1724   }
1725   return ResultReg;
1726 }
1727 
1728 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1729                                      uint64_t Imm) {
1730   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1731 }
1732 
1733 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1734                                    bool WantZExt, MachineMemOperand *MMO) {
1735   if (!TLI.allowsMisalignedMemoryAccesses(VT))
1736     return 0;
1737 
1738   // Simplify this down to something we can handle.
1739   if (!simplifyAddress(Addr, VT))
1740     return 0;
1741 
1742   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1743   if (!ScaleFactor)
1744     llvm_unreachable("Unexpected value type.");
1745 
1746   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1747   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1748   bool UseScaled = true;
1749   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1750     UseScaled = false;
1751     ScaleFactor = 1;
1752   }
1753 
1754   static const unsigned GPOpcTable[2][8][4] = {
1755     // Sign-extend.
1756     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1757         AArch64::LDURXi  },
1758       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1759         AArch64::LDURXi  },
1760       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1761         AArch64::LDRXui  },
1762       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1763         AArch64::LDRXui  },
1764       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1765         AArch64::LDRXroX },
1766       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1767         AArch64::LDRXroX },
1768       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1769         AArch64::LDRXroW },
1770       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1771         AArch64::LDRXroW }
1772     },
1773     // Zero-extend.
1774     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1775         AArch64::LDURXi  },
1776       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1777         AArch64::LDURXi  },
1778       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1779         AArch64::LDRXui  },
1780       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1781         AArch64::LDRXui  },
1782       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1783         AArch64::LDRXroX },
1784       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1785         AArch64::LDRXroX },
1786       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1787         AArch64::LDRXroW },
1788       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1789         AArch64::LDRXroW }
1790     }
1791   };
1792 
1793   static const unsigned FPOpcTable[4][2] = {
1794     { AArch64::LDURSi,  AArch64::LDURDi  },
1795     { AArch64::LDRSui,  AArch64::LDRDui  },
1796     { AArch64::LDRSroX, AArch64::LDRDroX },
1797     { AArch64::LDRSroW, AArch64::LDRDroW }
1798   };
1799 
1800   unsigned Opc;
1801   const TargetRegisterClass *RC;
1802   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1803                       Addr.getOffsetReg();
1804   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1805   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1806       Addr.getExtendType() == AArch64_AM::SXTW)
1807     Idx++;
1808 
1809   bool IsRet64Bit = RetVT == MVT::i64;
1810   switch (VT.SimpleTy) {
1811   default:
1812     llvm_unreachable("Unexpected value type.");
1813   case MVT::i1: // Intentional fall-through.
1814   case MVT::i8:
1815     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1816     RC = (IsRet64Bit && !WantZExt) ?
1817              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1818     break;
1819   case MVT::i16:
1820     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1821     RC = (IsRet64Bit && !WantZExt) ?
1822              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1823     break;
1824   case MVT::i32:
1825     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1826     RC = (IsRet64Bit && !WantZExt) ?
1827              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1828     break;
1829   case MVT::i64:
1830     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1831     RC = &AArch64::GPR64RegClass;
1832     break;
1833   case MVT::f32:
1834     Opc = FPOpcTable[Idx][0];
1835     RC = &AArch64::FPR32RegClass;
1836     break;
1837   case MVT::f64:
1838     Opc = FPOpcTable[Idx][1];
1839     RC = &AArch64::FPR64RegClass;
1840     break;
1841   }
1842 
1843   // Create the base instruction, then add the operands.
1844   unsigned ResultReg = createResultReg(RC);
1845   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1846                                     TII.get(Opc), ResultReg);
1847   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1848 
1849   // Loading an i1 requires special handling.
1850   if (VT == MVT::i1) {
1851     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1852     assert(ANDReg && "Unexpected AND instruction emission failure.");
1853     ResultReg = ANDReg;
1854   }
1855 
1856   // For zero-extending loads to 64bit we emit a 32bit load and then convert
1857   // the 32bit reg to a 64bit reg.
1858   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1859     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1860     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1861             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1862         .addImm(0)
1863         .addReg(ResultReg, getKillRegState(true))
1864         .addImm(AArch64::sub_32);
1865     ResultReg = Reg64;
1866   }
1867   return ResultReg;
1868 }
1869 
1870 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1871   MVT VT;
1872   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1873     return false;
1874 
1875   if (VT.isVector())
1876     return selectOperator(I, I->getOpcode());
1877 
1878   unsigned ResultReg;
1879   switch (I->getOpcode()) {
1880   default:
1881     llvm_unreachable("Unexpected instruction.");
1882   case Instruction::Add:
1883     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1884     break;
1885   case Instruction::Sub:
1886     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1887     break;
1888   }
1889   if (!ResultReg)
1890     return false;
1891 
1892   updateValueMap(I, ResultReg);
1893   return true;
1894 }
1895 
1896 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1897   MVT VT;
1898   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1899     return false;
1900 
1901   if (VT.isVector())
1902     return selectOperator(I, I->getOpcode());
1903 
1904   unsigned ResultReg;
1905   switch (I->getOpcode()) {
1906   default:
1907     llvm_unreachable("Unexpected instruction.");
1908   case Instruction::And:
1909     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1910     break;
1911   case Instruction::Or:
1912     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1913     break;
1914   case Instruction::Xor:
1915     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1916     break;
1917   }
1918   if (!ResultReg)
1919     return false;
1920 
1921   updateValueMap(I, ResultReg);
1922   return true;
1923 }
1924 
1925 bool AArch64FastISel::selectLoad(const Instruction *I) {
1926   MVT VT;
1927   // Verify we have a legal type before going any further.  Currently, we handle
1928   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1929   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1930   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1931       cast<LoadInst>(I)->isAtomic())
1932     return false;
1933 
1934   const Value *SV = I->getOperand(0);
1935   if (TLI.supportSwiftError()) {
1936     // Swifterror values can come from either a function parameter with
1937     // swifterror attribute or an alloca with swifterror attribute.
1938     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1939       if (Arg->hasSwiftErrorAttr())
1940         return false;
1941     }
1942 
1943     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1944       if (Alloca->isSwiftError())
1945         return false;
1946     }
1947   }
1948 
1949   // See if we can handle this address.
1950   Address Addr;
1951   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1952     return false;
1953 
1954   // Fold the following sign-/zero-extend into the load instruction.
1955   bool WantZExt = true;
1956   MVT RetVT = VT;
1957   const Value *IntExtVal = nullptr;
1958   if (I->hasOneUse()) {
1959     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1960       if (isTypeSupported(ZE->getType(), RetVT))
1961         IntExtVal = ZE;
1962       else
1963         RetVT = VT;
1964     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1965       if (isTypeSupported(SE->getType(), RetVT))
1966         IntExtVal = SE;
1967       else
1968         RetVT = VT;
1969       WantZExt = false;
1970     }
1971   }
1972 
1973   unsigned ResultReg =
1974       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1975   if (!ResultReg)
1976     return false;
1977 
1978   // There are a few different cases we have to handle, because the load or the
1979   // sign-/zero-extend might not be selected by FastISel if we fall-back to
1980   // SelectionDAG. There is also an ordering issue when both instructions are in
1981   // different basic blocks.
1982   // 1.) The load instruction is selected by FastISel, but the integer extend
1983   //     not. This usually happens when the integer extend is in a different
1984   //     basic block and SelectionDAG took over for that basic block.
1985   // 2.) The load instruction is selected before the integer extend. This only
1986   //     happens when the integer extend is in a different basic block.
1987   // 3.) The load instruction is selected by SelectionDAG and the integer extend
1988   //     by FastISel. This happens if there are instructions between the load
1989   //     and the integer extend that couldn't be selected by FastISel.
1990   if (IntExtVal) {
1991     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
1992     // could select it. Emit a copy to subreg if necessary. FastISel will remove
1993     // it when it selects the integer extend.
1994     unsigned Reg = lookUpRegForValue(IntExtVal);
1995     auto *MI = MRI.getUniqueVRegDef(Reg);
1996     if (!MI) {
1997       if (RetVT == MVT::i64 && VT <= MVT::i32) {
1998         if (WantZExt) {
1999           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2000           MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2001           ResultReg = std::prev(I)->getOperand(0).getReg();
2002           removeDeadCode(I, std::next(I));
2003         } else
2004           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2005                                                  AArch64::sub_32);
2006       }
2007       updateValueMap(I, ResultReg);
2008       return true;
2009     }
2010 
2011     // The integer extend has already been emitted - delete all the instructions
2012     // that have been emitted by the integer extend lowering code and use the
2013     // result from the load instruction directly.
2014     while (MI) {
2015       Reg = 0;
2016       for (auto &Opnd : MI->uses()) {
2017         if (Opnd.isReg()) {
2018           Reg = Opnd.getReg();
2019           break;
2020         }
2021       }
2022       MachineBasicBlock::iterator I(MI);
2023       removeDeadCode(I, std::next(I));
2024       MI = nullptr;
2025       if (Reg)
2026         MI = MRI.getUniqueVRegDef(Reg);
2027     }
2028     updateValueMap(IntExtVal, ResultReg);
2029     return true;
2030   }
2031 
2032   updateValueMap(I, ResultReg);
2033   return true;
2034 }
2035 
2036 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2037                                        unsigned AddrReg,
2038                                        MachineMemOperand *MMO) {
2039   unsigned Opc;
2040   switch (VT.SimpleTy) {
2041   default: return false;
2042   case MVT::i8:  Opc = AArch64::STLRB; break;
2043   case MVT::i16: Opc = AArch64::STLRH; break;
2044   case MVT::i32: Opc = AArch64::STLRW; break;
2045   case MVT::i64: Opc = AArch64::STLRX; break;
2046   }
2047 
2048   const MCInstrDesc &II = TII.get(Opc);
2049   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2050   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2051   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2052       .addReg(SrcReg)
2053       .addReg(AddrReg)
2054       .addMemOperand(MMO);
2055   return true;
2056 }
2057 
2058 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2059                                 MachineMemOperand *MMO) {
2060   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2061     return false;
2062 
2063   // Simplify this down to something we can handle.
2064   if (!simplifyAddress(Addr, VT))
2065     return false;
2066 
2067   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2068   if (!ScaleFactor)
2069     llvm_unreachable("Unexpected value type.");
2070 
2071   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2072   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2073   bool UseScaled = true;
2074   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2075     UseScaled = false;
2076     ScaleFactor = 1;
2077   }
2078 
2079   static const unsigned OpcTable[4][6] = {
2080     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2081       AArch64::STURSi,   AArch64::STURDi },
2082     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2083       AArch64::STRSui,   AArch64::STRDui },
2084     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2085       AArch64::STRSroX,  AArch64::STRDroX },
2086     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2087       AArch64::STRSroW,  AArch64::STRDroW }
2088   };
2089 
2090   unsigned Opc;
2091   bool VTIsi1 = false;
2092   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2093                       Addr.getOffsetReg();
2094   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2095   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2096       Addr.getExtendType() == AArch64_AM::SXTW)
2097     Idx++;
2098 
2099   switch (VT.SimpleTy) {
2100   default: llvm_unreachable("Unexpected value type.");
2101   case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
2102   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2103   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2104   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2105   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2106   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2107   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2108   }
2109 
2110   // Storing an i1 requires special handling.
2111   if (VTIsi1 && SrcReg != AArch64::WZR) {
2112     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2113     assert(ANDReg && "Unexpected AND instruction emission failure.");
2114     SrcReg = ANDReg;
2115   }
2116   // Create the base instruction, then add the operands.
2117   const MCInstrDesc &II = TII.get(Opc);
2118   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2119   MachineInstrBuilder MIB =
2120       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2121   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2122 
2123   return true;
2124 }
2125 
2126 bool AArch64FastISel::selectStore(const Instruction *I) {
2127   MVT VT;
2128   const Value *Op0 = I->getOperand(0);
2129   // Verify we have a legal type before going any further.  Currently, we handle
2130   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2131   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2132   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2133     return false;
2134 
2135   const Value *PtrV = I->getOperand(1);
2136   if (TLI.supportSwiftError()) {
2137     // Swifterror values can come from either a function parameter with
2138     // swifterror attribute or an alloca with swifterror attribute.
2139     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2140       if (Arg->hasSwiftErrorAttr())
2141         return false;
2142     }
2143 
2144     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2145       if (Alloca->isSwiftError())
2146         return false;
2147     }
2148   }
2149 
2150   // Get the value to be stored into a register. Use the zero register directly
2151   // when possible to avoid an unnecessary copy and a wasted register.
2152   unsigned SrcReg = 0;
2153   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2154     if (CI->isZero())
2155       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2156   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2157     if (CF->isZero() && !CF->isNegative()) {
2158       VT = MVT::getIntegerVT(VT.getSizeInBits());
2159       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2160     }
2161   }
2162 
2163   if (!SrcReg)
2164     SrcReg = getRegForValue(Op0);
2165 
2166   if (!SrcReg)
2167     return false;
2168 
2169   auto *SI = cast<StoreInst>(I);
2170 
2171   // Try to emit a STLR for seq_cst/release.
2172   if (SI->isAtomic()) {
2173     AtomicOrdering Ord = SI->getOrdering();
2174     // The non-atomic instructions are sufficient for relaxed stores.
2175     if (isReleaseOrStronger(Ord)) {
2176       // The STLR addressing mode only supports a base reg; pass that directly.
2177       unsigned AddrReg = getRegForValue(PtrV);
2178       return emitStoreRelease(VT, SrcReg, AddrReg,
2179                               createMachineMemOperandFor(I));
2180     }
2181   }
2182 
2183   // See if we can handle this address.
2184   Address Addr;
2185   if (!computeAddress(PtrV, Addr, Op0->getType()))
2186     return false;
2187 
2188   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2189     return false;
2190   return true;
2191 }
2192 
2193 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2194   switch (Pred) {
2195   case CmpInst::FCMP_ONE:
2196   case CmpInst::FCMP_UEQ:
2197   default:
2198     // AL is our "false" for now. The other two need more compares.
2199     return AArch64CC::AL;
2200   case CmpInst::ICMP_EQ:
2201   case CmpInst::FCMP_OEQ:
2202     return AArch64CC::EQ;
2203   case CmpInst::ICMP_SGT:
2204   case CmpInst::FCMP_OGT:
2205     return AArch64CC::GT;
2206   case CmpInst::ICMP_SGE:
2207   case CmpInst::FCMP_OGE:
2208     return AArch64CC::GE;
2209   case CmpInst::ICMP_UGT:
2210   case CmpInst::FCMP_UGT:
2211     return AArch64CC::HI;
2212   case CmpInst::FCMP_OLT:
2213     return AArch64CC::MI;
2214   case CmpInst::ICMP_ULE:
2215   case CmpInst::FCMP_OLE:
2216     return AArch64CC::LS;
2217   case CmpInst::FCMP_ORD:
2218     return AArch64CC::VC;
2219   case CmpInst::FCMP_UNO:
2220     return AArch64CC::VS;
2221   case CmpInst::FCMP_UGE:
2222     return AArch64CC::PL;
2223   case CmpInst::ICMP_SLT:
2224   case CmpInst::FCMP_ULT:
2225     return AArch64CC::LT;
2226   case CmpInst::ICMP_SLE:
2227   case CmpInst::FCMP_ULE:
2228     return AArch64CC::LE;
2229   case CmpInst::FCMP_UNE:
2230   case CmpInst::ICMP_NE:
2231     return AArch64CC::NE;
2232   case CmpInst::ICMP_UGE:
2233     return AArch64CC::HS;
2234   case CmpInst::ICMP_ULT:
2235     return AArch64CC::LO;
2236   }
2237 }
2238 
2239 /// Try to emit a combined compare-and-branch instruction.
2240 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2241   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2242   // will not be produced, as they are conditional branch instructions that do
2243   // not set flags.
2244   if (FuncInfo.MF->getFunction().hasFnAttribute(
2245           Attribute::SpeculativeLoadHardening))
2246     return false;
2247 
2248   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2249   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2250   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2251 
2252   const Value *LHS = CI->getOperand(0);
2253   const Value *RHS = CI->getOperand(1);
2254 
2255   MVT VT;
2256   if (!isTypeSupported(LHS->getType(), VT))
2257     return false;
2258 
2259   unsigned BW = VT.getSizeInBits();
2260   if (BW > 64)
2261     return false;
2262 
2263   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2264   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2265 
2266   // Try to take advantage of fallthrough opportunities.
2267   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2268     std::swap(TBB, FBB);
2269     Predicate = CmpInst::getInversePredicate(Predicate);
2270   }
2271 
2272   int TestBit = -1;
2273   bool IsCmpNE;
2274   switch (Predicate) {
2275   default:
2276     return false;
2277   case CmpInst::ICMP_EQ:
2278   case CmpInst::ICMP_NE:
2279     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2280       std::swap(LHS, RHS);
2281 
2282     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2283       return false;
2284 
2285     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2286       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2287         const Value *AndLHS = AI->getOperand(0);
2288         const Value *AndRHS = AI->getOperand(1);
2289 
2290         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2291           if (C->getValue().isPowerOf2())
2292             std::swap(AndLHS, AndRHS);
2293 
2294         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2295           if (C->getValue().isPowerOf2()) {
2296             TestBit = C->getValue().logBase2();
2297             LHS = AndLHS;
2298           }
2299       }
2300 
2301     if (VT == MVT::i1)
2302       TestBit = 0;
2303 
2304     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2305     break;
2306   case CmpInst::ICMP_SLT:
2307   case CmpInst::ICMP_SGE:
2308     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2309       return false;
2310 
2311     TestBit = BW - 1;
2312     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2313     break;
2314   case CmpInst::ICMP_SGT:
2315   case CmpInst::ICMP_SLE:
2316     if (!isa<ConstantInt>(RHS))
2317       return false;
2318 
2319     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2320       return false;
2321 
2322     TestBit = BW - 1;
2323     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2324     break;
2325   } // end switch
2326 
2327   static const unsigned OpcTable[2][2][2] = {
2328     { {AArch64::CBZW,  AArch64::CBZX },
2329       {AArch64::CBNZW, AArch64::CBNZX} },
2330     { {AArch64::TBZW,  AArch64::TBZX },
2331       {AArch64::TBNZW, AArch64::TBNZX} }
2332   };
2333 
2334   bool IsBitTest = TestBit != -1;
2335   bool Is64Bit = BW == 64;
2336   if (TestBit < 32 && TestBit >= 0)
2337     Is64Bit = false;
2338 
2339   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2340   const MCInstrDesc &II = TII.get(Opc);
2341 
2342   unsigned SrcReg = getRegForValue(LHS);
2343   if (!SrcReg)
2344     return false;
2345 
2346   if (BW == 64 && !Is64Bit)
2347     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2348 
2349   if ((BW < 32) && !IsBitTest)
2350     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2351 
2352   // Emit the combined compare and branch instruction.
2353   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2354   MachineInstrBuilder MIB =
2355       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2356           .addReg(SrcReg);
2357   if (IsBitTest)
2358     MIB.addImm(TestBit);
2359   MIB.addMBB(TBB);
2360 
2361   finishCondBranch(BI->getParent(), TBB, FBB);
2362   return true;
2363 }
2364 
2365 bool AArch64FastISel::selectBranch(const Instruction *I) {
2366   const BranchInst *BI = cast<BranchInst>(I);
2367   if (BI->isUnconditional()) {
2368     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2369     fastEmitBranch(MSucc, BI->getDebugLoc());
2370     return true;
2371   }
2372 
2373   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2374   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2375 
2376   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2377     if (CI->hasOneUse() && isValueAvailable(CI)) {
2378       // Try to optimize or fold the cmp.
2379       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2380       switch (Predicate) {
2381       default:
2382         break;
2383       case CmpInst::FCMP_FALSE:
2384         fastEmitBranch(FBB, DbgLoc);
2385         return true;
2386       case CmpInst::FCMP_TRUE:
2387         fastEmitBranch(TBB, DbgLoc);
2388         return true;
2389       }
2390 
2391       // Try to emit a combined compare-and-branch first.
2392       if (emitCompareAndBranch(BI))
2393         return true;
2394 
2395       // Try to take advantage of fallthrough opportunities.
2396       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2397         std::swap(TBB, FBB);
2398         Predicate = CmpInst::getInversePredicate(Predicate);
2399       }
2400 
2401       // Emit the cmp.
2402       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2403         return false;
2404 
2405       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2406       // instruction.
2407       AArch64CC::CondCode CC = getCompareCC(Predicate);
2408       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2409       switch (Predicate) {
2410       default:
2411         break;
2412       case CmpInst::FCMP_UEQ:
2413         ExtraCC = AArch64CC::EQ;
2414         CC = AArch64CC::VS;
2415         break;
2416       case CmpInst::FCMP_ONE:
2417         ExtraCC = AArch64CC::MI;
2418         CC = AArch64CC::GT;
2419         break;
2420       }
2421       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2422 
2423       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2424       if (ExtraCC != AArch64CC::AL) {
2425         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2426             .addImm(ExtraCC)
2427             .addMBB(TBB);
2428       }
2429 
2430       // Emit the branch.
2431       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2432           .addImm(CC)
2433           .addMBB(TBB);
2434 
2435       finishCondBranch(BI->getParent(), TBB, FBB);
2436       return true;
2437     }
2438   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2439     uint64_t Imm = CI->getZExtValue();
2440     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2441     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2442         .addMBB(Target);
2443 
2444     // Obtain the branch probability and add the target to the successor list.
2445     if (FuncInfo.BPI) {
2446       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2447           BI->getParent(), Target->getBasicBlock());
2448       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2449     } else
2450       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2451     return true;
2452   } else {
2453     AArch64CC::CondCode CC = AArch64CC::NE;
2454     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2455       // Fake request the condition, otherwise the intrinsic might be completely
2456       // optimized away.
2457       unsigned CondReg = getRegForValue(BI->getCondition());
2458       if (!CondReg)
2459         return false;
2460 
2461       // Emit the branch.
2462       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2463         .addImm(CC)
2464         .addMBB(TBB);
2465 
2466       finishCondBranch(BI->getParent(), TBB, FBB);
2467       return true;
2468     }
2469   }
2470 
2471   unsigned CondReg = getRegForValue(BI->getCondition());
2472   if (CondReg == 0)
2473     return false;
2474 
2475   // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2476   unsigned Opcode = AArch64::TBNZW;
2477   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2478     std::swap(TBB, FBB);
2479     Opcode = AArch64::TBZW;
2480   }
2481 
2482   const MCInstrDesc &II = TII.get(Opcode);
2483   unsigned ConstrainedCondReg
2484     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2485   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2486       .addReg(ConstrainedCondReg)
2487       .addImm(0)
2488       .addMBB(TBB);
2489 
2490   finishCondBranch(BI->getParent(), TBB, FBB);
2491   return true;
2492 }
2493 
2494 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2495   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2496   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2497   if (AddrReg == 0)
2498     return false;
2499 
2500   // Emit the indirect branch.
2501   const MCInstrDesc &II = TII.get(AArch64::BR);
2502   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2503   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2504 
2505   // Make sure the CFG is up-to-date.
2506   for (auto *Succ : BI->successors())
2507     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2508 
2509   return true;
2510 }
2511 
2512 bool AArch64FastISel::selectCmp(const Instruction *I) {
2513   const CmpInst *CI = cast<CmpInst>(I);
2514 
2515   // Vectors of i1 are weird: bail out.
2516   if (CI->getType()->isVectorTy())
2517     return false;
2518 
2519   // Try to optimize or fold the cmp.
2520   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2521   unsigned ResultReg = 0;
2522   switch (Predicate) {
2523   default:
2524     break;
2525   case CmpInst::FCMP_FALSE:
2526     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2527     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2528             TII.get(TargetOpcode::COPY), ResultReg)
2529         .addReg(AArch64::WZR, getKillRegState(true));
2530     break;
2531   case CmpInst::FCMP_TRUE:
2532     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2533     break;
2534   }
2535 
2536   if (ResultReg) {
2537     updateValueMap(I, ResultReg);
2538     return true;
2539   }
2540 
2541   // Emit the cmp.
2542   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2543     return false;
2544 
2545   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2546 
2547   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2548   // condition codes are inverted, because they are used by CSINC.
2549   static unsigned CondCodeTable[2][2] = {
2550     { AArch64CC::NE, AArch64CC::VC },
2551     { AArch64CC::PL, AArch64CC::LE }
2552   };
2553   unsigned *CondCodes = nullptr;
2554   switch (Predicate) {
2555   default:
2556     break;
2557   case CmpInst::FCMP_UEQ:
2558     CondCodes = &CondCodeTable[0][0];
2559     break;
2560   case CmpInst::FCMP_ONE:
2561     CondCodes = &CondCodeTable[1][0];
2562     break;
2563   }
2564 
2565   if (CondCodes) {
2566     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2567     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2568             TmpReg1)
2569         .addReg(AArch64::WZR, getKillRegState(true))
2570         .addReg(AArch64::WZR, getKillRegState(true))
2571         .addImm(CondCodes[0]);
2572     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2573             ResultReg)
2574         .addReg(TmpReg1, getKillRegState(true))
2575         .addReg(AArch64::WZR, getKillRegState(true))
2576         .addImm(CondCodes[1]);
2577 
2578     updateValueMap(I, ResultReg);
2579     return true;
2580   }
2581 
2582   // Now set a register based on the comparison.
2583   AArch64CC::CondCode CC = getCompareCC(Predicate);
2584   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2585   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2586   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587           ResultReg)
2588       .addReg(AArch64::WZR, getKillRegState(true))
2589       .addReg(AArch64::WZR, getKillRegState(true))
2590       .addImm(invertedCC);
2591 
2592   updateValueMap(I, ResultReg);
2593   return true;
2594 }
2595 
2596 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2597 /// value.
2598 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2599   if (!SI->getType()->isIntegerTy(1))
2600     return false;
2601 
2602   const Value *Src1Val, *Src2Val;
2603   unsigned Opc = 0;
2604   bool NeedExtraOp = false;
2605   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2606     if (CI->isOne()) {
2607       Src1Val = SI->getCondition();
2608       Src2Val = SI->getFalseValue();
2609       Opc = AArch64::ORRWrr;
2610     } else {
2611       assert(CI->isZero());
2612       Src1Val = SI->getFalseValue();
2613       Src2Val = SI->getCondition();
2614       Opc = AArch64::BICWrr;
2615     }
2616   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2617     if (CI->isOne()) {
2618       Src1Val = SI->getCondition();
2619       Src2Val = SI->getTrueValue();
2620       Opc = AArch64::ORRWrr;
2621       NeedExtraOp = true;
2622     } else {
2623       assert(CI->isZero());
2624       Src1Val = SI->getCondition();
2625       Src2Val = SI->getTrueValue();
2626       Opc = AArch64::ANDWrr;
2627     }
2628   }
2629 
2630   if (!Opc)
2631     return false;
2632 
2633   unsigned Src1Reg = getRegForValue(Src1Val);
2634   if (!Src1Reg)
2635     return false;
2636 
2637   unsigned Src2Reg = getRegForValue(Src2Val);
2638   if (!Src2Reg)
2639     return false;
2640 
2641   if (NeedExtraOp)
2642     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2643 
2644   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2645                                        Src2Reg);
2646   updateValueMap(SI, ResultReg);
2647   return true;
2648 }
2649 
2650 bool AArch64FastISel::selectSelect(const Instruction *I) {
2651   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2652   MVT VT;
2653   if (!isTypeSupported(I->getType(), VT))
2654     return false;
2655 
2656   unsigned Opc;
2657   const TargetRegisterClass *RC;
2658   switch (VT.SimpleTy) {
2659   default:
2660     return false;
2661   case MVT::i1:
2662   case MVT::i8:
2663   case MVT::i16:
2664   case MVT::i32:
2665     Opc = AArch64::CSELWr;
2666     RC = &AArch64::GPR32RegClass;
2667     break;
2668   case MVT::i64:
2669     Opc = AArch64::CSELXr;
2670     RC = &AArch64::GPR64RegClass;
2671     break;
2672   case MVT::f32:
2673     Opc = AArch64::FCSELSrrr;
2674     RC = &AArch64::FPR32RegClass;
2675     break;
2676   case MVT::f64:
2677     Opc = AArch64::FCSELDrrr;
2678     RC = &AArch64::FPR64RegClass;
2679     break;
2680   }
2681 
2682   const SelectInst *SI = cast<SelectInst>(I);
2683   const Value *Cond = SI->getCondition();
2684   AArch64CC::CondCode CC = AArch64CC::NE;
2685   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2686 
2687   if (optimizeSelect(SI))
2688     return true;
2689 
2690   // Try to pickup the flags, so we don't have to emit another compare.
2691   if (foldXALUIntrinsic(CC, I, Cond)) {
2692     // Fake request the condition to force emission of the XALU intrinsic.
2693     unsigned CondReg = getRegForValue(Cond);
2694     if (!CondReg)
2695       return false;
2696   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2697              isValueAvailable(Cond)) {
2698     const auto *Cmp = cast<CmpInst>(Cond);
2699     // Try to optimize or fold the cmp.
2700     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2701     const Value *FoldSelect = nullptr;
2702     switch (Predicate) {
2703     default:
2704       break;
2705     case CmpInst::FCMP_FALSE:
2706       FoldSelect = SI->getFalseValue();
2707       break;
2708     case CmpInst::FCMP_TRUE:
2709       FoldSelect = SI->getTrueValue();
2710       break;
2711     }
2712 
2713     if (FoldSelect) {
2714       unsigned SrcReg = getRegForValue(FoldSelect);
2715       if (!SrcReg)
2716         return false;
2717 
2718       updateValueMap(I, SrcReg);
2719       return true;
2720     }
2721 
2722     // Emit the cmp.
2723     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2724       return false;
2725 
2726     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2727     CC = getCompareCC(Predicate);
2728     switch (Predicate) {
2729     default:
2730       break;
2731     case CmpInst::FCMP_UEQ:
2732       ExtraCC = AArch64CC::EQ;
2733       CC = AArch64CC::VS;
2734       break;
2735     case CmpInst::FCMP_ONE:
2736       ExtraCC = AArch64CC::MI;
2737       CC = AArch64CC::GT;
2738       break;
2739     }
2740     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2741   } else {
2742     unsigned CondReg = getRegForValue(Cond);
2743     if (!CondReg)
2744       return false;
2745 
2746     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2747     CondReg = constrainOperandRegClass(II, CondReg, 1);
2748 
2749     // Emit a TST instruction (ANDS wzr, reg, #imm).
2750     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2751             AArch64::WZR)
2752         .addReg(CondReg)
2753         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2754   }
2755 
2756   unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2757   unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2758 
2759   if (!Src1Reg || !Src2Reg)
2760     return false;
2761 
2762   if (ExtraCC != AArch64CC::AL)
2763     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2764 
2765   unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2766   updateValueMap(I, ResultReg);
2767   return true;
2768 }
2769 
2770 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2771   Value *V = I->getOperand(0);
2772   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2773     return false;
2774 
2775   unsigned Op = getRegForValue(V);
2776   if (Op == 0)
2777     return false;
2778 
2779   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2780   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2781           ResultReg).addReg(Op);
2782   updateValueMap(I, ResultReg);
2783   return true;
2784 }
2785 
2786 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2787   Value *V = I->getOperand(0);
2788   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2789     return false;
2790 
2791   unsigned Op = getRegForValue(V);
2792   if (Op == 0)
2793     return false;
2794 
2795   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2796   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2797           ResultReg).addReg(Op);
2798   updateValueMap(I, ResultReg);
2799   return true;
2800 }
2801 
2802 // FPToUI and FPToSI
2803 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2804   MVT DestVT;
2805   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2806     return false;
2807 
2808   unsigned SrcReg = getRegForValue(I->getOperand(0));
2809   if (SrcReg == 0)
2810     return false;
2811 
2812   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2813   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2814     return false;
2815 
2816   unsigned Opc;
2817   if (SrcVT == MVT::f64) {
2818     if (Signed)
2819       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2820     else
2821       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2822   } else {
2823     if (Signed)
2824       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2825     else
2826       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2827   }
2828   unsigned ResultReg = createResultReg(
2829       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2830   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2831       .addReg(SrcReg);
2832   updateValueMap(I, ResultReg);
2833   return true;
2834 }
2835 
2836 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2837   MVT DestVT;
2838   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2839     return false;
2840   // Let regular ISEL handle FP16
2841   if (DestVT == MVT::f16)
2842     return false;
2843 
2844   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2845          "Unexpected value type.");
2846 
2847   unsigned SrcReg = getRegForValue(I->getOperand(0));
2848   if (!SrcReg)
2849     return false;
2850 
2851   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2852 
2853   // Handle sign-extension.
2854   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2855     SrcReg =
2856         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2857     if (!SrcReg)
2858       return false;
2859   }
2860 
2861   unsigned Opc;
2862   if (SrcVT == MVT::i64) {
2863     if (Signed)
2864       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2865     else
2866       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2867   } else {
2868     if (Signed)
2869       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2870     else
2871       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2872   }
2873 
2874   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2875   updateValueMap(I, ResultReg);
2876   return true;
2877 }
2878 
2879 bool AArch64FastISel::fastLowerArguments() {
2880   if (!FuncInfo.CanLowerReturn)
2881     return false;
2882 
2883   const Function *F = FuncInfo.Fn;
2884   if (F->isVarArg())
2885     return false;
2886 
2887   CallingConv::ID CC = F->getCallingConv();
2888   if (CC != CallingConv::C && CC != CallingConv::Swift)
2889     return false;
2890 
2891   if (Subtarget->hasCustomCallingConv())
2892     return false;
2893 
2894   // Only handle simple cases of up to 8 GPR and FPR each.
2895   unsigned GPRCnt = 0;
2896   unsigned FPRCnt = 0;
2897   for (auto const &Arg : F->args()) {
2898     if (Arg.hasAttribute(Attribute::ByVal) ||
2899         Arg.hasAttribute(Attribute::InReg) ||
2900         Arg.hasAttribute(Attribute::StructRet) ||
2901         Arg.hasAttribute(Attribute::SwiftSelf) ||
2902         Arg.hasAttribute(Attribute::SwiftAsync) ||
2903         Arg.hasAttribute(Attribute::SwiftError) ||
2904         Arg.hasAttribute(Attribute::Nest))
2905       return false;
2906 
2907     Type *ArgTy = Arg.getType();
2908     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2909       return false;
2910 
2911     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2912     if (!ArgVT.isSimple())
2913       return false;
2914 
2915     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2916     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2917       return false;
2918 
2919     if (VT.isVector() &&
2920         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2921       return false;
2922 
2923     if (VT >= MVT::i1 && VT <= MVT::i64)
2924       ++GPRCnt;
2925     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2926              VT.is128BitVector())
2927       ++FPRCnt;
2928     else
2929       return false;
2930 
2931     if (GPRCnt > 8 || FPRCnt > 8)
2932       return false;
2933   }
2934 
2935   static const MCPhysReg Registers[6][8] = {
2936     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2937       AArch64::W5, AArch64::W6, AArch64::W7 },
2938     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2939       AArch64::X5, AArch64::X6, AArch64::X7 },
2940     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2941       AArch64::H5, AArch64::H6, AArch64::H7 },
2942     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2943       AArch64::S5, AArch64::S6, AArch64::S7 },
2944     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2945       AArch64::D5, AArch64::D6, AArch64::D7 },
2946     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2947       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2948   };
2949 
2950   unsigned GPRIdx = 0;
2951   unsigned FPRIdx = 0;
2952   for (auto const &Arg : F->args()) {
2953     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2954     unsigned SrcReg;
2955     const TargetRegisterClass *RC;
2956     if (VT >= MVT::i1 && VT <= MVT::i32) {
2957       SrcReg = Registers[0][GPRIdx++];
2958       RC = &AArch64::GPR32RegClass;
2959       VT = MVT::i32;
2960     } else if (VT == MVT::i64) {
2961       SrcReg = Registers[1][GPRIdx++];
2962       RC = &AArch64::GPR64RegClass;
2963     } else if (VT == MVT::f16) {
2964       SrcReg = Registers[2][FPRIdx++];
2965       RC = &AArch64::FPR16RegClass;
2966     } else if (VT ==  MVT::f32) {
2967       SrcReg = Registers[3][FPRIdx++];
2968       RC = &AArch64::FPR32RegClass;
2969     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2970       SrcReg = Registers[4][FPRIdx++];
2971       RC = &AArch64::FPR64RegClass;
2972     } else if (VT.is128BitVector()) {
2973       SrcReg = Registers[5][FPRIdx++];
2974       RC = &AArch64::FPR128RegClass;
2975     } else
2976       llvm_unreachable("Unexpected value type.");
2977 
2978     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2979     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2980     // Without this, EmitLiveInCopies may eliminate the livein if its only
2981     // use is a bitcast (which isn't turned into an instruction).
2982     unsigned ResultReg = createResultReg(RC);
2983     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2984             TII.get(TargetOpcode::COPY), ResultReg)
2985         .addReg(DstReg, getKillRegState(true));
2986     updateValueMap(&Arg, ResultReg);
2987   }
2988   return true;
2989 }
2990 
2991 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2992                                       SmallVectorImpl<MVT> &OutVTs,
2993                                       unsigned &NumBytes) {
2994   CallingConv::ID CC = CLI.CallConv;
2995   SmallVector<CCValAssign, 16> ArgLocs;
2996   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2997   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2998 
2999   // Get a count of how many bytes are to be pushed on the stack.
3000   NumBytes = CCInfo.getNextStackOffset();
3001 
3002   // Issue CALLSEQ_START
3003   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3004   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3005     .addImm(NumBytes).addImm(0);
3006 
3007   // Process the args.
3008   for (CCValAssign &VA : ArgLocs) {
3009     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3010     MVT ArgVT = OutVTs[VA.getValNo()];
3011 
3012     unsigned ArgReg = getRegForValue(ArgVal);
3013     if (!ArgReg)
3014       return false;
3015 
3016     // Handle arg promotion: SExt, ZExt, AExt.
3017     switch (VA.getLocInfo()) {
3018     case CCValAssign::Full:
3019       break;
3020     case CCValAssign::SExt: {
3021       MVT DestVT = VA.getLocVT();
3022       MVT SrcVT = ArgVT;
3023       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3024       if (!ArgReg)
3025         return false;
3026       break;
3027     }
3028     case CCValAssign::AExt:
3029     // Intentional fall-through.
3030     case CCValAssign::ZExt: {
3031       MVT DestVT = VA.getLocVT();
3032       MVT SrcVT = ArgVT;
3033       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3034       if (!ArgReg)
3035         return false;
3036       break;
3037     }
3038     default:
3039       llvm_unreachable("Unknown arg promotion!");
3040     }
3041 
3042     // Now copy/store arg to correct locations.
3043     if (VA.isRegLoc() && !VA.needsCustom()) {
3044       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3045               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3046       CLI.OutRegs.push_back(VA.getLocReg());
3047     } else if (VA.needsCustom()) {
3048       // FIXME: Handle custom args.
3049       return false;
3050     } else {
3051       assert(VA.isMemLoc() && "Assuming store on stack.");
3052 
3053       // Don't emit stores for undef values.
3054       if (isa<UndefValue>(ArgVal))
3055         continue;
3056 
3057       // Need to store on the stack.
3058       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3059 
3060       unsigned BEAlign = 0;
3061       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3062         BEAlign = 8 - ArgSize;
3063 
3064       Address Addr;
3065       Addr.setKind(Address::RegBase);
3066       Addr.setReg(AArch64::SP);
3067       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3068 
3069       Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3070       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3071           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3072           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3073 
3074       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3075         return false;
3076     }
3077   }
3078   return true;
3079 }
3080 
3081 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3082                                  unsigned NumBytes) {
3083   CallingConv::ID CC = CLI.CallConv;
3084 
3085   // Issue CALLSEQ_END
3086   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3087   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3088     .addImm(NumBytes).addImm(0);
3089 
3090   // Now the return value.
3091   if (RetVT != MVT::isVoid) {
3092     SmallVector<CCValAssign, 16> RVLocs;
3093     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3094     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3095 
3096     // Only handle a single return value.
3097     if (RVLocs.size() != 1)
3098       return false;
3099 
3100     // Copy all of the result registers out of their specified physreg.
3101     MVT CopyVT = RVLocs[0].getValVT();
3102 
3103     // TODO: Handle big-endian results
3104     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3105       return false;
3106 
3107     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3108     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3109             TII.get(TargetOpcode::COPY), ResultReg)
3110         .addReg(RVLocs[0].getLocReg());
3111     CLI.InRegs.push_back(RVLocs[0].getLocReg());
3112 
3113     CLI.ResultReg = ResultReg;
3114     CLI.NumResultRegs = 1;
3115   }
3116 
3117   return true;
3118 }
3119 
3120 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3121   CallingConv::ID CC  = CLI.CallConv;
3122   bool IsTailCall     = CLI.IsTailCall;
3123   bool IsVarArg       = CLI.IsVarArg;
3124   const Value *Callee = CLI.Callee;
3125   MCSymbol *Symbol = CLI.Symbol;
3126 
3127   if (!Callee && !Symbol)
3128     return false;
3129 
3130   // Allow SelectionDAG isel to handle tail calls.
3131   if (IsTailCall)
3132     return false;
3133 
3134   // FIXME: we could and should support this, but for now correctness at -O0 is
3135   // more important.
3136   if (Subtarget->isTargetILP32())
3137     return false;
3138 
3139   CodeModel::Model CM = TM.getCodeModel();
3140   // Only support the small-addressing and large code models.
3141   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3142     return false;
3143 
3144   // FIXME: Add large code model support for ELF.
3145   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3146     return false;
3147 
3148   // Let SDISel handle vararg functions.
3149   if (IsVarArg)
3150     return false;
3151 
3152   // FIXME: Only handle *simple* calls for now.
3153   MVT RetVT;
3154   if (CLI.RetTy->isVoidTy())
3155     RetVT = MVT::isVoid;
3156   else if (!isTypeLegal(CLI.RetTy, RetVT))
3157     return false;
3158 
3159   for (auto Flag : CLI.OutFlags)
3160     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3161         Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3162       return false;
3163 
3164   // Set up the argument vectors.
3165   SmallVector<MVT, 16> OutVTs;
3166   OutVTs.reserve(CLI.OutVals.size());
3167 
3168   for (auto *Val : CLI.OutVals) {
3169     MVT VT;
3170     if (!isTypeLegal(Val->getType(), VT) &&
3171         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3172       return false;
3173 
3174     // We don't handle vector parameters yet.
3175     if (VT.isVector() || VT.getSizeInBits() > 64)
3176       return false;
3177 
3178     OutVTs.push_back(VT);
3179   }
3180 
3181   Address Addr;
3182   if (Callee && !computeCallAddress(Callee, Addr))
3183     return false;
3184 
3185   // The weak function target may be zero; in that case we must use indirect
3186   // addressing via a stub on windows as it may be out of range for a
3187   // PC-relative jump.
3188   if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3189       Addr.getGlobalValue()->hasExternalWeakLinkage())
3190     return false;
3191 
3192   // Handle the arguments now that we've gotten them.
3193   unsigned NumBytes;
3194   if (!processCallArgs(CLI, OutVTs, NumBytes))
3195     return false;
3196 
3197   const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3198   if (RegInfo->isAnyArgRegReserved(*MF))
3199     RegInfo->emitReservedArgRegCallError(*MF);
3200 
3201   // Issue the call.
3202   MachineInstrBuilder MIB;
3203   if (Subtarget->useSmallAddressing()) {
3204     const MCInstrDesc &II =
3205         TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3206     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3207     if (Symbol)
3208       MIB.addSym(Symbol, 0);
3209     else if (Addr.getGlobalValue())
3210       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3211     else if (Addr.getReg()) {
3212       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3213       MIB.addReg(Reg);
3214     } else
3215       return false;
3216   } else {
3217     unsigned CallReg = 0;
3218     if (Symbol) {
3219       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3220       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3221               ADRPReg)
3222           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3223 
3224       CallReg = createResultReg(&AArch64::GPR64RegClass);
3225       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3226               TII.get(AArch64::LDRXui), CallReg)
3227           .addReg(ADRPReg)
3228           .addSym(Symbol,
3229                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3230     } else if (Addr.getGlobalValue())
3231       CallReg = materializeGV(Addr.getGlobalValue());
3232     else if (Addr.getReg())
3233       CallReg = Addr.getReg();
3234 
3235     if (!CallReg)
3236       return false;
3237 
3238     const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3239     CallReg = constrainOperandRegClass(II, CallReg, 0);
3240     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3241   }
3242 
3243   // Add implicit physical register uses to the call.
3244   for (auto Reg : CLI.OutRegs)
3245     MIB.addReg(Reg, RegState::Implicit);
3246 
3247   // Add a register mask with the call-preserved registers.
3248   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3249   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3250 
3251   CLI.Call = MIB;
3252 
3253   // Finish off the call including any return values.
3254   return finishCall(CLI, RetVT, NumBytes);
3255 }
3256 
3257 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3258   if (Alignment)
3259     return Len / Alignment <= 4;
3260   else
3261     return Len < 32;
3262 }
3263 
3264 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3265                                          uint64_t Len, unsigned Alignment) {
3266   // Make sure we don't bloat code by inlining very large memcpy's.
3267   if (!isMemCpySmall(Len, Alignment))
3268     return false;
3269 
3270   int64_t UnscaledOffset = 0;
3271   Address OrigDest = Dest;
3272   Address OrigSrc = Src;
3273 
3274   while (Len) {
3275     MVT VT;
3276     if (!Alignment || Alignment >= 8) {
3277       if (Len >= 8)
3278         VT = MVT::i64;
3279       else if (Len >= 4)
3280         VT = MVT::i32;
3281       else if (Len >= 2)
3282         VT = MVT::i16;
3283       else {
3284         VT = MVT::i8;
3285       }
3286     } else {
3287       // Bound based on alignment.
3288       if (Len >= 4 && Alignment == 4)
3289         VT = MVT::i32;
3290       else if (Len >= 2 && Alignment == 2)
3291         VT = MVT::i16;
3292       else {
3293         VT = MVT::i8;
3294       }
3295     }
3296 
3297     unsigned ResultReg = emitLoad(VT, VT, Src);
3298     if (!ResultReg)
3299       return false;
3300 
3301     if (!emitStore(VT, ResultReg, Dest))
3302       return false;
3303 
3304     int64_t Size = VT.getSizeInBits() / 8;
3305     Len -= Size;
3306     UnscaledOffset += Size;
3307 
3308     // We need to recompute the unscaled offset for each iteration.
3309     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3310     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3311   }
3312 
3313   return true;
3314 }
3315 
3316 /// Check if it is possible to fold the condition from the XALU intrinsic
3317 /// into the user. The condition code will only be updated on success.
3318 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3319                                         const Instruction *I,
3320                                         const Value *Cond) {
3321   if (!isa<ExtractValueInst>(Cond))
3322     return false;
3323 
3324   const auto *EV = cast<ExtractValueInst>(Cond);
3325   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3326     return false;
3327 
3328   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3329   MVT RetVT;
3330   const Function *Callee = II->getCalledFunction();
3331   Type *RetTy =
3332   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3333   if (!isTypeLegal(RetTy, RetVT))
3334     return false;
3335 
3336   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3337     return false;
3338 
3339   const Value *LHS = II->getArgOperand(0);
3340   const Value *RHS = II->getArgOperand(1);
3341 
3342   // Canonicalize immediate to the RHS.
3343   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3344     std::swap(LHS, RHS);
3345 
3346   // Simplify multiplies.
3347   Intrinsic::ID IID = II->getIntrinsicID();
3348   switch (IID) {
3349   default:
3350     break;
3351   case Intrinsic::smul_with_overflow:
3352     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3353       if (C->getValue() == 2)
3354         IID = Intrinsic::sadd_with_overflow;
3355     break;
3356   case Intrinsic::umul_with_overflow:
3357     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3358       if (C->getValue() == 2)
3359         IID = Intrinsic::uadd_with_overflow;
3360     break;
3361   }
3362 
3363   AArch64CC::CondCode TmpCC;
3364   switch (IID) {
3365   default:
3366     return false;
3367   case Intrinsic::sadd_with_overflow:
3368   case Intrinsic::ssub_with_overflow:
3369     TmpCC = AArch64CC::VS;
3370     break;
3371   case Intrinsic::uadd_with_overflow:
3372     TmpCC = AArch64CC::HS;
3373     break;
3374   case Intrinsic::usub_with_overflow:
3375     TmpCC = AArch64CC::LO;
3376     break;
3377   case Intrinsic::smul_with_overflow:
3378   case Intrinsic::umul_with_overflow:
3379     TmpCC = AArch64CC::NE;
3380     break;
3381   }
3382 
3383   // Check if both instructions are in the same basic block.
3384   if (!isValueAvailable(II))
3385     return false;
3386 
3387   // Make sure nothing is in the way
3388   BasicBlock::const_iterator Start(I);
3389   BasicBlock::const_iterator End(II);
3390   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3391     // We only expect extractvalue instructions between the intrinsic and the
3392     // instruction to be selected.
3393     if (!isa<ExtractValueInst>(Itr))
3394       return false;
3395 
3396     // Check that the extractvalue operand comes from the intrinsic.
3397     const auto *EVI = cast<ExtractValueInst>(Itr);
3398     if (EVI->getAggregateOperand() != II)
3399       return false;
3400   }
3401 
3402   CC = TmpCC;
3403   return true;
3404 }
3405 
3406 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3407   // FIXME: Handle more intrinsics.
3408   switch (II->getIntrinsicID()) {
3409   default: return false;
3410   case Intrinsic::frameaddress: {
3411     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3412     MFI.setFrameAddressIsTaken(true);
3413 
3414     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3415     Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3416     Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3417     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3418             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3419     // Recursively load frame address
3420     // ldr x0, [fp]
3421     // ldr x0, [x0]
3422     // ldr x0, [x0]
3423     // ...
3424     unsigned DestReg;
3425     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3426     while (Depth--) {
3427       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3428                                 SrcReg, 0);
3429       assert(DestReg && "Unexpected LDR instruction emission failure.");
3430       SrcReg = DestReg;
3431     }
3432 
3433     updateValueMap(II, SrcReg);
3434     return true;
3435   }
3436   case Intrinsic::sponentry: {
3437     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3438 
3439     // SP = FP + Fixed Object + 16
3440     int FI = MFI.CreateFixedObject(4, 0, false);
3441     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3442     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3443             TII.get(AArch64::ADDXri), ResultReg)
3444             .addFrameIndex(FI)
3445             .addImm(0)
3446             .addImm(0);
3447 
3448     updateValueMap(II, ResultReg);
3449     return true;
3450   }
3451   case Intrinsic::memcpy:
3452   case Intrinsic::memmove: {
3453     const auto *MTI = cast<MemTransferInst>(II);
3454     // Don't handle volatile.
3455     if (MTI->isVolatile())
3456       return false;
3457 
3458     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3459     // we would emit dead code because we don't currently handle memmoves.
3460     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3461     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3462       // Small memcpy's are common enough that we want to do them without a call
3463       // if possible.
3464       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3465       unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3466                                     MTI->getSourceAlignment());
3467       if (isMemCpySmall(Len, Alignment)) {
3468         Address Dest, Src;
3469         if (!computeAddress(MTI->getRawDest(), Dest) ||
3470             !computeAddress(MTI->getRawSource(), Src))
3471           return false;
3472         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3473           return true;
3474       }
3475     }
3476 
3477     if (!MTI->getLength()->getType()->isIntegerTy(64))
3478       return false;
3479 
3480     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3481       // Fast instruction selection doesn't support the special
3482       // address spaces.
3483       return false;
3484 
3485     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3486     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3487   }
3488   case Intrinsic::memset: {
3489     const MemSetInst *MSI = cast<MemSetInst>(II);
3490     // Don't handle volatile.
3491     if (MSI->isVolatile())
3492       return false;
3493 
3494     if (!MSI->getLength()->getType()->isIntegerTy(64))
3495       return false;
3496 
3497     if (MSI->getDestAddressSpace() > 255)
3498       // Fast instruction selection doesn't support the special
3499       // address spaces.
3500       return false;
3501 
3502     return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3503   }
3504   case Intrinsic::sin:
3505   case Intrinsic::cos:
3506   case Intrinsic::pow: {
3507     MVT RetVT;
3508     if (!isTypeLegal(II->getType(), RetVT))
3509       return false;
3510 
3511     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3512       return false;
3513 
3514     static const RTLIB::Libcall LibCallTable[3][2] = {
3515       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3516       { RTLIB::COS_F32, RTLIB::COS_F64 },
3517       { RTLIB::POW_F32, RTLIB::POW_F64 }
3518     };
3519     RTLIB::Libcall LC;
3520     bool Is64Bit = RetVT == MVT::f64;
3521     switch (II->getIntrinsicID()) {
3522     default:
3523       llvm_unreachable("Unexpected intrinsic.");
3524     case Intrinsic::sin:
3525       LC = LibCallTable[0][Is64Bit];
3526       break;
3527     case Intrinsic::cos:
3528       LC = LibCallTable[1][Is64Bit];
3529       break;
3530     case Intrinsic::pow:
3531       LC = LibCallTable[2][Is64Bit];
3532       break;
3533     }
3534 
3535     ArgListTy Args;
3536     Args.reserve(II->getNumArgOperands());
3537 
3538     // Populate the argument list.
3539     for (auto &Arg : II->arg_operands()) {
3540       ArgListEntry Entry;
3541       Entry.Val = Arg;
3542       Entry.Ty = Arg->getType();
3543       Args.push_back(Entry);
3544     }
3545 
3546     CallLoweringInfo CLI;
3547     MCContext &Ctx = MF->getContext();
3548     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3549                   TLI.getLibcallName(LC), std::move(Args));
3550     if (!lowerCallTo(CLI))
3551       return false;
3552     updateValueMap(II, CLI.ResultReg);
3553     return true;
3554   }
3555   case Intrinsic::fabs: {
3556     MVT VT;
3557     if (!isTypeLegal(II->getType(), VT))
3558       return false;
3559 
3560     unsigned Opc;
3561     switch (VT.SimpleTy) {
3562     default:
3563       return false;
3564     case MVT::f32:
3565       Opc = AArch64::FABSSr;
3566       break;
3567     case MVT::f64:
3568       Opc = AArch64::FABSDr;
3569       break;
3570     }
3571     unsigned SrcReg = getRegForValue(II->getOperand(0));
3572     if (!SrcReg)
3573       return false;
3574     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3575     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3576       .addReg(SrcReg);
3577     updateValueMap(II, ResultReg);
3578     return true;
3579   }
3580   case Intrinsic::trap:
3581     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3582         .addImm(1);
3583     return true;
3584   case Intrinsic::debugtrap:
3585     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3586         .addImm(0xF000);
3587     return true;
3588 
3589   case Intrinsic::sqrt: {
3590     Type *RetTy = II->getCalledFunction()->getReturnType();
3591 
3592     MVT VT;
3593     if (!isTypeLegal(RetTy, VT))
3594       return false;
3595 
3596     unsigned Op0Reg = getRegForValue(II->getOperand(0));
3597     if (!Op0Reg)
3598       return false;
3599 
3600     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3601     if (!ResultReg)
3602       return false;
3603 
3604     updateValueMap(II, ResultReg);
3605     return true;
3606   }
3607   case Intrinsic::sadd_with_overflow:
3608   case Intrinsic::uadd_with_overflow:
3609   case Intrinsic::ssub_with_overflow:
3610   case Intrinsic::usub_with_overflow:
3611   case Intrinsic::smul_with_overflow:
3612   case Intrinsic::umul_with_overflow: {
3613     // This implements the basic lowering of the xalu with overflow intrinsics.
3614     const Function *Callee = II->getCalledFunction();
3615     auto *Ty = cast<StructType>(Callee->getReturnType());
3616     Type *RetTy = Ty->getTypeAtIndex(0U);
3617 
3618     MVT VT;
3619     if (!isTypeLegal(RetTy, VT))
3620       return false;
3621 
3622     if (VT != MVT::i32 && VT != MVT::i64)
3623       return false;
3624 
3625     const Value *LHS = II->getArgOperand(0);
3626     const Value *RHS = II->getArgOperand(1);
3627     // Canonicalize immediate to the RHS.
3628     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3629       std::swap(LHS, RHS);
3630 
3631     // Simplify multiplies.
3632     Intrinsic::ID IID = II->getIntrinsicID();
3633     switch (IID) {
3634     default:
3635       break;
3636     case Intrinsic::smul_with_overflow:
3637       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3638         if (C->getValue() == 2) {
3639           IID = Intrinsic::sadd_with_overflow;
3640           RHS = LHS;
3641         }
3642       break;
3643     case Intrinsic::umul_with_overflow:
3644       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3645         if (C->getValue() == 2) {
3646           IID = Intrinsic::uadd_with_overflow;
3647           RHS = LHS;
3648         }
3649       break;
3650     }
3651 
3652     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3653     AArch64CC::CondCode CC = AArch64CC::Invalid;
3654     switch (IID) {
3655     default: llvm_unreachable("Unexpected intrinsic!");
3656     case Intrinsic::sadd_with_overflow:
3657       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3658       CC = AArch64CC::VS;
3659       break;
3660     case Intrinsic::uadd_with_overflow:
3661       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3662       CC = AArch64CC::HS;
3663       break;
3664     case Intrinsic::ssub_with_overflow:
3665       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3666       CC = AArch64CC::VS;
3667       break;
3668     case Intrinsic::usub_with_overflow:
3669       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3670       CC = AArch64CC::LO;
3671       break;
3672     case Intrinsic::smul_with_overflow: {
3673       CC = AArch64CC::NE;
3674       unsigned LHSReg = getRegForValue(LHS);
3675       if (!LHSReg)
3676         return false;
3677 
3678       unsigned RHSReg = getRegForValue(RHS);
3679       if (!RHSReg)
3680         return false;
3681 
3682       if (VT == MVT::i32) {
3683         MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3684         unsigned MulSubReg =
3685             fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3686         // cmp xreg, wreg, sxtw
3687         emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3688                       AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3689                       /*WantResult=*/false);
3690         MulReg = MulSubReg;
3691       } else {
3692         assert(VT == MVT::i64 && "Unexpected value type.");
3693         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3694         // reused in the next instruction.
3695         MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3696         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3697         emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3698                     /*WantResult=*/false);
3699       }
3700       break;
3701     }
3702     case Intrinsic::umul_with_overflow: {
3703       CC = AArch64CC::NE;
3704       unsigned LHSReg = getRegForValue(LHS);
3705       if (!LHSReg)
3706         return false;
3707 
3708       unsigned RHSReg = getRegForValue(RHS);
3709       if (!RHSReg)
3710         return false;
3711 
3712       if (VT == MVT::i32) {
3713         MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3714         // tst xreg, #0xffffffff00000000
3715         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3716                 TII.get(AArch64::ANDSXri), AArch64::XZR)
3717             .addReg(MulReg)
3718             .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3719         MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3720       } else {
3721         assert(VT == MVT::i64 && "Unexpected value type.");
3722         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3723         // reused in the next instruction.
3724         MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3725         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3726         emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3727       }
3728       break;
3729     }
3730     }
3731 
3732     if (MulReg) {
3733       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3734       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3735               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3736     }
3737 
3738     if (!ResultReg1)
3739       return false;
3740 
3741     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3742                                   AArch64::WZR, AArch64::WZR,
3743                                   getInvertedCondCode(CC));
3744     (void)ResultReg2;
3745     assert((ResultReg1 + 1) == ResultReg2 &&
3746            "Nonconsecutive result registers.");
3747     updateValueMap(II, ResultReg1, 2);
3748     return true;
3749   }
3750   }
3751   return false;
3752 }
3753 
3754 bool AArch64FastISel::selectRet(const Instruction *I) {
3755   const ReturnInst *Ret = cast<ReturnInst>(I);
3756   const Function &F = *I->getParent()->getParent();
3757 
3758   if (!FuncInfo.CanLowerReturn)
3759     return false;
3760 
3761   if (F.isVarArg())
3762     return false;
3763 
3764   if (TLI.supportSwiftError() &&
3765       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3766     return false;
3767 
3768   if (TLI.supportSplitCSR(FuncInfo.MF))
3769     return false;
3770 
3771   // Build a list of return value registers.
3772   SmallVector<unsigned, 4> RetRegs;
3773 
3774   if (Ret->getNumOperands() > 0) {
3775     CallingConv::ID CC = F.getCallingConv();
3776     SmallVector<ISD::OutputArg, 4> Outs;
3777     GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3778 
3779     // Analyze operands of the call, assigning locations to each operand.
3780     SmallVector<CCValAssign, 16> ValLocs;
3781     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3782     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3783                                                      : RetCC_AArch64_AAPCS;
3784     CCInfo.AnalyzeReturn(Outs, RetCC);
3785 
3786     // Only handle a single return value for now.
3787     if (ValLocs.size() != 1)
3788       return false;
3789 
3790     CCValAssign &VA = ValLocs[0];
3791     const Value *RV = Ret->getOperand(0);
3792 
3793     // Don't bother handling odd stuff for now.
3794     if ((VA.getLocInfo() != CCValAssign::Full) &&
3795         (VA.getLocInfo() != CCValAssign::BCvt))
3796       return false;
3797 
3798     // Only handle register returns for now.
3799     if (!VA.isRegLoc())
3800       return false;
3801 
3802     unsigned Reg = getRegForValue(RV);
3803     if (Reg == 0)
3804       return false;
3805 
3806     unsigned SrcReg = Reg + VA.getValNo();
3807     Register DestReg = VA.getLocReg();
3808     // Avoid a cross-class copy. This is very unlikely.
3809     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3810       return false;
3811 
3812     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3813     if (!RVEVT.isSimple())
3814       return false;
3815 
3816     // Vectors (of > 1 lane) in big endian need tricky handling.
3817     if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3818         !Subtarget->isLittleEndian())
3819       return false;
3820 
3821     MVT RVVT = RVEVT.getSimpleVT();
3822     if (RVVT == MVT::f128)
3823       return false;
3824 
3825     MVT DestVT = VA.getValVT();
3826     // Special handling for extended integers.
3827     if (RVVT != DestVT) {
3828       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3829         return false;
3830 
3831       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3832         return false;
3833 
3834       bool IsZExt = Outs[0].Flags.isZExt();
3835       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3836       if (SrcReg == 0)
3837         return false;
3838     }
3839 
3840     // "Callee" (i.e. value producer) zero extends pointers at function
3841     // boundary.
3842     if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3843       SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3844 
3845     // Make the copy.
3846     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3847             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3848 
3849     // Add register to return instruction.
3850     RetRegs.push_back(VA.getLocReg());
3851   }
3852 
3853   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3854                                     TII.get(AArch64::RET_ReallyLR));
3855   for (unsigned RetReg : RetRegs)
3856     MIB.addReg(RetReg, RegState::Implicit);
3857   return true;
3858 }
3859 
3860 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3861   Type *DestTy = I->getType();
3862   Value *Op = I->getOperand(0);
3863   Type *SrcTy = Op->getType();
3864 
3865   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3866   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3867   if (!SrcEVT.isSimple())
3868     return false;
3869   if (!DestEVT.isSimple())
3870     return false;
3871 
3872   MVT SrcVT = SrcEVT.getSimpleVT();
3873   MVT DestVT = DestEVT.getSimpleVT();
3874 
3875   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3876       SrcVT != MVT::i8)
3877     return false;
3878   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3879       DestVT != MVT::i1)
3880     return false;
3881 
3882   unsigned SrcReg = getRegForValue(Op);
3883   if (!SrcReg)
3884     return false;
3885 
3886   // If we're truncating from i64 to a smaller non-legal type then generate an
3887   // AND. Otherwise, we know the high bits are undefined and a truncate only
3888   // generate a COPY. We cannot mark the source register also as result
3889   // register, because this can incorrectly transfer the kill flag onto the
3890   // source register.
3891   unsigned ResultReg;
3892   if (SrcVT == MVT::i64) {
3893     uint64_t Mask = 0;
3894     switch (DestVT.SimpleTy) {
3895     default:
3896       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3897       return false;
3898     case MVT::i1:
3899       Mask = 0x1;
3900       break;
3901     case MVT::i8:
3902       Mask = 0xff;
3903       break;
3904     case MVT::i16:
3905       Mask = 0xffff;
3906       break;
3907     }
3908     // Issue an extract_subreg to get the lower 32-bits.
3909     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3910                                                 AArch64::sub_32);
3911     // Create the AND instruction which performs the actual truncation.
3912     ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3913     assert(ResultReg && "Unexpected AND instruction emission failure.");
3914   } else {
3915     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3916     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3917             TII.get(TargetOpcode::COPY), ResultReg)
3918         .addReg(SrcReg);
3919   }
3920 
3921   updateValueMap(I, ResultReg);
3922   return true;
3923 }
3924 
3925 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3926   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3927           DestVT == MVT::i64) &&
3928          "Unexpected value type.");
3929   // Handle i8 and i16 as i32.
3930   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3931     DestVT = MVT::i32;
3932 
3933   if (IsZExt) {
3934     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
3935     assert(ResultReg && "Unexpected AND instruction emission failure.");
3936     if (DestVT == MVT::i64) {
3937       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3938       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3939       Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3940       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3941               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3942           .addImm(0)
3943           .addReg(ResultReg)
3944           .addImm(AArch64::sub_32);
3945       ResultReg = Reg64;
3946     }
3947     return ResultReg;
3948   } else {
3949     if (DestVT == MVT::i64) {
3950       // FIXME: We're SExt i1 to i64.
3951       return 0;
3952     }
3953     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3954                             0, 0);
3955   }
3956 }
3957 
3958 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3959   unsigned Opc, ZReg;
3960   switch (RetVT.SimpleTy) {
3961   default: return 0;
3962   case MVT::i8:
3963   case MVT::i16:
3964   case MVT::i32:
3965     RetVT = MVT::i32;
3966     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3967   case MVT::i64:
3968     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3969   }
3970 
3971   const TargetRegisterClass *RC =
3972       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3973   return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
3974 }
3975 
3976 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3977   if (RetVT != MVT::i64)
3978     return 0;
3979 
3980   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3981                           Op0, Op1, AArch64::XZR);
3982 }
3983 
3984 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3985   if (RetVT != MVT::i64)
3986     return 0;
3987 
3988   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3989                           Op0, Op1, AArch64::XZR);
3990 }
3991 
3992 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
3993                                      unsigned Op1Reg) {
3994   unsigned Opc = 0;
3995   bool NeedTrunc = false;
3996   uint64_t Mask = 0;
3997   switch (RetVT.SimpleTy) {
3998   default: return 0;
3999   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
4000   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4001   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4002   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4003   }
4004 
4005   const TargetRegisterClass *RC =
4006       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4007   if (NeedTrunc)
4008     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4009 
4010   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4011   if (NeedTrunc)
4012     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4013   return ResultReg;
4014 }
4015 
4016 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4017                                      uint64_t Shift, bool IsZExt) {
4018   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4019          "Unexpected source/return type pair.");
4020   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4021           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4022          "Unexpected source value type.");
4023   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4024           RetVT == MVT::i64) && "Unexpected return value type.");
4025 
4026   bool Is64Bit = (RetVT == MVT::i64);
4027   unsigned RegSize = Is64Bit ? 64 : 32;
4028   unsigned DstBits = RetVT.getSizeInBits();
4029   unsigned SrcBits = SrcVT.getSizeInBits();
4030   const TargetRegisterClass *RC =
4031       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4032 
4033   // Just emit a copy for "zero" shifts.
4034   if (Shift == 0) {
4035     if (RetVT == SrcVT) {
4036       unsigned ResultReg = createResultReg(RC);
4037       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4038               TII.get(TargetOpcode::COPY), ResultReg)
4039           .addReg(Op0);
4040       return ResultReg;
4041     } else
4042       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4043   }
4044 
4045   // Don't deal with undefined shifts.
4046   if (Shift >= DstBits)
4047     return 0;
4048 
4049   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4050   // {S|U}BFM Wd, Wn, #r, #s
4051   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4052 
4053   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4054   // %2 = shl i16 %1, 4
4055   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4056   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4057   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4058   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4059 
4060   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4061   // %2 = shl i16 %1, 8
4062   // Wd<32+7-24,32-24> = Wn<7:0>
4063   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4064   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4065   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4066 
4067   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4068   // %2 = shl i16 %1, 12
4069   // Wd<32+3-20,32-20> = Wn<3:0>
4070   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4071   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4072   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4073 
4074   unsigned ImmR = RegSize - Shift;
4075   // Limit the width to the length of the source type.
4076   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4077   static const unsigned OpcTable[2][2] = {
4078     {AArch64::SBFMWri, AArch64::SBFMXri},
4079     {AArch64::UBFMWri, AArch64::UBFMXri}
4080   };
4081   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4082   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4083     Register TmpReg = MRI.createVirtualRegister(RC);
4084     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4085             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4086         .addImm(0)
4087         .addReg(Op0)
4088         .addImm(AArch64::sub_32);
4089     Op0 = TmpReg;
4090   }
4091   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4092 }
4093 
4094 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4095                                      unsigned Op1Reg) {
4096   unsigned Opc = 0;
4097   bool NeedTrunc = false;
4098   uint64_t Mask = 0;
4099   switch (RetVT.SimpleTy) {
4100   default: return 0;
4101   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4102   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4103   case MVT::i32: Opc = AArch64::LSRVWr; break;
4104   case MVT::i64: Opc = AArch64::LSRVXr; break;
4105   }
4106 
4107   const TargetRegisterClass *RC =
4108       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4109   if (NeedTrunc) {
4110     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4111     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4112   }
4113   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4114   if (NeedTrunc)
4115     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4116   return ResultReg;
4117 }
4118 
4119 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4120                                      uint64_t Shift, bool IsZExt) {
4121   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4122          "Unexpected source/return type pair.");
4123   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4124           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4125          "Unexpected source value type.");
4126   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4127           RetVT == MVT::i64) && "Unexpected return value type.");
4128 
4129   bool Is64Bit = (RetVT == MVT::i64);
4130   unsigned RegSize = Is64Bit ? 64 : 32;
4131   unsigned DstBits = RetVT.getSizeInBits();
4132   unsigned SrcBits = SrcVT.getSizeInBits();
4133   const TargetRegisterClass *RC =
4134       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4135 
4136   // Just emit a copy for "zero" shifts.
4137   if (Shift == 0) {
4138     if (RetVT == SrcVT) {
4139       unsigned ResultReg = createResultReg(RC);
4140       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4141               TII.get(TargetOpcode::COPY), ResultReg)
4142       .addReg(Op0);
4143       return ResultReg;
4144     } else
4145       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4146   }
4147 
4148   // Don't deal with undefined shifts.
4149   if (Shift >= DstBits)
4150     return 0;
4151 
4152   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4153   // {S|U}BFM Wd, Wn, #r, #s
4154   // Wd<s-r:0> = Wn<s:r> when r <= s
4155 
4156   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4157   // %2 = lshr i16 %1, 4
4158   // Wd<7-4:0> = Wn<7:4>
4159   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4160   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4161   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4162 
4163   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4164   // %2 = lshr i16 %1, 8
4165   // Wd<7-7,0> = Wn<7:7>
4166   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4167   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4168   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4169 
4170   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4171   // %2 = lshr i16 %1, 12
4172   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4173   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4174   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4175   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4176 
4177   if (Shift >= SrcBits && IsZExt)
4178     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4179 
4180   // It is not possible to fold a sign-extend into the LShr instruction. In this
4181   // case emit a sign-extend.
4182   if (!IsZExt) {
4183     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4184     if (!Op0)
4185       return 0;
4186     SrcVT = RetVT;
4187     SrcBits = SrcVT.getSizeInBits();
4188     IsZExt = true;
4189   }
4190 
4191   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4192   unsigned ImmS = SrcBits - 1;
4193   static const unsigned OpcTable[2][2] = {
4194     {AArch64::SBFMWri, AArch64::SBFMXri},
4195     {AArch64::UBFMWri, AArch64::UBFMXri}
4196   };
4197   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4198   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4199     Register TmpReg = MRI.createVirtualRegister(RC);
4200     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4201             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4202         .addImm(0)
4203         .addReg(Op0)
4204         .addImm(AArch64::sub_32);
4205     Op0 = TmpReg;
4206   }
4207   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4208 }
4209 
4210 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4211                                      unsigned Op1Reg) {
4212   unsigned Opc = 0;
4213   bool NeedTrunc = false;
4214   uint64_t Mask = 0;
4215   switch (RetVT.SimpleTy) {
4216   default: return 0;
4217   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4218   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4219   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4220   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4221   }
4222 
4223   const TargetRegisterClass *RC =
4224       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4225   if (NeedTrunc) {
4226     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4227     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4228   }
4229   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4230   if (NeedTrunc)
4231     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4232   return ResultReg;
4233 }
4234 
4235 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4236                                      uint64_t Shift, bool IsZExt) {
4237   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4238          "Unexpected source/return type pair.");
4239   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4240           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4241          "Unexpected source value type.");
4242   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4243           RetVT == MVT::i64) && "Unexpected return value type.");
4244 
4245   bool Is64Bit = (RetVT == MVT::i64);
4246   unsigned RegSize = Is64Bit ? 64 : 32;
4247   unsigned DstBits = RetVT.getSizeInBits();
4248   unsigned SrcBits = SrcVT.getSizeInBits();
4249   const TargetRegisterClass *RC =
4250       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4251 
4252   // Just emit a copy for "zero" shifts.
4253   if (Shift == 0) {
4254     if (RetVT == SrcVT) {
4255       unsigned ResultReg = createResultReg(RC);
4256       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4257               TII.get(TargetOpcode::COPY), ResultReg)
4258       .addReg(Op0);
4259       return ResultReg;
4260     } else
4261       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4262   }
4263 
4264   // Don't deal with undefined shifts.
4265   if (Shift >= DstBits)
4266     return 0;
4267 
4268   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4269   // {S|U}BFM Wd, Wn, #r, #s
4270   // Wd<s-r:0> = Wn<s:r> when r <= s
4271 
4272   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4273   // %2 = ashr i16 %1, 4
4274   // Wd<7-4:0> = Wn<7:4>
4275   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4276   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4277   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4278 
4279   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4280   // %2 = ashr i16 %1, 8
4281   // Wd<7-7,0> = Wn<7:7>
4282   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4283   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4284   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4285 
4286   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4287   // %2 = ashr i16 %1, 12
4288   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4289   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4290   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4291   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4292 
4293   if (Shift >= SrcBits && IsZExt)
4294     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4295 
4296   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4297   unsigned ImmS = SrcBits - 1;
4298   static const unsigned OpcTable[2][2] = {
4299     {AArch64::SBFMWri, AArch64::SBFMXri},
4300     {AArch64::UBFMWri, AArch64::UBFMXri}
4301   };
4302   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4303   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4304     Register TmpReg = MRI.createVirtualRegister(RC);
4305     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4306             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4307         .addImm(0)
4308         .addReg(Op0)
4309         .addImm(AArch64::sub_32);
4310     Op0 = TmpReg;
4311   }
4312   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4313 }
4314 
4315 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4316                                      bool IsZExt) {
4317   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4318 
4319   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4320   // DestVT are odd things, so test to make sure that they are both types we can
4321   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4322   // bail out to SelectionDAG.
4323   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4324        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4325       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
4326        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
4327     return 0;
4328 
4329   unsigned Opc;
4330   unsigned Imm = 0;
4331 
4332   switch (SrcVT.SimpleTy) {
4333   default:
4334     return 0;
4335   case MVT::i1:
4336     return emiti1Ext(SrcReg, DestVT, IsZExt);
4337   case MVT::i8:
4338     if (DestVT == MVT::i64)
4339       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4340     else
4341       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4342     Imm = 7;
4343     break;
4344   case MVT::i16:
4345     if (DestVT == MVT::i64)
4346       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4347     else
4348       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4349     Imm = 15;
4350     break;
4351   case MVT::i32:
4352     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4353     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4354     Imm = 31;
4355     break;
4356   }
4357 
4358   // Handle i8 and i16 as i32.
4359   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4360     DestVT = MVT::i32;
4361   else if (DestVT == MVT::i64) {
4362     Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4363     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4364             TII.get(AArch64::SUBREG_TO_REG), Src64)
4365         .addImm(0)
4366         .addReg(SrcReg)
4367         .addImm(AArch64::sub_32);
4368     SrcReg = Src64;
4369   }
4370 
4371   const TargetRegisterClass *RC =
4372       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4373   return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4374 }
4375 
4376 static bool isZExtLoad(const MachineInstr *LI) {
4377   switch (LI->getOpcode()) {
4378   default:
4379     return false;
4380   case AArch64::LDURBBi:
4381   case AArch64::LDURHHi:
4382   case AArch64::LDURWi:
4383   case AArch64::LDRBBui:
4384   case AArch64::LDRHHui:
4385   case AArch64::LDRWui:
4386   case AArch64::LDRBBroX:
4387   case AArch64::LDRHHroX:
4388   case AArch64::LDRWroX:
4389   case AArch64::LDRBBroW:
4390   case AArch64::LDRHHroW:
4391   case AArch64::LDRWroW:
4392     return true;
4393   }
4394 }
4395 
4396 static bool isSExtLoad(const MachineInstr *LI) {
4397   switch (LI->getOpcode()) {
4398   default:
4399     return false;
4400   case AArch64::LDURSBWi:
4401   case AArch64::LDURSHWi:
4402   case AArch64::LDURSBXi:
4403   case AArch64::LDURSHXi:
4404   case AArch64::LDURSWi:
4405   case AArch64::LDRSBWui:
4406   case AArch64::LDRSHWui:
4407   case AArch64::LDRSBXui:
4408   case AArch64::LDRSHXui:
4409   case AArch64::LDRSWui:
4410   case AArch64::LDRSBWroX:
4411   case AArch64::LDRSHWroX:
4412   case AArch64::LDRSBXroX:
4413   case AArch64::LDRSHXroX:
4414   case AArch64::LDRSWroX:
4415   case AArch64::LDRSBWroW:
4416   case AArch64::LDRSHWroW:
4417   case AArch64::LDRSBXroW:
4418   case AArch64::LDRSHXroW:
4419   case AArch64::LDRSWroW:
4420     return true;
4421   }
4422 }
4423 
4424 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4425                                          MVT SrcVT) {
4426   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4427   if (!LI || !LI->hasOneUse())
4428     return false;
4429 
4430   // Check if the load instruction has already been selected.
4431   unsigned Reg = lookUpRegForValue(LI);
4432   if (!Reg)
4433     return false;
4434 
4435   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4436   if (!MI)
4437     return false;
4438 
4439   // Check if the correct load instruction has been emitted - SelectionDAG might
4440   // have emitted a zero-extending load, but we need a sign-extending load.
4441   bool IsZExt = isa<ZExtInst>(I);
4442   const auto *LoadMI = MI;
4443   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4444       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4445     Register LoadReg = MI->getOperand(1).getReg();
4446     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4447     assert(LoadMI && "Expected valid instruction");
4448   }
4449   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4450     return false;
4451 
4452   // Nothing to be done.
4453   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4454     updateValueMap(I, Reg);
4455     return true;
4456   }
4457 
4458   if (IsZExt) {
4459     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4460     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4461             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4462         .addImm(0)
4463         .addReg(Reg, getKillRegState(true))
4464         .addImm(AArch64::sub_32);
4465     Reg = Reg64;
4466   } else {
4467     assert((MI->getOpcode() == TargetOpcode::COPY &&
4468             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4469            "Expected copy instruction");
4470     Reg = MI->getOperand(1).getReg();
4471     MachineBasicBlock::iterator I(MI);
4472     removeDeadCode(I, std::next(I));
4473   }
4474   updateValueMap(I, Reg);
4475   return true;
4476 }
4477 
4478 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4479   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4480          "Unexpected integer extend instruction.");
4481   MVT RetVT;
4482   MVT SrcVT;
4483   if (!isTypeSupported(I->getType(), RetVT))
4484     return false;
4485 
4486   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4487     return false;
4488 
4489   // Try to optimize already sign-/zero-extended values from load instructions.
4490   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4491     return true;
4492 
4493   unsigned SrcReg = getRegForValue(I->getOperand(0));
4494   if (!SrcReg)
4495     return false;
4496 
4497   // Try to optimize already sign-/zero-extended values from function arguments.
4498   bool IsZExt = isa<ZExtInst>(I);
4499   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4500     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4501       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4502         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4503         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4504                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4505             .addImm(0)
4506             .addReg(SrcReg)
4507             .addImm(AArch64::sub_32);
4508         SrcReg = ResultReg;
4509       }
4510 
4511       updateValueMap(I, SrcReg);
4512       return true;
4513     }
4514   }
4515 
4516   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4517   if (!ResultReg)
4518     return false;
4519 
4520   updateValueMap(I, ResultReg);
4521   return true;
4522 }
4523 
4524 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4525   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4526   if (!DestEVT.isSimple())
4527     return false;
4528 
4529   MVT DestVT = DestEVT.getSimpleVT();
4530   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4531     return false;
4532 
4533   unsigned DivOpc;
4534   bool Is64bit = (DestVT == MVT::i64);
4535   switch (ISDOpcode) {
4536   default:
4537     return false;
4538   case ISD::SREM:
4539     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4540     break;
4541   case ISD::UREM:
4542     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4543     break;
4544   }
4545   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4546   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4547   if (!Src0Reg)
4548     return false;
4549 
4550   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4551   if (!Src1Reg)
4552     return false;
4553 
4554   const TargetRegisterClass *RC =
4555       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4556   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4557   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4558   // The remainder is computed as numerator - (quotient * denominator) using the
4559   // MSUB instruction.
4560   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4561   updateValueMap(I, ResultReg);
4562   return true;
4563 }
4564 
4565 bool AArch64FastISel::selectMul(const Instruction *I) {
4566   MVT VT;
4567   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4568     return false;
4569 
4570   if (VT.isVector())
4571     return selectBinaryOp(I, ISD::MUL);
4572 
4573   const Value *Src0 = I->getOperand(0);
4574   const Value *Src1 = I->getOperand(1);
4575   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4576     if (C->getValue().isPowerOf2())
4577       std::swap(Src0, Src1);
4578 
4579   // Try to simplify to a shift instruction.
4580   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4581     if (C->getValue().isPowerOf2()) {
4582       uint64_t ShiftVal = C->getValue().logBase2();
4583       MVT SrcVT = VT;
4584       bool IsZExt = true;
4585       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4586         if (!isIntExtFree(ZExt)) {
4587           MVT VT;
4588           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4589             SrcVT = VT;
4590             IsZExt = true;
4591             Src0 = ZExt->getOperand(0);
4592           }
4593         }
4594       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4595         if (!isIntExtFree(SExt)) {
4596           MVT VT;
4597           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4598             SrcVT = VT;
4599             IsZExt = false;
4600             Src0 = SExt->getOperand(0);
4601           }
4602         }
4603       }
4604 
4605       unsigned Src0Reg = getRegForValue(Src0);
4606       if (!Src0Reg)
4607         return false;
4608 
4609       unsigned ResultReg =
4610           emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4611 
4612       if (ResultReg) {
4613         updateValueMap(I, ResultReg);
4614         return true;
4615       }
4616     }
4617 
4618   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4619   if (!Src0Reg)
4620     return false;
4621 
4622   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4623   if (!Src1Reg)
4624     return false;
4625 
4626   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4627 
4628   if (!ResultReg)
4629     return false;
4630 
4631   updateValueMap(I, ResultReg);
4632   return true;
4633 }
4634 
4635 bool AArch64FastISel::selectShift(const Instruction *I) {
4636   MVT RetVT;
4637   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4638     return false;
4639 
4640   if (RetVT.isVector())
4641     return selectOperator(I, I->getOpcode());
4642 
4643   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4644     unsigned ResultReg = 0;
4645     uint64_t ShiftVal = C->getZExtValue();
4646     MVT SrcVT = RetVT;
4647     bool IsZExt = I->getOpcode() != Instruction::AShr;
4648     const Value *Op0 = I->getOperand(0);
4649     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4650       if (!isIntExtFree(ZExt)) {
4651         MVT TmpVT;
4652         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4653           SrcVT = TmpVT;
4654           IsZExt = true;
4655           Op0 = ZExt->getOperand(0);
4656         }
4657       }
4658     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4659       if (!isIntExtFree(SExt)) {
4660         MVT TmpVT;
4661         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4662           SrcVT = TmpVT;
4663           IsZExt = false;
4664           Op0 = SExt->getOperand(0);
4665         }
4666       }
4667     }
4668 
4669     unsigned Op0Reg = getRegForValue(Op0);
4670     if (!Op0Reg)
4671       return false;
4672 
4673     switch (I->getOpcode()) {
4674     default: llvm_unreachable("Unexpected instruction.");
4675     case Instruction::Shl:
4676       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4677       break;
4678     case Instruction::AShr:
4679       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4680       break;
4681     case Instruction::LShr:
4682       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4683       break;
4684     }
4685     if (!ResultReg)
4686       return false;
4687 
4688     updateValueMap(I, ResultReg);
4689     return true;
4690   }
4691 
4692   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4693   if (!Op0Reg)
4694     return false;
4695 
4696   unsigned Op1Reg = getRegForValue(I->getOperand(1));
4697   if (!Op1Reg)
4698     return false;
4699 
4700   unsigned ResultReg = 0;
4701   switch (I->getOpcode()) {
4702   default: llvm_unreachable("Unexpected instruction.");
4703   case Instruction::Shl:
4704     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4705     break;
4706   case Instruction::AShr:
4707     ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4708     break;
4709   case Instruction::LShr:
4710     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4711     break;
4712   }
4713 
4714   if (!ResultReg)
4715     return false;
4716 
4717   updateValueMap(I, ResultReg);
4718   return true;
4719 }
4720 
4721 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4722   MVT RetVT, SrcVT;
4723 
4724   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4725     return false;
4726   if (!isTypeLegal(I->getType(), RetVT))
4727     return false;
4728 
4729   unsigned Opc;
4730   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4731     Opc = AArch64::FMOVWSr;
4732   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4733     Opc = AArch64::FMOVXDr;
4734   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4735     Opc = AArch64::FMOVSWr;
4736   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4737     Opc = AArch64::FMOVDXr;
4738   else
4739     return false;
4740 
4741   const TargetRegisterClass *RC = nullptr;
4742   switch (RetVT.SimpleTy) {
4743   default: llvm_unreachable("Unexpected value type.");
4744   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4745   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4746   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4747   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4748   }
4749   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4750   if (!Op0Reg)
4751     return false;
4752 
4753   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4754   if (!ResultReg)
4755     return false;
4756 
4757   updateValueMap(I, ResultReg);
4758   return true;
4759 }
4760 
4761 bool AArch64FastISel::selectFRem(const Instruction *I) {
4762   MVT RetVT;
4763   if (!isTypeLegal(I->getType(), RetVT))
4764     return false;
4765 
4766   RTLIB::Libcall LC;
4767   switch (RetVT.SimpleTy) {
4768   default:
4769     return false;
4770   case MVT::f32:
4771     LC = RTLIB::REM_F32;
4772     break;
4773   case MVT::f64:
4774     LC = RTLIB::REM_F64;
4775     break;
4776   }
4777 
4778   ArgListTy Args;
4779   Args.reserve(I->getNumOperands());
4780 
4781   // Populate the argument list.
4782   for (auto &Arg : I->operands()) {
4783     ArgListEntry Entry;
4784     Entry.Val = Arg;
4785     Entry.Ty = Arg->getType();
4786     Args.push_back(Entry);
4787   }
4788 
4789   CallLoweringInfo CLI;
4790   MCContext &Ctx = MF->getContext();
4791   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4792                 TLI.getLibcallName(LC), std::move(Args));
4793   if (!lowerCallTo(CLI))
4794     return false;
4795   updateValueMap(I, CLI.ResultReg);
4796   return true;
4797 }
4798 
4799 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4800   MVT VT;
4801   if (!isTypeLegal(I->getType(), VT))
4802     return false;
4803 
4804   if (!isa<ConstantInt>(I->getOperand(1)))
4805     return selectBinaryOp(I, ISD::SDIV);
4806 
4807   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4808   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4809       !(C.isPowerOf2() || (-C).isPowerOf2()))
4810     return selectBinaryOp(I, ISD::SDIV);
4811 
4812   unsigned Lg2 = C.countTrailingZeros();
4813   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4814   if (!Src0Reg)
4815     return false;
4816 
4817   if (cast<BinaryOperator>(I)->isExact()) {
4818     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4819     if (!ResultReg)
4820       return false;
4821     updateValueMap(I, ResultReg);
4822     return true;
4823   }
4824 
4825   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4826   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4827   if (!AddReg)
4828     return false;
4829 
4830   // (Src0 < 0) ? Pow2 - 1 : 0;
4831   if (!emitICmp_ri(VT, Src0Reg, 0))
4832     return false;
4833 
4834   unsigned SelectOpc;
4835   const TargetRegisterClass *RC;
4836   if (VT == MVT::i64) {
4837     SelectOpc = AArch64::CSELXr;
4838     RC = &AArch64::GPR64RegClass;
4839   } else {
4840     SelectOpc = AArch64::CSELWr;
4841     RC = &AArch64::GPR32RegClass;
4842   }
4843   unsigned SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4844                                         AArch64CC::LT);
4845   if (!SelectReg)
4846     return false;
4847 
4848   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4849   // negate the result.
4850   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4851   unsigned ResultReg;
4852   if (C.isNegative())
4853     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4854                               AArch64_AM::ASR, Lg2);
4855   else
4856     ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4857 
4858   if (!ResultReg)
4859     return false;
4860 
4861   updateValueMap(I, ResultReg);
4862   return true;
4863 }
4864 
4865 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4866 /// have to duplicate it for AArch64, because otherwise we would fail during the
4867 /// sign-extend emission.
4868 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4869   unsigned IdxN = getRegForValue(Idx);
4870   if (IdxN == 0)
4871     // Unhandled operand. Halt "fast" selection and bail.
4872     return 0;
4873 
4874   // If the index is smaller or larger than intptr_t, truncate or extend it.
4875   MVT PtrVT = TLI.getPointerTy(DL);
4876   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4877   if (IdxVT.bitsLT(PtrVT)) {
4878     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4879   } else if (IdxVT.bitsGT(PtrVT))
4880     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4881   return IdxN;
4882 }
4883 
4884 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4885 /// duplicate it for AArch64, because otherwise we would bail out even for
4886 /// simple cases. This is because the standard fastEmit functions don't cover
4887 /// MUL at all and ADD is lowered very inefficientily.
4888 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4889   if (Subtarget->isTargetILP32())
4890     return false;
4891 
4892   unsigned N = getRegForValue(I->getOperand(0));
4893   if (!N)
4894     return false;
4895 
4896   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4897   // into a single N = N + TotalOffset.
4898   uint64_t TotalOffs = 0;
4899   MVT VT = TLI.getPointerTy(DL);
4900   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4901        GTI != E; ++GTI) {
4902     const Value *Idx = GTI.getOperand();
4903     if (auto *StTy = GTI.getStructTypeOrNull()) {
4904       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4905       // N = N + Offset
4906       if (Field)
4907         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4908     } else {
4909       Type *Ty = GTI.getIndexedType();
4910 
4911       // If this is a constant subscript, handle it quickly.
4912       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4913         if (CI->isZero())
4914           continue;
4915         // N = N + Offset
4916         TotalOffs +=
4917             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4918         continue;
4919       }
4920       if (TotalOffs) {
4921         N = emitAdd_ri_(VT, N, TotalOffs);
4922         if (!N)
4923           return false;
4924         TotalOffs = 0;
4925       }
4926 
4927       // N = N + Idx * ElementSize;
4928       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4929       unsigned IdxN = getRegForGEPIndex(Idx);
4930       if (!IdxN)
4931         return false;
4932 
4933       if (ElementSize != 1) {
4934         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4935         if (!C)
4936           return false;
4937         IdxN = emitMul_rr(VT, IdxN, C);
4938         if (!IdxN)
4939           return false;
4940       }
4941       N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
4942       if (!N)
4943         return false;
4944     }
4945   }
4946   if (TotalOffs) {
4947     N = emitAdd_ri_(VT, N, TotalOffs);
4948     if (!N)
4949       return false;
4950   }
4951   updateValueMap(I, N);
4952   return true;
4953 }
4954 
4955 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4956   assert(TM.getOptLevel() == CodeGenOpt::None &&
4957          "cmpxchg survived AtomicExpand at optlevel > -O0");
4958 
4959   auto *RetPairTy = cast<StructType>(I->getType());
4960   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
4961   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
4962          "cmpxchg has a non-i1 status result");
4963 
4964   MVT VT;
4965   if (!isTypeLegal(RetTy, VT))
4966     return false;
4967 
4968   const TargetRegisterClass *ResRC;
4969   unsigned Opc, CmpOpc;
4970   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
4971   // extractvalue selection doesn't support that.
4972   if (VT == MVT::i32) {
4973     Opc = AArch64::CMP_SWAP_32;
4974     CmpOpc = AArch64::SUBSWrs;
4975     ResRC = &AArch64::GPR32RegClass;
4976   } else if (VT == MVT::i64) {
4977     Opc = AArch64::CMP_SWAP_64;
4978     CmpOpc = AArch64::SUBSXrs;
4979     ResRC = &AArch64::GPR64RegClass;
4980   } else {
4981     return false;
4982   }
4983 
4984   const MCInstrDesc &II = TII.get(Opc);
4985 
4986   const unsigned AddrReg = constrainOperandRegClass(
4987       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
4988   const unsigned DesiredReg = constrainOperandRegClass(
4989       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
4990   const unsigned NewReg = constrainOperandRegClass(
4991       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
4992 
4993   const unsigned ResultReg1 = createResultReg(ResRC);
4994   const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
4995   const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
4996 
4997   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
4998   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
4999       .addDef(ResultReg1)
5000       .addDef(ScratchReg)
5001       .addUse(AddrReg)
5002       .addUse(DesiredReg)
5003       .addUse(NewReg);
5004 
5005   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5006       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5007       .addUse(ResultReg1)
5008       .addUse(DesiredReg)
5009       .addImm(0);
5010 
5011   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5012       .addDef(ResultReg2)
5013       .addUse(AArch64::WZR)
5014       .addUse(AArch64::WZR)
5015       .addImm(AArch64CC::NE);
5016 
5017   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5018   updateValueMap(I, ResultReg1, 2);
5019   return true;
5020 }
5021 
5022 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5023   switch (I->getOpcode()) {
5024   default:
5025     break;
5026   case Instruction::Add:
5027   case Instruction::Sub:
5028     return selectAddSub(I);
5029   case Instruction::Mul:
5030     return selectMul(I);
5031   case Instruction::SDiv:
5032     return selectSDiv(I);
5033   case Instruction::SRem:
5034     if (!selectBinaryOp(I, ISD::SREM))
5035       return selectRem(I, ISD::SREM);
5036     return true;
5037   case Instruction::URem:
5038     if (!selectBinaryOp(I, ISD::UREM))
5039       return selectRem(I, ISD::UREM);
5040     return true;
5041   case Instruction::Shl:
5042   case Instruction::LShr:
5043   case Instruction::AShr:
5044     return selectShift(I);
5045   case Instruction::And:
5046   case Instruction::Or:
5047   case Instruction::Xor:
5048     return selectLogicalOp(I);
5049   case Instruction::Br:
5050     return selectBranch(I);
5051   case Instruction::IndirectBr:
5052     return selectIndirectBr(I);
5053   case Instruction::BitCast:
5054     if (!FastISel::selectBitCast(I))
5055       return selectBitCast(I);
5056     return true;
5057   case Instruction::FPToSI:
5058     if (!selectCast(I, ISD::FP_TO_SINT))
5059       return selectFPToInt(I, /*Signed=*/true);
5060     return true;
5061   case Instruction::FPToUI:
5062     return selectFPToInt(I, /*Signed=*/false);
5063   case Instruction::ZExt:
5064   case Instruction::SExt:
5065     return selectIntExt(I);
5066   case Instruction::Trunc:
5067     if (!selectCast(I, ISD::TRUNCATE))
5068       return selectTrunc(I);
5069     return true;
5070   case Instruction::FPExt:
5071     return selectFPExt(I);
5072   case Instruction::FPTrunc:
5073     return selectFPTrunc(I);
5074   case Instruction::SIToFP:
5075     if (!selectCast(I, ISD::SINT_TO_FP))
5076       return selectIntToFP(I, /*Signed=*/true);
5077     return true;
5078   case Instruction::UIToFP:
5079     return selectIntToFP(I, /*Signed=*/false);
5080   case Instruction::Load:
5081     return selectLoad(I);
5082   case Instruction::Store:
5083     return selectStore(I);
5084   case Instruction::FCmp:
5085   case Instruction::ICmp:
5086     return selectCmp(I);
5087   case Instruction::Select:
5088     return selectSelect(I);
5089   case Instruction::Ret:
5090     return selectRet(I);
5091   case Instruction::FRem:
5092     return selectFRem(I);
5093   case Instruction::GetElementPtr:
5094     return selectGetElementPtr(I);
5095   case Instruction::AtomicCmpXchg:
5096     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5097   }
5098 
5099   // fall-back to target-independent instruction selection.
5100   return selectOperator(I, I->getOpcode());
5101 }
5102 
5103 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5104                                         const TargetLibraryInfo *LibInfo) {
5105   return new AArch64FastISel(FuncInfo, LibInfo);
5106 }
5107