1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
17 #include "AArch64MachineFunctionInfo.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/Analysis/BranchProbabilityInfo.h"
27 #include "llvm/CodeGen/CallingConvLower.h"
28 #include "llvm/CodeGen/FastISel.h"
29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
30 #include "llvm/CodeGen/ISDOpcodes.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineConstantPool.h"
33 #include "llvm/CodeGen/MachineFrameInfo.h"
34 #include "llvm/CodeGen/MachineInstr.h"
35 #include "llvm/CodeGen/MachineInstrBuilder.h"
36 #include "llvm/CodeGen/MachineMemOperand.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/RuntimeLibcalls.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/IR/Argument.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/CallingConv.h"
44 #include "llvm/IR/Constant.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/DataLayout.h"
47 #include "llvm/IR/DerivedTypes.h"
48 #include "llvm/IR/Function.h"
49 #include "llvm/IR/GetElementPtrTypeIterator.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/Intrinsics.h"
56 #include "llvm/IR/Operator.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/User.h"
59 #include "llvm/IR/Value.h"
60 #include "llvm/MC/MCInstrDesc.h"
61 #include "llvm/MC/MCRegisterInfo.h"
62 #include "llvm/MC/MCSymbol.h"
63 #include "llvm/Support/AtomicOrdering.h"
64 #include "llvm/Support/Casting.h"
65 #include "llvm/Support/CodeGen.h"
66 #include "llvm/Support/Compiler.h"
67 #include "llvm/Support/ErrorHandling.h"
68 #include "llvm/Support/MachineValueType.h"
69 #include "llvm/Support/MathExtras.h"
70 #include <algorithm>
71 #include <cassert>
72 #include <cstdint>
73 #include <iterator>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 namespace {
79 
80 class AArch64FastISel final : public FastISel {
81   class Address {
82   public:
83     using BaseKind = enum {
84       RegBase,
85       FrameIndexBase
86     };
87 
88   private:
89     BaseKind Kind = RegBase;
90     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91     union {
92       unsigned Reg;
93       int FI;
94     } Base;
95     unsigned OffsetReg = 0;
96     unsigned Shift = 0;
97     int64_t Offset = 0;
98     const GlobalValue *GV = nullptr;
99 
100   public:
101     Address() { Base.Reg = 0; }
102 
103     void setKind(BaseKind K) { Kind = K; }
104     BaseKind getKind() const { return Kind; }
105     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
106     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
107     bool isRegBase() const { return Kind == RegBase; }
108     bool isFIBase() const { return Kind == FrameIndexBase; }
109 
110     void setReg(unsigned Reg) {
111       assert(isRegBase() && "Invalid base register access!");
112       Base.Reg = Reg;
113     }
114 
115     unsigned getReg() const {
116       assert(isRegBase() && "Invalid base register access!");
117       return Base.Reg;
118     }
119 
120     void setOffsetReg(unsigned Reg) {
121       OffsetReg = Reg;
122     }
123 
124     unsigned getOffsetReg() const {
125       return OffsetReg;
126     }
127 
128     void setFI(unsigned FI) {
129       assert(isFIBase() && "Invalid base frame index access!");
130       Base.FI = FI;
131     }
132 
133     unsigned getFI() const {
134       assert(isFIBase() && "Invalid base frame index access!");
135       return Base.FI;
136     }
137 
138     void setOffset(int64_t O) { Offset = O; }
139     int64_t getOffset() { return Offset; }
140     void setShift(unsigned S) { Shift = S; }
141     unsigned getShift() { return Shift; }
142 
143     void setGlobalValue(const GlobalValue *G) { GV = G; }
144     const GlobalValue *getGlobalValue() { return GV; }
145   };
146 
147   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148   /// make the right decision when generating code for different targets.
149   const AArch64Subtarget *Subtarget;
150   LLVMContext *Context;
151 
152   bool fastLowerArguments() override;
153   bool fastLowerCall(CallLoweringInfo &CLI) override;
154   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155 
156 private:
157   // Selection routines.
158   bool selectAddSub(const Instruction *I);
159   bool selectLogicalOp(const Instruction *I);
160   bool selectLoad(const Instruction *I);
161   bool selectStore(const Instruction *I);
162   bool selectBranch(const Instruction *I);
163   bool selectIndirectBr(const Instruction *I);
164   bool selectCmp(const Instruction *I);
165   bool selectSelect(const Instruction *I);
166   bool selectFPExt(const Instruction *I);
167   bool selectFPTrunc(const Instruction *I);
168   bool selectFPToInt(const Instruction *I, bool Signed);
169   bool selectIntToFP(const Instruction *I, bool Signed);
170   bool selectRem(const Instruction *I, unsigned ISDOpcode);
171   bool selectRet(const Instruction *I);
172   bool selectTrunc(const Instruction *I);
173   bool selectIntExt(const Instruction *I);
174   bool selectMul(const Instruction *I);
175   bool selectShift(const Instruction *I);
176   bool selectBitCast(const Instruction *I);
177   bool selectFRem(const Instruction *I);
178   bool selectSDiv(const Instruction *I);
179   bool selectGetElementPtr(const Instruction *I);
180   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181 
182   // Utility helper routines.
183   bool isTypeLegal(Type *Ty, MVT &VT);
184   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185   bool isValueAvailable(const Value *V) const;
186   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187   bool computeCallAddress(const Value *V, Address &Addr);
188   bool simplifyAddress(Address &Addr, MVT VT);
189   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190                             MachineMemOperand::Flags Flags,
191                             unsigned ScaleFactor, MachineMemOperand *MMO);
192   bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
193   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194                           MaybeAlign Alignment);
195   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196                          const Value *Cond);
197   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198   bool optimizeSelect(const SelectInst *SI);
199   unsigned getRegForGEPIndex(const Value *Idx);
200 
201   // Emit helper routines.
202   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203                       const Value *RHS, bool SetFlags = false,
204                       bool WantResult = true,  bool IsZExt = false);
205   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206                          unsigned RHSReg, bool SetFlags = false,
207                          bool WantResult = true);
208   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209                          uint64_t Imm, bool SetFlags = false,
210                          bool WantResult = true);
211   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212                          unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
213                          uint64_t ShiftImm, bool SetFlags = false,
214                          bool WantResult = true);
215   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216                          unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
217                          uint64_t ShiftImm, bool SetFlags = false,
218                          bool WantResult = true);
219 
220   // Emit functions.
221   bool emitCompareAndBranch(const BranchInst *BI);
222   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
223   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
224   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
225   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
226   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
227                     MachineMemOperand *MMO = nullptr);
228   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
229                  MachineMemOperand *MMO = nullptr);
230   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
231                         MachineMemOperand *MMO = nullptr);
232   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
233   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
234   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
235                    bool SetFlags = false, bool WantResult = true,
236                    bool IsZExt = false);
237   unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
238   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
239                    bool SetFlags = false, bool WantResult = true,
240                    bool IsZExt = false);
241   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
242                        bool WantResult = true);
243   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
244                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
245                        bool WantResult = true);
246   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
247                          const Value *RHS);
248   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
249                             uint64_t Imm);
250   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251                             unsigned RHSReg, uint64_t ShiftImm);
252   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
253   unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
254   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
257   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
258                       bool IsZExt = true);
259   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
260   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
261                       bool IsZExt = true);
262   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
263   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
264                       bool IsZExt = false);
265 
266   unsigned materializeInt(const ConstantInt *CI, MVT VT);
267   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
268   unsigned materializeGV(const GlobalValue *GV);
269 
270   // Call handling routines.
271 private:
272   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
273   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
274                        unsigned &NumBytes);
275   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
276 
277 public:
278   // Backend specific FastISel code.
279   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
280   unsigned fastMaterializeConstant(const Constant *C) override;
281   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
282 
283   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
284                            const TargetLibraryInfo *LibInfo)
285       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
286     Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
287     Context = &FuncInfo.Fn->getContext();
288   }
289 
290   bool fastSelectInstruction(const Instruction *I) override;
291 
292 #include "AArch64GenFastISel.inc"
293 };
294 
295 } // end anonymous namespace
296 
297 /// Check if the sign-/zero-extend will be a noop.
298 static bool isIntExtFree(const Instruction *I) {
299   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
300          "Unexpected integer extend instruction.");
301   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
302          "Unexpected value type.");
303   bool IsZExt = isa<ZExtInst>(I);
304 
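      // The extend of a single-use load is folded into a sign-/zero-extending
      // load, so it costs nothing here.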
305   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
306     if (LI->hasOneUse())
307       return true;
308 
309   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
310     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
311       return true;
312 
313   return false;
314 }
315 
316 /// Determine the implicit scale factor that is applied by a memory
317 /// operation for a given value type.
318 static unsigned getImplicitScaleFactor(MVT VT) {
319   switch (VT.SimpleTy) {
320   default:
321     return 0;    // invalid
322   case MVT::i1:  // fall-through
323   case MVT::i8:
324     return 1;
325   case MVT::i16:
326     return 2;
327   case MVT::i32: // fall-through
328   case MVT::f32:
329     return 4;
330   case MVT::i64: // fall-through
331   case MVT::f64:
332     return 8;
333   }
334 }
335 
336 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
337   if (CC == CallingConv::WebKit_JS)
338     return CC_AArch64_WebKit_JS;
339   if (CC == CallingConv::GHC)
340     return CC_AArch64_GHC;
341   if (CC == CallingConv::CFGuard_Check)
342     return CC_AArch64_Win64_CFGuard_Check;
343   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
344 }
345 
346 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
347   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
348          "Alloca should always return a pointer.");
349 
350   // Don't handle dynamic allocas.
351   if (!FuncInfo.StaticAllocaMap.count(AI))
352     return 0;
353 
354   DenseMap<const AllocaInst *, int>::iterator SI =
355       FuncInfo.StaticAllocaMap.find(AI);
356 
357   if (SI != FuncInfo.StaticAllocaMap.end()) {
358     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
359     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
360             ResultReg)
361         .addFrameIndex(SI->second)
362         .addImm(0)
363         .addImm(0);
364     return ResultReg;
365   }
366 
367   return 0;
368 }
369 
370 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
371   if (VT > MVT::i64)
372     return 0;
373 
374   if (!CI->isZero())
375     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
376 
377   // Create a copy from the zero register to materialize a "0" value.
378   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
379                                                    : &AArch64::GPR32RegClass;
380   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
381   Register ResultReg = createResultReg(RC);
382   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
383           ResultReg).addReg(ZeroReg, getKillRegState(true));
384   return ResultReg;
385 }
386 
387 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
388   // Positive zero (+0.0) has to be materialized with a fmov from the zero
389   // register, because the immediate version of fmov cannot encode zero.
390   if (CFP->isNullValue())
391     return fastMaterializeFloatZero(CFP);
392 
393   if (VT != MVT::f32 && VT != MVT::f64)
394     return 0;
395 
396   const APFloat Val = CFP->getValueAPF();
397   bool Is64Bit = (VT == MVT::f64);
398   // This checks to see if we can use FMOV instructions to materialize
399   // a constant; otherwise we have to materialize it via the constant pool.
400   int Imm =
401       Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
402   if (Imm != -1) {
403     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
404     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
405   }
406 
407   // For the large code model materialize the FP constant in code.
408   if (TM.getCodeModel() == CodeModel::Large) {
409     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
410     const TargetRegisterClass *RC = Is64Bit ?
411         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
412 
413     Register TmpReg = createResultReg(RC);
414     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
415         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
416 
417     Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
418     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
419             TII.get(TargetOpcode::COPY), ResultReg)
420         .addReg(TmpReg, getKillRegState(true));
421 
422     return ResultReg;
423   }
424 
425   // Materialize via constant pool.  MachineConstantPool wants an explicit
426   // alignment.
427   Align Alignment = DL.getPrefTypeAlign(CFP->getType());
428 
429   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
430   Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
431   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
432           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
433 
434   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
435   Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
436   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
437       .addReg(ADRPReg)
438       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
439   return ResultReg;
440 }
441 
442 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
443   // We can't handle thread-local variables quickly yet.
444   if (GV->isThreadLocal())
445     return 0;
446 
447   // MachO still uses GOT for large code-model accesses, but ELF requires
448   // movz/movk sequences, which FastISel doesn't handle yet.
449   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
450     return 0;
451 
452   unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
453 
454   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
455   if (!DestEVT.isSimple())
456     return 0;
457 
458   Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
459   unsigned ResultReg;
460 
461   if (OpFlags & AArch64II::MO_GOT) {
462     // ADRP + LDRX
463     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
464             ADRPReg)
465         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
466 
467     unsigned LdrOpc;
468     if (Subtarget->isTargetILP32()) {
469       ResultReg = createResultReg(&AArch64::GPR32RegClass);
470       LdrOpc = AArch64::LDRWui;
471     } else {
472       ResultReg = createResultReg(&AArch64::GPR64RegClass);
473       LdrOpc = AArch64::LDRXui;
474     }
475     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
476             ResultReg)
477       .addReg(ADRPReg)
478       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
479                         AArch64II::MO_NC | OpFlags);
480     if (!Subtarget->isTargetILP32())
481       return ResultReg;
482 
483     // LDRWui produces a 32-bit register, but pointers in-register are 64 bits,
484     // so we must extend the result on ILP32.
485     Register Result64 = createResultReg(&AArch64::GPR64RegClass);
486     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
487             TII.get(TargetOpcode::SUBREG_TO_REG))
488         .addDef(Result64)
489         .addImm(0)
490         .addReg(ResultReg, RegState::Kill)
491         .addImm(AArch64::sub_32);
492     return Result64;
493   } else {
494     // ADRP + ADDX
495     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
496             ADRPReg)
497         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
498 
499     if (OpFlags & AArch64II::MO_TAGGED) {
500       // MO_TAGGED on the page indicates a tagged address. Set the tag now.
501       // We do so by creating a MOVK that sets bits 48-63 of the register to
502       // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
503       // the small code model so we can assume a binary size of <= 4GB, which
504       // makes the untagged PC relative offset positive. The binary must also be
505       // loaded into address range [0, 2^48). Both of these properties need to
506       // be ensured at runtime when using tagged addresses.
507       //
508       // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
509       // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
510       // are not exactly 1:1 with FastISel so we cannot easily abstract this
511       // out. At some point, it would be nice to find a way to not have this
512       // duplicate code.
513       unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
514       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
515               DstReg)
516           .addReg(ADRPReg)
517           .addGlobalAddress(GV, /*Offset=*/0x100000000,
518                             AArch64II::MO_PREL | AArch64II::MO_G3)
519           .addImm(48);
520       ADRPReg = DstReg;
521     }
522 
523     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
524     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
525             ResultReg)
526         .addReg(ADRPReg)
527         .addGlobalAddress(GV, 0,
528                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
529         .addImm(0);
530   }
531   return ResultReg;
532 }
533 
534 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
535   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
536 
537   // Only handle simple types.
538   if (!CEVT.isSimple())
539     return 0;
540   MVT VT = CEVT.getSimpleVT();
541   // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
542   // 'null' pointers need somewhat special treatment.
543   if (isa<ConstantPointerNull>(C)) {
544     assert(VT == MVT::i64 && "Expected 64-bit pointers");
545     return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
546   }
547 
548   if (const auto *CI = dyn_cast<ConstantInt>(C))
549     return materializeInt(CI, VT);
550   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
551     return materializeFP(CFP, VT);
552   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
553     return materializeGV(GV);
554 
555   return 0;
556 }
557 
558 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
559   assert(CFP->isNullValue() &&
560          "Floating-point constant is not a positive zero.");
561   MVT VT;
562   if (!isTypeLegal(CFP->getType(), VT))
563     return 0;
564 
565   if (VT != MVT::f32 && VT != MVT::f64)
566     return 0;
567 
568   bool Is64Bit = (VT == MVT::f64);
569   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
570   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
571   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
572 }
573 
574 /// Check if the multiply is by a power-of-2 constant.
575 static bool isMulPowOf2(const Value *I) {
576   if (const auto *MI = dyn_cast<MulOperator>(I)) {
577     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
578       if (C->getValue().isPowerOf2())
579         return true;
580     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
581       if (C->getValue().isPowerOf2())
582         return true;
583   }
584   return false;
585 }
586 
587 // Computes the address to get to an object.
588 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
589 {
590   const User *U = nullptr;
591   unsigned Opcode = Instruction::UserOp1;
592   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
593     // Don't walk into other basic blocks unless the object is an alloca from
594     // another block, otherwise it may not have a virtual register assigned.
595     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
596         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
597       Opcode = I->getOpcode();
598       U = I;
599     }
600   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
601     Opcode = C->getOpcode();
602     U = C;
603   }
604 
605   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
606     if (Ty->getAddressSpace() > 255)
607       // Fast instruction selection doesn't support the special
608       // address spaces.
609       return false;
610 
611   switch (Opcode) {
612   default:
613     break;
614   case Instruction::BitCast:
615     // Look through bitcasts.
616     return computeAddress(U->getOperand(0), Addr, Ty);
617 
618   case Instruction::IntToPtr:
619     // Look past no-op inttoptrs.
620     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
621         TLI.getPointerTy(DL))
622       return computeAddress(U->getOperand(0), Addr, Ty);
623     break;
624 
625   case Instruction::PtrToInt:
626     // Look past no-op ptrtoints.
627     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
628       return computeAddress(U->getOperand(0), Addr, Ty);
629     break;
630 
631   case Instruction::GetElementPtr: {
632     Address SavedAddr = Addr;
633     uint64_t TmpOffset = Addr.getOffset();
634 
635     // Iterate through the GEP folding the constants into offsets where
636     // we can.
637     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
638          GTI != E; ++GTI) {
639       const Value *Op = GTI.getOperand();
640       if (StructType *STy = GTI.getStructTypeOrNull()) {
641         const StructLayout *SL = DL.getStructLayout(STy);
642         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
643         TmpOffset += SL->getElementOffset(Idx);
644       } else {
645         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
646         while (true) {
647           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
648             // Constant-offset addressing.
649             TmpOffset += CI->getSExtValue() * S;
650             break;
651           }
652           if (canFoldAddIntoGEP(U, Op)) {
653             // A compatible add with a constant operand. Fold the constant.
654             ConstantInt *CI =
655                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
656             TmpOffset += CI->getSExtValue() * S;
657             // Iterate on the other operand.
658             Op = cast<AddOperator>(Op)->getOperand(0);
659             continue;
660           }
661           // Unsupported
662           goto unsupported_gep;
663         }
664       }
665     }
666 
667     // Try to grab the base operand now.
668     Addr.setOffset(TmpOffset);
669     if (computeAddress(U->getOperand(0), Addr, Ty))
670       return true;
671 
672     // We failed, restore everything and try the other options.
673     Addr = SavedAddr;
674 
675   unsupported_gep:
676     break;
677   }
678   case Instruction::Alloca: {
679     const AllocaInst *AI = cast<AllocaInst>(Obj);
680     DenseMap<const AllocaInst *, int>::iterator SI =
681         FuncInfo.StaticAllocaMap.find(AI);
682     if (SI != FuncInfo.StaticAllocaMap.end()) {
683       Addr.setKind(Address::FrameIndexBase);
684       Addr.setFI(SI->second);
685       return true;
686     }
687     break;
688   }
689   case Instruction::Add: {
690     // Adds of constants are common and easy enough.
691     const Value *LHS = U->getOperand(0);
692     const Value *RHS = U->getOperand(1);
693 
694     if (isa<ConstantInt>(LHS))
695       std::swap(LHS, RHS);
696 
697     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
698       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
699       return computeAddress(LHS, Addr, Ty);
700     }
701 
702     Address Backup = Addr;
703     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
704       return true;
705     Addr = Backup;
706 
707     break;
708   }
709   case Instruction::Sub: {
710     // Subs of constants are common and easy enough.
711     const Value *LHS = U->getOperand(0);
712     const Value *RHS = U->getOperand(1);
713 
714     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
715       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
716       return computeAddress(LHS, Addr, Ty);
717     }
718     break;
719   }
720   case Instruction::Shl: {
721     if (Addr.getOffsetReg())
722       break;
723 
724     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
725     if (!CI)
726       break;
727 
728     unsigned Val = CI->getZExtValue();
729     if (Val < 1 || Val > 3)
730       break;
731 
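        // The register-offset addressing mode can only scale by the width of
        // the access, so only fold the shift when it matches that scale.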
732     uint64_t NumBytes = 0;
733     if (Ty && Ty->isSized()) {
734       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
735       NumBytes = NumBits / 8;
736       if (!isPowerOf2_64(NumBits))
737         NumBytes = 0;
738     }
739 
740     if (NumBytes != (1ULL << Val))
741       break;
742 
743     Addr.setShift(Val);
744     Addr.setExtendType(AArch64_AM::LSL);
745 
746     const Value *Src = U->getOperand(0);
747     if (const auto *I = dyn_cast<Instruction>(Src)) {
748       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
749         // Fold the zext or sext when it won't become a noop.
750         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
751           if (!isIntExtFree(ZE) &&
752               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
753             Addr.setExtendType(AArch64_AM::UXTW);
754             Src = ZE->getOperand(0);
755           }
756         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
757           if (!isIntExtFree(SE) &&
758               SE->getOperand(0)->getType()->isIntegerTy(32)) {
759             Addr.setExtendType(AArch64_AM::SXTW);
760             Src = SE->getOperand(0);
761           }
762         }
763       }
764     }
765 
766     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
767       if (AI->getOpcode() == Instruction::And) {
768         const Value *LHS = AI->getOperand(0);
769         const Value *RHS = AI->getOperand(1);
770 
771         if (const auto *C = dyn_cast<ConstantInt>(LHS))
772           if (C->getValue() == 0xffffffff)
773             std::swap(LHS, RHS);
774 
775         if (const auto *C = dyn_cast<ConstantInt>(RHS))
776           if (C->getValue() == 0xffffffff) {
777             Addr.setExtendType(AArch64_AM::UXTW);
778             Register Reg = getRegForValue(LHS);
779             if (!Reg)
780               return false;
781             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
782             Addr.setOffsetReg(Reg);
783             return true;
784           }
785       }
786 
787     Register Reg = getRegForValue(Src);
788     if (!Reg)
789       return false;
790     Addr.setOffsetReg(Reg);
791     return true;
792   }
793   case Instruction::Mul: {
794     if (Addr.getOffsetReg())
795       break;
796 
797     if (!isMulPowOf2(U))
798       break;
799 
800     const Value *LHS = U->getOperand(0);
801     const Value *RHS = U->getOperand(1);
802 
803     // Canonicalize power-of-2 value to the RHS.
804     if (const auto *C = dyn_cast<ConstantInt>(LHS))
805       if (C->getValue().isPowerOf2())
806         std::swap(LHS, RHS);
807 
808     assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
809     const auto *C = cast<ConstantInt>(RHS);
810     unsigned Val = C->getValue().logBase2();
811     if (Val < 1 || Val > 3)
812       break;
813 
814     uint64_t NumBytes = 0;
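        // As in the Shl case above, the shift implied by the multiply must
        // match the scale of the memory access.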
815     if (Ty && Ty->isSized()) {
816       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
817       NumBytes = NumBits / 8;
818       if (!isPowerOf2_64(NumBits))
819         NumBytes = 0;
820     }
821 
822     if (NumBytes != (1ULL << Val))
823       break;
824 
825     Addr.setShift(Val);
826     Addr.setExtendType(AArch64_AM::LSL);
827 
828     const Value *Src = LHS;
829     if (const auto *I = dyn_cast<Instruction>(Src)) {
830       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
831         // Fold the zext or sext when it won't become a noop.
832         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
833           if (!isIntExtFree(ZE) &&
834               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
835             Addr.setExtendType(AArch64_AM::UXTW);
836             Src = ZE->getOperand(0);
837           }
838         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
839           if (!isIntExtFree(SE) &&
840               SE->getOperand(0)->getType()->isIntegerTy(32)) {
841             Addr.setExtendType(AArch64_AM::SXTW);
842             Src = SE->getOperand(0);
843           }
844         }
845       }
846     }
847 
848     Register Reg = getRegForValue(Src);
849     if (!Reg)
850       return false;
851     Addr.setOffsetReg(Reg);
852     return true;
853   }
854   case Instruction::And: {
855     if (Addr.getOffsetReg())
856       break;
857 
858     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
859       break;
860 
861     const Value *LHS = U->getOperand(0);
862     const Value *RHS = U->getOperand(1);
863 
864     if (const auto *C = dyn_cast<ConstantInt>(LHS))
865       if (C->getValue() == 0xffffffff)
866         std::swap(LHS, RHS);
867 
868     if (const auto *C = dyn_cast<ConstantInt>(RHS))
869       if (C->getValue() == 0xffffffff) {
870         Addr.setShift(0);
871         Addr.setExtendType(AArch64_AM::LSL);
872         Addr.setExtendType(AArch64_AM::UXTW);
873 
874         Register Reg = getRegForValue(LHS);
875         if (!Reg)
876           return false;
877         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
878         Addr.setOffsetReg(Reg);
879         return true;
880       }
881     break;
882   }
883   case Instruction::SExt:
884   case Instruction::ZExt: {
885     if (!Addr.getReg() || Addr.getOffsetReg())
886       break;
887 
888     const Value *Src = nullptr;
889     // Fold the zext or sext when it won't become a noop.
890     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
891       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
892         Addr.setExtendType(AArch64_AM::UXTW);
893         Src = ZE->getOperand(0);
894       }
895     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
896       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
897         Addr.setExtendType(AArch64_AM::SXTW);
898         Src = SE->getOperand(0);
899       }
900     }
901 
902     if (!Src)
903       break;
904 
905     Addr.setShift(0);
906     Register Reg = getRegForValue(Src);
907     if (!Reg)
908       return false;
909     Addr.setOffsetReg(Reg);
910     return true;
911   }
912   } // end switch
913 
914   if (Addr.isRegBase() && !Addr.getReg()) {
915     Register Reg = getRegForValue(Obj);
916     if (!Reg)
917       return false;
918     Addr.setReg(Reg);
919     return true;
920   }
921 
922   if (!Addr.getOffsetReg()) {
923     Register Reg = getRegForValue(Obj);
924     if (!Reg)
925       return false;
926     Addr.setOffsetReg(Reg);
927     return true;
928   }
929 
930   return false;
931 }
932 
933 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
934   const User *U = nullptr;
935   unsigned Opcode = Instruction::UserOp1;
936   bool InMBB = true;
937 
938   if (const auto *I = dyn_cast<Instruction>(V)) {
939     Opcode = I->getOpcode();
940     U = I;
941     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
942   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
943     Opcode = C->getOpcode();
944     U = C;
945   }
946 
947   switch (Opcode) {
948   default: break;
949   case Instruction::BitCast:
950     // Look past bitcasts if its operand is in the same BB.
951     if (InMBB)
952       return computeCallAddress(U->getOperand(0), Addr);
953     break;
954   case Instruction::IntToPtr:
955     // Look past no-op inttoptrs if its operand is in the same BB.
956     if (InMBB &&
957         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
958             TLI.getPointerTy(DL))
959       return computeCallAddress(U->getOperand(0), Addr);
960     break;
961   case Instruction::PtrToInt:
962     // Look past no-op ptrtoints if its operand is in the same BB.
963     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
964       return computeCallAddress(U->getOperand(0), Addr);
965     break;
966   }
967 
968   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
969     Addr.setGlobalValue(GV);
970     return true;
971   }
972 
973   // If all else fails, try to materialize the value in a register.
974   if (!Addr.getGlobalValue()) {
975     Addr.setReg(getRegForValue(V));
976     return Addr.getReg() != 0;
977   }
978 
979   return false;
980 }
981 
982 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
983   EVT evt = TLI.getValueType(DL, Ty, true);
984 
985   if (Subtarget->isTargetILP32() && Ty->isPointerTy())
986     return false;
987 
988   // Only handle simple types.
989   if (evt == MVT::Other || !evt.isSimple())
990     return false;
991   VT = evt.getSimpleVT();
992 
993   // This is a legal type, but it's not something we handle in fast-isel.
994   if (VT == MVT::f128)
995     return false;
996 
997   // Handle all other legal types, i.e. a register that will directly hold this
998   // value.
999   return TLI.isTypeLegal(VT);
1000 }
1001 
1002 /// Determine if the value type is supported by FastISel.
1003 ///
1004 /// FastISel for AArch64 can handle more value types than are legal. This adds
1005 /// simple value types such as i1, i8, and i16.
1006 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1007   if (Ty->isVectorTy() && !IsVectorAllowed)
1008     return false;
1009 
1010   if (isTypeLegal(Ty, VT))
1011     return true;
1012 
1013   // If this is a type that can be sign- or zero-extended to a basic operation,
1014   // go ahead and accept it now.
1015   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1016     return true;
1017 
1018   return false;
1019 }
1020 
1021 bool AArch64FastISel::isValueAvailable(const Value *V) const {
1022   if (!isa<Instruction>(V))
1023     return true;
1024 
1025   const auto *I = cast<Instruction>(V);
1026   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1027 }
1028 
1029 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1030   if (Subtarget->isTargetILP32())
1031     return false;
1032 
1033   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1034   if (!ScaleFactor)
1035     return false;
1036 
1037   bool ImmediateOffsetNeedsLowering = false;
1038   bool RegisterOffsetNeedsLowering = false;
1039   int64_t Offset = Addr.getOffset();
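       // Scaled loads/stores take a 12-bit unsigned immediate scaled by the
       // access size; the unscaled forms take a 9-bit signed immediate. Any
       // other offset has to be lowered into a separate add.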
1040   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1041     ImmediateOffsetNeedsLowering = true;
1042   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1043            !isUInt<12>(Offset / ScaleFactor))
1044     ImmediateOffsetNeedsLowering = true;
1045 
1046   // Cannot encode an offset register and an immediate offset in the same
1047   // instruction. Fold the immediate offset into the load/store instruction and
1048   // emit an additional add to take care of the offset register.
1049   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1050     RegisterOffsetNeedsLowering = true;
1051 
1052   // Cannot encode zero register as base.
1053   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1054     RegisterOffsetNeedsLowering = true;
1055 
1056   // If this is a stack pointer and the offset needs to be simplified then put
1057   // the alloca address into a register, set the base type back to register and
1058   // continue. This should almost never happen.
1059   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1060   {
1061     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1062     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1063             ResultReg)
1064       .addFrameIndex(Addr.getFI())
1065       .addImm(0)
1066       .addImm(0);
1067     Addr.setKind(Address::RegBase);
1068     Addr.setReg(ResultReg);
1069   }
1070 
1071   if (RegisterOffsetNeedsLowering) {
1072     unsigned ResultReg = 0;
1073     if (Addr.getReg()) {
1074       if (Addr.getExtendType() == AArch64_AM::SXTW ||
1075           Addr.getExtendType() == AArch64_AM::UXTW   )
1076         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1077                                   Addr.getOffsetReg(), Addr.getExtendType(),
1078                                   Addr.getShift());
1079       else
1080         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1081                                   Addr.getOffsetReg(), AArch64_AM::LSL,
1082                                   Addr.getShift());
1083     } else {
1084       if (Addr.getExtendType() == AArch64_AM::UXTW)
1085         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1086                                Addr.getShift(), /*IsZExt=*/true);
1087       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1088         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1089                                Addr.getShift(), /*IsZExt=*/false);
1090       else
1091         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1092                                Addr.getShift());
1093     }
1094     if (!ResultReg)
1095       return false;
1096 
1097     Addr.setReg(ResultReg);
1098     Addr.setOffsetReg(0);
1099     Addr.setShift(0);
1100     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1101   }
1102 
1103   // Since the offset is too large for the load/store instruction get the
1104   // reg+offset into a register.
1105   if (ImmediateOffsetNeedsLowering) {
1106     unsigned ResultReg;
1107     if (Addr.getReg())
1108       // Try to fold the immediate into the add instruction.
1109       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1110     else
1111       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1112 
1113     if (!ResultReg)
1114       return false;
1115     Addr.setReg(ResultReg);
1116     Addr.setOffset(0);
1117   }
1118   return true;
1119 }
1120 
1121 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1122                                            const MachineInstrBuilder &MIB,
1123                                            MachineMemOperand::Flags Flags,
1124                                            unsigned ScaleFactor,
1125                                            MachineMemOperand *MMO) {
1126   int64_t Offset = Addr.getOffset() / ScaleFactor;
1127   // Frame base works a bit differently. Handle it separately.
1128   if (Addr.isFIBase()) {
1129     int FI = Addr.getFI();
1130     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1131     // and alignment should be based on the VT.
1132     MMO = FuncInfo.MF->getMachineMemOperand(
1133         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1134         MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1135     // Now add the rest of the operands.
1136     MIB.addFrameIndex(FI).addImm(Offset);
1137   } else {
1138     assert(Addr.isRegBase() && "Unexpected address kind.");
1139     const MCInstrDesc &II = MIB->getDesc();
1140     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1141     Addr.setReg(
1142       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1143     Addr.setOffsetReg(
1144       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1145     if (Addr.getOffsetReg()) {
1146       assert(Addr.getOffset() == 0 && "Unexpected offset");
1147       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1148                       Addr.getExtendType() == AArch64_AM::SXTX;
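           // The register-offset forms take the extend's signedness and
           // whether the offset register is scaled as separate operands.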
1149       MIB.addReg(Addr.getReg());
1150       MIB.addReg(Addr.getOffsetReg());
1151       MIB.addImm(IsSigned);
1152       MIB.addImm(Addr.getShift() != 0);
1153     } else
1154       MIB.addReg(Addr.getReg()).addImm(Offset);
1155   }
1156 
1157   if (MMO)
1158     MIB.addMemOperand(MMO);
1159 }
1160 
1161 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1162                                      const Value *RHS, bool SetFlags,
1163                                      bool WantResult,  bool IsZExt) {
1164   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1165   bool NeedExtend = false;
1166   switch (RetVT.SimpleTy) {
1167   default:
1168     return 0;
1169   case MVT::i1:
1170     NeedExtend = true;
1171     break;
1172   case MVT::i8:
1173     NeedExtend = true;
1174     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1175     break;
1176   case MVT::i16:
1177     NeedExtend = true;
1178     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1179     break;
1180   case MVT::i32:  // fall-through
1181   case MVT::i64:
1182     break;
1183   }
1184   MVT SrcVT = RetVT;
1185   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1186 
1187   // Canonicalize immediates to the RHS first.
1188   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1189     std::swap(LHS, RHS);
1190 
1191   // Canonicalize mul by power of 2 to the RHS.
1192   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1193     if (isMulPowOf2(LHS))
1194       std::swap(LHS, RHS);
1195 
1196   // Canonicalize shift immediate to the RHS.
1197   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1198     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1199       if (isa<ConstantInt>(SI->getOperand(1)))
1200         if (SI->getOpcode() == Instruction::Shl  ||
1201             SI->getOpcode() == Instruction::LShr ||
1202             SI->getOpcode() == Instruction::AShr   )
1203           std::swap(LHS, RHS);
1204 
1205   Register LHSReg = getRegForValue(LHS);
1206   if (!LHSReg)
1207     return 0;
1208 
1209   if (NeedExtend)
1210     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1211 
1212   unsigned ResultReg = 0;
1213   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1214     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1215     if (C->isNegative())
1216       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1217                                 WantResult);
1218     else
1219       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1220                                 WantResult);
1221   } else if (const auto *C = dyn_cast<Constant>(RHS))
1222     if (C->isNullValue())
1223       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1224 
1225   if (ResultReg)
1226     return ResultReg;
1227 
1228   // Only extend the RHS within the instruction if there is a valid extend type.
1229   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1230       isValueAvailable(RHS)) {
1231     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1232       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1233         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1234           Register RHSReg = getRegForValue(SI->getOperand(0));
1235           if (!RHSReg)
1236             return 0;
1237           return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
1238                                C->getZExtValue(), SetFlags, WantResult);
1239         }
1240     Register RHSReg = getRegForValue(RHS);
1241     if (!RHSReg)
1242       return 0;
1243     return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1244                          SetFlags, WantResult);
1245   }
1246 
1247   // Check if the mul can be folded into the instruction.
1248   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1249     if (isMulPowOf2(RHS)) {
1250       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1251       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1252 
1253       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1254         if (C->getValue().isPowerOf2())
1255           std::swap(MulLHS, MulRHS);
1256 
1257       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1258       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1259       Register RHSReg = getRegForValue(MulLHS);
1260       if (!RHSReg)
1261         return 0;
1262       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1263                                 ShiftVal, SetFlags, WantResult);
1264       if (ResultReg)
1265         return ResultReg;
1266     }
1267   }
1268 
1269   // Check if the shift can be folded into the instruction.
1270   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1271     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1272       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1273         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1274         switch (SI->getOpcode()) {
1275         default: break;
1276         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1277         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1278         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1279         }
1280         uint64_t ShiftVal = C->getZExtValue();
1281         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1282           Register RHSReg = getRegForValue(SI->getOperand(0));
1283           if (!RHSReg)
1284             return 0;
1285           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1286                                     ShiftVal, SetFlags, WantResult);
1287           if (ResultReg)
1288             return ResultReg;
1289         }
1290       }
1291     }
1292   }
1293 
1294   Register RHSReg = getRegForValue(RHS);
1295   if (!RHSReg)
1296     return 0;
1297 
1298   if (NeedExtend)
1299     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1300 
1301   return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1302 }
1303 
1304 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1305                                         unsigned RHSReg, bool SetFlags,
1306                                         bool WantResult) {
1307   assert(LHSReg && RHSReg && "Invalid register number.");
1308 
1309   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1310       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1311     return 0;
1312 
1313   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1314     return 0;
1315 
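       // Opcode table indexed by [SetFlags][UseAdd][Is64Bit].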
1316   static const unsigned OpcTable[2][2][2] = {
1317     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1318       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1319     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1320       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1321   };
1322   bool Is64Bit = RetVT == MVT::i64;
1323   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1324   const TargetRegisterClass *RC =
1325       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1326   unsigned ResultReg;
1327   if (WantResult)
1328     ResultReg = createResultReg(RC);
1329   else
1330     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1331 
1332   const MCInstrDesc &II = TII.get(Opc);
1333   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1334   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1335   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1336       .addReg(LHSReg)
1337       .addReg(RHSReg);
1338   return ResultReg;
1339 }
1340 
1341 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1342                                         uint64_t Imm, bool SetFlags,
1343                                         bool WantResult) {
1344   assert(LHSReg && "Invalid register number.");
1345 
1346   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1347     return 0;
1348 
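       // The arithmetic immediate is a 12-bit value that may optionally be
       // shifted left by 12; bail out if the constant cannot be encoded that
       // way.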
1349   unsigned ShiftImm;
1350   if (isUInt<12>(Imm))
1351     ShiftImm = 0;
1352   else if ((Imm & 0xfff000) == Imm) {
1353     ShiftImm = 12;
1354     Imm >>= 12;
1355   } else
1356     return 0;
1357 
1358   static const unsigned OpcTable[2][2][2] = {
1359     { { AArch64::SUBWri,  AArch64::SUBXri  },
1360       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1361     { { AArch64::SUBSWri, AArch64::SUBSXri },
1362       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1363   };
1364   bool Is64Bit = RetVT == MVT::i64;
1365   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1366   const TargetRegisterClass *RC;
1367   if (SetFlags)
1368     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1369   else
1370     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1371   unsigned ResultReg;
1372   if (WantResult)
1373     ResultReg = createResultReg(RC);
1374   else
1375     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1376 
1377   const MCInstrDesc &II = TII.get(Opc);
1378   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1379   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1380       .addReg(LHSReg)
1381       .addImm(Imm)
1382       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1383   return ResultReg;
1384 }
1385 
1386 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1387                                         unsigned RHSReg,
1388                                         AArch64_AM::ShiftExtendType ShiftType,
1389                                         uint64_t ShiftImm, bool SetFlags,
1390                                         bool WantResult) {
1391   assert(LHSReg && RHSReg && "Invalid register number.");
1392   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1393          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1394 
1395   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1396     return 0;
1397 
1398   // Don't deal with undefined shifts.
1399   if (ShiftImm >= RetVT.getSizeInBits())
1400     return 0;
1401 
1402   static const unsigned OpcTable[2][2][2] = {
1403     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1404       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1405     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1406       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1407   };
1408   bool Is64Bit = RetVT == MVT::i64;
1409   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1410   const TargetRegisterClass *RC =
1411       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1412   unsigned ResultReg;
1413   if (WantResult)
1414     ResultReg = createResultReg(RC);
1415   else
1416     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1417 
1418   const MCInstrDesc &II = TII.get(Opc);
1419   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1420   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1421   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1422       .addReg(LHSReg)
1423       .addReg(RHSReg)
1424       .addImm(getShifterImm(ShiftType, ShiftImm));
1425   return ResultReg;
1426 }
1427 
1428 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1429                                         unsigned RHSReg,
1430                                         AArch64_AM::ShiftExtendType ExtType,
1431                                         uint64_t ShiftImm, bool SetFlags,
1432                                         bool WantResult) {
1433   assert(LHSReg && RHSReg && "Invalid register number.");
1434   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1435          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1436 
1437   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1438     return 0;
1439 
1440   if (ShiftImm >= 4)
1441     return 0;
1442 
1443   static const unsigned OpcTable[2][2][2] = {
1444     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1445       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1446     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1447       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1448   };
1449   bool Is64Bit = RetVT == MVT::i64;
1450   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1451   const TargetRegisterClass *RC = nullptr;
1452   if (SetFlags)
1453     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1454   else
1455     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1456   unsigned ResultReg;
1457   if (WantResult)
1458     ResultReg = createResultReg(RC);
1459   else
1460     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1461 
1462   const MCInstrDesc &II = TII.get(Opc);
1463   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1464   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1465   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1466       .addReg(LHSReg)
1467       .addReg(RHSReg)
1468       .addImm(getArithExtendImm(ExtType, ShiftImm));
1469   return ResultReg;
1470 }
1471 
1472 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1473   Type *Ty = LHS->getType();
1474   EVT EVTy = TLI.getValueType(DL, Ty, true);
1475   if (!EVTy.isSimple())
1476     return false;
1477   MVT VT = EVTy.getSimpleVT();
1478 
1479   switch (VT.SimpleTy) {
1480   default:
1481     return false;
1482   case MVT::i1:
1483   case MVT::i8:
1484   case MVT::i16:
1485   case MVT::i32:
1486   case MVT::i64:
1487     return emitICmp(VT, LHS, RHS, IsZExt);
1488   case MVT::f32:
1489   case MVT::f64:
1490     return emitFCmp(VT, LHS, RHS);
1491   }
1492 }
1493 
1494 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1495                                bool IsZExt) {
1496   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1497                  IsZExt) != 0;
1498 }
1499 
1500 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1501   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1502                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1503 }
1504 
1505 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1506   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1507     return false;
1508 
1509   // Check to see if the second operand is a constant that we can encode
1510   // directly in the compare; FCMP (immediate) only compares against +0.0.
1511   bool UseImm = false;
1512   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1513     if (CFP->isZero() && !CFP->isNegative())
1514       UseImm = true;
1515 
1516   Register LHSReg = getRegForValue(LHS);
1517   if (!LHSReg)
1518     return false;
1519 
1520   if (UseImm) {
1521     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1522     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1523         .addReg(LHSReg);
1524     return true;
1525   }
1526 
1527   Register RHSReg = getRegForValue(RHS);
1528   if (!RHSReg)
1529     return false;
1530 
1531   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1532   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1533       .addReg(LHSReg)
1534       .addReg(RHSReg);
1535   return true;
1536 }
1537 
1538 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1539                                   bool SetFlags, bool WantResult, bool IsZExt) {
1540   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1541                     IsZExt);
1542 }
1543 
1544 /// This method is a wrapper to simplify add emission.
1545 ///
1546 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1547 /// that fails, then try to materialize the immediate into a register and use
1548 /// emitAddSub_rr instead.
1549 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1550   unsigned ResultReg;
1551   if (Imm < 0)
1552     ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1553   else
1554     ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1555 
1556   if (ResultReg)
1557     return ResultReg;
1558 
1559   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1560   if (!CReg)
1561     return 0;
1562 
1563   ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1564   return ResultReg;
1565 }
1566 
1567 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1568                                   bool SetFlags, bool WantResult, bool IsZExt) {
1569   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1570                     IsZExt);
1571 }
1572 
1573 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1574                                       unsigned RHSReg, bool WantResult) {
1575   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1576                        /*SetFlags=*/true, WantResult);
1577 }
1578 
1579 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1580                                       unsigned RHSReg,
1581                                       AArch64_AM::ShiftExtendType ShiftType,
1582                                       uint64_t ShiftImm, bool WantResult) {
1583   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1584                        ShiftImm, /*SetFlags=*/true, WantResult);
1585 }
1586 
1587 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1588                                         const Value *LHS, const Value *RHS) {
1589   // Canonicalize immediates to the RHS first.
1590   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1591     std::swap(LHS, RHS);
1592 
1593   // Canonicalize mul by power-of-2 to the RHS.
1594   if (LHS->hasOneUse() && isValueAvailable(LHS))
1595     if (isMulPowOf2(LHS))
1596       std::swap(LHS, RHS);
1597 
1598   // Canonicalize shift immediate to the RHS.
1599   if (LHS->hasOneUse() && isValueAvailable(LHS))
1600     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1601       if (isa<ConstantInt>(SI->getOperand(1)))
1602         std::swap(LHS, RHS);
1603 
1604   Register LHSReg = getRegForValue(LHS);
1605   if (!LHSReg)
1606     return 0;
1607 
1608   unsigned ResultReg = 0;
1609   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1610     uint64_t Imm = C->getZExtValue();
1611     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1612   }
1613   if (ResultReg)
1614     return ResultReg;
1615 
1616   // Check if the mul can be folded into the instruction.
1617   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1618     if (isMulPowOf2(RHS)) {
1619       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1620       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1621 
1622       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1623         if (C->getValue().isPowerOf2())
1624           std::swap(MulLHS, MulRHS);
1625 
1626       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1627       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1628 
1629       Register RHSReg = getRegForValue(MulLHS);
1630       if (!RHSReg)
1631         return 0;
1632       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1633       if (ResultReg)
1634         return ResultReg;
1635     }
1636   }
1637 
1638   // Check if the shift can be folded into the instruction.
1639   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1640     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1641       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1642         uint64_t ShiftVal = C->getZExtValue();
1643         Register RHSReg = getRegForValue(SI->getOperand(0));
1644         if (!RHSReg)
1645           return 0;
1646         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1647         if (ResultReg)
1648           return ResultReg;
1649       }
1650   }
1651 
1652   Register RHSReg = getRegForValue(RHS);
1653   if (!RHSReg)
1654     return 0;
1655 
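       // i8/i16 (and i1) operations are widened to i32; i8/i16 results are
       // masked back to their original width below.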
1656   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1657   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1658   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1659     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1660     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1661   }
1662   return ResultReg;
1663 }
1664 
1665 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1666                                            unsigned LHSReg, uint64_t Imm) {
1667   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1668                 "ISD nodes are not consecutive!");
1669   static const unsigned OpcTable[3][2] = {
1670     { AArch64::ANDWri, AArch64::ANDXri },
1671     { AArch64::ORRWri, AArch64::ORRXri },
1672     { AArch64::EORWri, AArch64::EORXri }
1673   };
1674   const TargetRegisterClass *RC;
1675   unsigned Opc;
1676   unsigned RegSize;
1677   switch (RetVT.SimpleTy) {
1678   default:
1679     return 0;
1680   case MVT::i1:
1681   case MVT::i8:
1682   case MVT::i16:
1683   case MVT::i32: {
1684     unsigned Idx = ISDOpc - ISD::AND;
1685     Opc = OpcTable[Idx][0];
1686     RC = &AArch64::GPR32spRegClass;
1687     RegSize = 32;
1688     break;
1689   }
1690   case MVT::i64:
1691     Opc = OpcTable[ISDOpc - ISD::AND][1];
1692     RC = &AArch64::GPR64spRegClass;
1693     RegSize = 64;
1694     break;
1695   }
1696 
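       // Only immediates representable as an AArch64 logical (bitmask) immediate
       // can be encoded directly.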
1697   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1698     return 0;
1699 
1700   Register ResultReg =
1701       fastEmitInst_ri(Opc, RC, LHSReg,
1702                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1703   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1704     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1705     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1706   }
1707   return ResultReg;
1708 }
1709 
1710 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1711                                            unsigned LHSReg, unsigned RHSReg,
1712                                            uint64_t ShiftImm) {
1713   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1714                 "ISD nodes are not consecutive!");
1715   static const unsigned OpcTable[3][2] = {
1716     { AArch64::ANDWrs, AArch64::ANDXrs },
1717     { AArch64::ORRWrs, AArch64::ORRXrs },
1718     { AArch64::EORWrs, AArch64::EORXrs }
1719   };
1720 
1721   // Don't deal with undefined shifts.
1722   if (ShiftImm >= RetVT.getSizeInBits())
1723     return 0;
1724 
1725   const TargetRegisterClass *RC;
1726   unsigned Opc;
1727   switch (RetVT.SimpleTy) {
1728   default:
1729     return 0;
1730   case MVT::i1:
1731   case MVT::i8:
1732   case MVT::i16:
1733   case MVT::i32:
1734     Opc = OpcTable[ISDOpc - ISD::AND][0];
1735     RC = &AArch64::GPR32RegClass;
1736     break;
1737   case MVT::i64:
1738     Opc = OpcTable[ISDOpc - ISD::AND][1];
1739     RC = &AArch64::GPR64RegClass;
1740     break;
1741   }
1742   Register ResultReg =
1743       fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1744                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1745   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1746     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1747     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1748   }
1749   return ResultReg;
1750 }
1751 
1752 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1753                                      uint64_t Imm) {
1754   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1755 }
1756 
1757 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1758                                    bool WantZExt, MachineMemOperand *MMO) {
1759   if (!TLI.allowsMisalignedMemoryAccesses(VT))
1760     return 0;
1761 
1762   // Simplify this down to something we can handle.
1763   if (!simplifyAddress(Addr, VT))
1764     return 0;
1765 
1766   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1767   if (!ScaleFactor)
1768     llvm_unreachable("Unexpected value type.");
1769 
1770   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1771   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1772   bool UseScaled = true;
1773   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1774     UseScaled = false;
1775     ScaleFactor = 1;
1776   }
1777 
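       // Within each sign-/zero-extend table, every addressing mode (unscaled,
       // scaled, register-offset X, register-offset W-extend) contributes two
       // rows, selected below via 2 * Idx + IsRet64Bit.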
1778   static const unsigned GPOpcTable[2][8][4] = {
1779     // Sign-extend.
1780     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1781         AArch64::LDURXi  },
1782       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1783         AArch64::LDURXi  },
1784       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1785         AArch64::LDRXui  },
1786       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1787         AArch64::LDRXui  },
1788       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1789         AArch64::LDRXroX },
1790       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1791         AArch64::LDRXroX },
1792       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1793         AArch64::LDRXroW },
1794       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1795         AArch64::LDRXroW }
1796     },
1797     // Zero-extend.
1798     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1799         AArch64::LDURXi  },
1800       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1801         AArch64::LDURXi  },
1802       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1803         AArch64::LDRXui  },
1804       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1805         AArch64::LDRXui  },
1806       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1807         AArch64::LDRXroX },
1808       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1809         AArch64::LDRXroX },
1810       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1811         AArch64::LDRXroW },
1812       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1813         AArch64::LDRXroW }
1814     }
1815   };
1816 
1817   static const unsigned FPOpcTable[4][2] = {
1818     { AArch64::LDURSi,  AArch64::LDURDi  },
1819     { AArch64::LDRSui,  AArch64::LDRDui  },
1820     { AArch64::LDRSroX, AArch64::LDRDroX },
1821     { AArch64::LDRSroW, AArch64::LDRDroW }
1822   };
1823 
1824   unsigned Opc;
1825   const TargetRegisterClass *RC;
1826   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1827                       Addr.getOffsetReg();
1828   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1829   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1830       Addr.getExtendType() == AArch64_AM::SXTW)
1831     Idx++;
1832 
1833   bool IsRet64Bit = RetVT == MVT::i64;
1834   switch (VT.SimpleTy) {
1835   default:
1836     llvm_unreachable("Unexpected value type.");
1837   case MVT::i1: // Intentional fall-through.
1838   case MVT::i8:
1839     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1840     RC = (IsRet64Bit && !WantZExt) ?
1841              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842     break;
1843   case MVT::i16:
1844     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1845     RC = (IsRet64Bit && !WantZExt) ?
1846              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1847     break;
1848   case MVT::i32:
1849     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1850     RC = (IsRet64Bit && !WantZExt) ?
1851              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1852     break;
1853   case MVT::i64:
1854     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1855     RC = &AArch64::GPR64RegClass;
1856     break;
1857   case MVT::f32:
1858     Opc = FPOpcTable[Idx][0];
1859     RC = &AArch64::FPR32RegClass;
1860     break;
1861   case MVT::f64:
1862     Opc = FPOpcTable[Idx][1];
1863     RC = &AArch64::FPR64RegClass;
1864     break;
1865   }
1866 
1867   // Create the base instruction, then add the operands.
1868   Register ResultReg = createResultReg(RC);
1869   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1870                                     TII.get(Opc), ResultReg);
1871   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1872 
1873   // Loading an i1 requires special handling: mask the result down to bit 0.
1874   if (VT == MVT::i1) {
1875     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1876     assert(ANDReg && "Unexpected AND instruction emission failure.");
1877     ResultReg = ANDReg;
1878   }
1879 
1880   // For zero-extending loads to 64 bits we emit a 32-bit load and then convert
1881   // the 32-bit reg to a 64-bit reg; the 32-bit load already zeroes the upper bits.
1882   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1883     Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1884     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1885             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1886         .addImm(0)
1887         .addReg(ResultReg, getKillRegState(true))
1888         .addImm(AArch64::sub_32);
1889     ResultReg = Reg64;
1890   }
1891   return ResultReg;
1892 }
1893 
1894 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1895   MVT VT;
1896   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1897     return false;
1898 
1899   if (VT.isVector())
1900     return selectOperator(I, I->getOpcode());
1901 
1902   unsigned ResultReg;
1903   switch (I->getOpcode()) {
1904   default:
1905     llvm_unreachable("Unexpected instruction.");
1906   case Instruction::Add:
1907     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1908     break;
1909   case Instruction::Sub:
1910     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1911     break;
1912   }
1913   if (!ResultReg)
1914     return false;
1915 
1916   updateValueMap(I, ResultReg);
1917   return true;
1918 }
1919 
1920 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1921   MVT VT;
1922   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1923     return false;
1924 
1925   if (VT.isVector())
1926     return selectOperator(I, I->getOpcode());
1927 
1928   unsigned ResultReg;
1929   switch (I->getOpcode()) {
1930   default:
1931     llvm_unreachable("Unexpected instruction.");
1932   case Instruction::And:
1933     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1934     break;
1935   case Instruction::Or:
1936     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1937     break;
1938   case Instruction::Xor:
1939     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1940     break;
1941   }
1942   if (!ResultReg)
1943     return false;
1944 
1945   updateValueMap(I, ResultReg);
1946   return true;
1947 }
1948 
1949 bool AArch64FastISel::selectLoad(const Instruction *I) {
1950   MVT VT;
1951   // Verify we have a legal type before going any further.  Currently, we handle
1952   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1953   // those that can be sign- or zero-extended to a basic operation (i1/i8/i16).
1954   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1955       cast<LoadInst>(I)->isAtomic())
1956     return false;
1957 
1958   const Value *SV = I->getOperand(0);
1959   if (TLI.supportSwiftError()) {
1960     // Swifterror values can come from either a function parameter with
1961     // swifterror attribute or an alloca with swifterror attribute.
1962     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1963       if (Arg->hasSwiftErrorAttr())
1964         return false;
1965     }
1966 
1967     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1968       if (Alloca->isSwiftError())
1969         return false;
1970     }
1971   }
1972 
1973   // See if we can handle this address.
1974   Address Addr;
1975   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1976     return false;
1977 
1978   // Fold the following sign-/zero-extend into the load instruction.
1979   bool WantZExt = true;
1980   MVT RetVT = VT;
1981   const Value *IntExtVal = nullptr;
1982   if (I->hasOneUse()) {
1983     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1984       if (isTypeSupported(ZE->getType(), RetVT))
1985         IntExtVal = ZE;
1986       else
1987         RetVT = VT;
1988     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1989       if (isTypeSupported(SE->getType(), RetVT))
1990         IntExtVal = SE;
1991       else
1992         RetVT = VT;
1993       WantZExt = false;
1994     }
1995   }
1996 
1997   unsigned ResultReg =
1998       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1999   if (!ResultReg)
2000     return false;
2001 
2002   // There are a few different cases we have to handle, because the load or the
2003   // sign-/zero-extend might not be selected by FastISel if we fall back to
2004   // SelectionDAG. There is also an ordering issue when both instructions are in
2005   // different basic blocks.
2006   // 1.) The load instruction is selected by FastISel, but the integer extend
2007   //     not. This usually happens when the integer extend is in a different
2008   //     basic block and SelectionDAG took over for that basic block.
2009   // 2.) The load instruction is selected before the integer extend. This only
2010   //     happens when the integer extend is in a different basic block.
2011   // 3.) The load instruction is selected by SelectionDAG and the integer extend
2012   //     by FastISel. This happens if there are instructions between the load
2013   //     and the integer extend that couldn't be selected by FastISel.
2014   if (IntExtVal) {
2015     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2016     // could select it. Emit a copy to subreg if necessary. FastISel will remove
2017     // it when it selects the integer extend.
2018     Register Reg = lookUpRegForValue(IntExtVal);
2019     auto *MI = MRI.getUniqueVRegDef(Reg);
2020     if (!MI) {
2021       if (RetVT == MVT::i64 && VT <= MVT::i32) {
2022         if (WantZExt) {
2023           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2024           MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2025           ResultReg = std::prev(I)->getOperand(0).getReg();
2026           removeDeadCode(I, std::next(I));
2027         } else
2028           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2029                                                  AArch64::sub_32);
2030       }
2031       updateValueMap(I, ResultReg);
2032       return true;
2033     }
2034 
2035     // The integer extend has already been emitted - delete all the instructions
2036     // that have been emitted by the integer extend lowering code and use the
2037     // result from the load instruction directly.
2038     while (MI) {
2039       Reg = 0;
2040       for (auto &Opnd : MI->uses()) {
2041         if (Opnd.isReg()) {
2042           Reg = Opnd.getReg();
2043           break;
2044         }
2045       }
2046       MachineBasicBlock::iterator I(MI);
2047       removeDeadCode(I, std::next(I));
2048       MI = nullptr;
2049       if (Reg)
2050         MI = MRI.getUniqueVRegDef(Reg);
2051     }
2052     updateValueMap(IntExtVal, ResultReg);
2053     return true;
2054   }
2055 
2056   updateValueMap(I, ResultReg);
2057   return true;
2058 }
2059 
2060 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2061                                        unsigned AddrReg,
2062                                        MachineMemOperand *MMO) {
2063   unsigned Opc;
2064   switch (VT.SimpleTy) {
2065   default: return false;
2066   case MVT::i8:  Opc = AArch64::STLRB; break;
2067   case MVT::i16: Opc = AArch64::STLRH; break;
2068   case MVT::i32: Opc = AArch64::STLRW; break;
2069   case MVT::i64: Opc = AArch64::STLRX; break;
2070   }
2071 
2072   const MCInstrDesc &II = TII.get(Opc);
2073   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2074   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2075   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2076       .addReg(SrcReg)
2077       .addReg(AddrReg)
2078       .addMemOperand(MMO);
2079   return true;
2080 }
2081 
2082 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2083                                 MachineMemOperand *MMO) {
2084   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2085     return false;
2086 
2087   // Simplify this down to something we can handle.
2088   if (!simplifyAddress(Addr, VT))
2089     return false;
2090 
2091   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2092   if (!ScaleFactor)
2093     llvm_unreachable("Unexpected value type.");
2094 
2095   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2096   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2097   bool UseScaled = true;
2098   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2099     UseScaled = false;
2100     ScaleFactor = 1;
2101   }
2102 
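       // Rows by addressing mode: unscaled (STUR), scaled (STRui), register-offset
       // X, register-offset W-extend; columns by stored type (i8..i64, f32, f64).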
2103   static const unsigned OpcTable[4][6] = {
2104     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2105       AArch64::STURSi,   AArch64::STURDi },
2106     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2107       AArch64::STRSui,   AArch64::STRDui },
2108     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2109       AArch64::STRSroX,  AArch64::STRDroX },
2110     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2111       AArch64::STRSroW,  AArch64::STRDroW }
2112   };
2113 
2114   unsigned Opc;
2115   bool VTIsi1 = false;
2116   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2117                       Addr.getOffsetReg();
2118   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2119   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2120       Addr.getExtendType() == AArch64_AM::SXTW)
2121     Idx++;
2122 
2123   switch (VT.SimpleTy) {
2124   default: llvm_unreachable("Unexpected value type.");
2125   case MVT::i1:  VTIsi1 = true; [[fallthrough]];
2126   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2127   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2128   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2129   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2130   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2131   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2132   }
2133 
2134   // Storing an i1 requires special handling.
2135   if (VTIsi1 && SrcReg != AArch64::WZR) {
2136     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2137     assert(ANDReg && "Unexpected AND instruction emission failure.");
2138     SrcReg = ANDReg;
2139   }
2140   // Create the base instruction, then add the operands.
2141   const MCInstrDesc &II = TII.get(Opc);
2142   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2143   MachineInstrBuilder MIB =
2144       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2145   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2146 
2147   return true;
2148 }
2149 
2150 bool AArch64FastISel::selectStore(const Instruction *I) {
2151   MVT VT;
2152   const Value *Op0 = I->getOperand(0);
2153   // Verify we have a legal type before going any further.  Currently, we handle
2154   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2155   // those that can be sign- or zero-extended to a basic operation (i1/i8/i16).
2156   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2157     return false;
2158 
2159   const Value *PtrV = I->getOperand(1);
2160   if (TLI.supportSwiftError()) {
2161     // Swifterror values can come from either a function parameter with
2162     // swifterror attribute or an alloca with swifterror attribute.
2163     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2164       if (Arg->hasSwiftErrorAttr())
2165         return false;
2166     }
2167 
2168     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2169       if (Alloca->isSwiftError())
2170         return false;
2171     }
2172   }
2173 
2174   // Get the value to be stored into a register. Use the zero register directly
2175   // when possible to avoid an unnecessary copy and a wasted register.
2176   unsigned SrcReg = 0;
2177   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2178     if (CI->isZero())
2179       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2180   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2181     if (CF->isZero() && !CF->isNegative()) {
2182       VT = MVT::getIntegerVT(VT.getSizeInBits());
2183       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2184     }
2185   }
2186 
2187   if (!SrcReg)
2188     SrcReg = getRegForValue(Op0);
2189 
2190   if (!SrcReg)
2191     return false;
2192 
2193   auto *SI = cast<StoreInst>(I);
2194 
2195   // Try to emit a STLR for seq_cst/release.
2196   if (SI->isAtomic()) {
2197     AtomicOrdering Ord = SI->getOrdering();
2198     // The non-atomic instructions are sufficient for relaxed stores.
2199     if (isReleaseOrStronger(Ord)) {
2200       // The STLR addressing mode only supports a base reg; pass that directly.
2201       Register AddrReg = getRegForValue(PtrV);
2202       return emitStoreRelease(VT, SrcReg, AddrReg,
2203                               createMachineMemOperandFor(I));
2204     }
2205   }
2206 
2207   // See if we can handle this address.
2208   Address Addr;
2209   if (!computeAddress(PtrV, Addr, Op0->getType()))
2210     return false;
2211 
2212   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2213     return false;
2214   return true;
2215 }
2216 
2217 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2218   switch (Pred) {
2219   case CmpInst::FCMP_ONE:
2220   case CmpInst::FCMP_UEQ:
2221   default:
2222     // AL is our "invalid" marker; FCMP_ONE and FCMP_UEQ need two condition checks.
2223     return AArch64CC::AL;
2224   case CmpInst::ICMP_EQ:
2225   case CmpInst::FCMP_OEQ:
2226     return AArch64CC::EQ;
2227   case CmpInst::ICMP_SGT:
2228   case CmpInst::FCMP_OGT:
2229     return AArch64CC::GT;
2230   case CmpInst::ICMP_SGE:
2231   case CmpInst::FCMP_OGE:
2232     return AArch64CC::GE;
2233   case CmpInst::ICMP_UGT:
2234   case CmpInst::FCMP_UGT:
2235     return AArch64CC::HI;
2236   case CmpInst::FCMP_OLT:
2237     return AArch64CC::MI;
2238   case CmpInst::ICMP_ULE:
2239   case CmpInst::FCMP_OLE:
2240     return AArch64CC::LS;
2241   case CmpInst::FCMP_ORD:
2242     return AArch64CC::VC;
2243   case CmpInst::FCMP_UNO:
2244     return AArch64CC::VS;
2245   case CmpInst::FCMP_UGE:
2246     return AArch64CC::PL;
2247   case CmpInst::ICMP_SLT:
2248   case CmpInst::FCMP_ULT:
2249     return AArch64CC::LT;
2250   case CmpInst::ICMP_SLE:
2251   case CmpInst::FCMP_ULE:
2252     return AArch64CC::LE;
2253   case CmpInst::FCMP_UNE:
2254   case CmpInst::ICMP_NE:
2255     return AArch64CC::NE;
2256   case CmpInst::ICMP_UGE:
2257     return AArch64CC::HS;
2258   case CmpInst::ICMP_ULT:
2259     return AArch64CC::LO;
2260   }
2261 }
2262 
2263 /// Try to emit a combined compare-and-branch instruction.
2264 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2265   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2266   // will not be produced, as they are conditional branch instructions that do
2267   // not set flags.
2268   if (FuncInfo.MF->getFunction().hasFnAttribute(
2269           Attribute::SpeculativeLoadHardening))
2270     return false;
2271 
2272   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2273   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2274   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2275 
2276   const Value *LHS = CI->getOperand(0);
2277   const Value *RHS = CI->getOperand(1);
2278 
2279   MVT VT;
2280   if (!isTypeSupported(LHS->getType(), VT))
2281     return false;
2282 
2283   unsigned BW = VT.getSizeInBits();
2284   if (BW > 64)
2285     return false;
2286 
2287   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2288   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2289 
2290   // Try to take advantage of fallthrough opportunities.
2291   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2292     std::swap(TBB, FBB);
2293     Predicate = CmpInst::getInversePredicate(Predicate);
2294   }
2295 
2296   int TestBit = -1;
2297   bool IsCmpNE;
2298   switch (Predicate) {
2299   default:
2300     return false;
2301   case CmpInst::ICMP_EQ:
2302   case CmpInst::ICMP_NE:
2303     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2304       std::swap(LHS, RHS);
2305 
2306     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2307       return false;
2308 
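         // Fold (X & (1 << n)) ==/!= 0 into a single TB(N)Z on bit n.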
2309     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2310       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2311         const Value *AndLHS = AI->getOperand(0);
2312         const Value *AndRHS = AI->getOperand(1);
2313 
2314         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2315           if (C->getValue().isPowerOf2())
2316             std::swap(AndLHS, AndRHS);
2317 
2318         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2319           if (C->getValue().isPowerOf2()) {
2320             TestBit = C->getValue().logBase2();
2321             LHS = AndLHS;
2322           }
2323       }
2324 
2325     if (VT == MVT::i1)
2326       TestBit = 0;
2327 
2328     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2329     break;
2330   case CmpInst::ICMP_SLT:
2331   case CmpInst::ICMP_SGE:
2332     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2333       return false;
2334 
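         // A signed compare against zero depends only on the sign bit, so test
         // bit BW - 1 directly.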
2335     TestBit = BW - 1;
2336     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2337     break;
2338   case CmpInst::ICMP_SGT:
2339   case CmpInst::ICMP_SLE:
2340     if (!isa<ConstantInt>(RHS))
2341       return false;
2342 
2343     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2344       return false;
2345 
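         // x > -1 and x <= -1 are equivalent to x >= 0 and x < 0, so again only
         // the sign bit needs to be tested.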
2346     TestBit = BW - 1;
2347     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2348     break;
2349   } // end switch
2350 
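       // Indexed as OpcTable[IsBitTest][IsCmpNE][Is64Bit]: CB(N)Z vs. TB(N)Z and
       // the W vs. X register forms.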
2351   static const unsigned OpcTable[2][2][2] = {
2352     { {AArch64::CBZW,  AArch64::CBZX },
2353       {AArch64::CBNZW, AArch64::CBNZX} },
2354     { {AArch64::TBZW,  AArch64::TBZX },
2355       {AArch64::TBNZW, AArch64::TBNZX} }
2356   };
2357 
2358   bool IsBitTest = TestBit != -1;
2359   bool Is64Bit = BW == 64;
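       // TB(N)Z on bits 0-31 can use the W-register form even when the value is
       // 64 bits wide.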
2360   if (TestBit < 32 && TestBit >= 0)
2361     Is64Bit = false;
2362 
2363   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2364   const MCInstrDesc &II = TII.get(Opc);
2365 
2366   Register SrcReg = getRegForValue(LHS);
2367   if (!SrcReg)
2368     return false;
2369 
2370   if (BW == 64 && !Is64Bit)
2371     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2372 
2373   if ((BW < 32) && !IsBitTest)
2374     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2375 
2376   // Emit the combined compare and branch instruction.
2377   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2378   MachineInstrBuilder MIB =
2379       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2380           .addReg(SrcReg);
2381   if (IsBitTest)
2382     MIB.addImm(TestBit);
2383   MIB.addMBB(TBB);
2384 
2385   finishCondBranch(BI->getParent(), TBB, FBB);
2386   return true;
2387 }
2388 
2389 bool AArch64FastISel::selectBranch(const Instruction *I) {
2390   const BranchInst *BI = cast<BranchInst>(I);
2391   if (BI->isUnconditional()) {
2392     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2393     fastEmitBranch(MSucc, BI->getDebugLoc());
2394     return true;
2395   }
2396 
2397   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2398   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2399 
2400   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2401     if (CI->hasOneUse() && isValueAvailable(CI)) {
2402       // Try to optimize or fold the cmp.
2403       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2404       switch (Predicate) {
2405       default:
2406         break;
2407       case CmpInst::FCMP_FALSE:
2408         fastEmitBranch(FBB, MIMD.getDL());
2409         return true;
2410       case CmpInst::FCMP_TRUE:
2411         fastEmitBranch(TBB, MIMD.getDL());
2412         return true;
2413       }
2414 
2415       // Try to emit a combined compare-and-branch first.
2416       if (emitCompareAndBranch(BI))
2417         return true;
2418 
2419       // Try to take advantage of fallthrough opportunities.
2420       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2421         std::swap(TBB, FBB);
2422         Predicate = CmpInst::getInversePredicate(Predicate);
2423       }
2424 
2425       // Emit the cmp.
2426       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2427         return false;
2428 
2429       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2430       // instruction.
2431       AArch64CC::CondCode CC = getCompareCC(Predicate);
2432       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2433       switch (Predicate) {
2434       default:
2435         break;
2436       case CmpInst::FCMP_UEQ:
2437         ExtraCC = AArch64CC::EQ;
2438         CC = AArch64CC::VS;
2439         break;
2440       case CmpInst::FCMP_ONE:
2441         ExtraCC = AArch64CC::MI;
2442         CC = AArch64CC::GT;
2443         break;
2444       }
2445       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2446 
2447       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2448       if (ExtraCC != AArch64CC::AL) {
2449         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2450             .addImm(ExtraCC)
2451             .addMBB(TBB);
2452       }
2453 
2454       // Emit the branch.
2455       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2456           .addImm(CC)
2457           .addMBB(TBB);
2458 
2459       finishCondBranch(BI->getParent(), TBB, FBB);
2460       return true;
2461     }
2462   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2463     uint64_t Imm = CI->getZExtValue();
2464     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2465     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2466         .addMBB(Target);
2467 
2468     // Obtain the branch probability and add the target to the successor list.
2469     if (FuncInfo.BPI) {
2470       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2471           BI->getParent(), Target->getBasicBlock());
2472       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2473     } else
2474       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2475     return true;
2476   } else {
2477     AArch64CC::CondCode CC = AArch64CC::NE;
2478     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2479       // Fake-request the condition; otherwise the intrinsic might be completely
2480       // optimized away.
2481       Register CondReg = getRegForValue(BI->getCondition());
2482       if (!CondReg)
2483         return false;
2484 
2485       // Emit the branch.
2486       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2487         .addImm(CC)
2488         .addMBB(TBB);
2489 
2490       finishCondBranch(BI->getParent(), TBB, FBB);
2491       return true;
2492     }
2493   }
2494 
2495   Register CondReg = getRegForValue(BI->getCondition());
2496   if (CondReg == 0)
2497     return false;
2498 
2499   // i1 conditions come in as i32 values; test the lowest bit with TB(N)Z.
2500   unsigned Opcode = AArch64::TBNZW;
2501   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2502     std::swap(TBB, FBB);
2503     Opcode = AArch64::TBZW;
2504   }
2505 
2506   const MCInstrDesc &II = TII.get(Opcode);
2507   Register ConstrainedCondReg
2508     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2509   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2510       .addReg(ConstrainedCondReg)
2511       .addImm(0)
2512       .addMBB(TBB);
2513 
2514   finishCondBranch(BI->getParent(), TBB, FBB);
2515   return true;
2516 }
2517 
2518 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2519   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2520   Register AddrReg = getRegForValue(BI->getOperand(0));
2521   if (AddrReg == 0)
2522     return false;
2523 
2524   // Emit the indirect branch.
2525   const MCInstrDesc &II = TII.get(AArch64::BR);
2526   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2527   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2528 
2529   // Make sure the CFG is up-to-date.
2530   for (const auto *Succ : BI->successors())
2531     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2532 
2533   return true;
2534 }
2535 
2536 bool AArch64FastISel::selectCmp(const Instruction *I) {
2537   const CmpInst *CI = cast<CmpInst>(I);
2538 
2539   // Vectors of i1 are weird: bail out.
2540   if (CI->getType()->isVectorTy())
2541     return false;
2542 
2543   // Try to optimize or fold the cmp.
2544   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2545   unsigned ResultReg = 0;
2546   switch (Predicate) {
2547   default:
2548     break;
2549   case CmpInst::FCMP_FALSE:
2550     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2551     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2552             TII.get(TargetOpcode::COPY), ResultReg)
2553         .addReg(AArch64::WZR, getKillRegState(true));
2554     break;
2555   case CmpInst::FCMP_TRUE:
2556     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2557     break;
2558   }
2559 
2560   if (ResultReg) {
2561     updateValueMap(I, ResultReg);
2562     return true;
2563   }
2564 
2565   // Emit the cmp.
2566   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2567     return false;
2568 
2569   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2570 
2571   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2572   // condition codes are inverted, because they are used by CSINC.
2573   static unsigned CondCodeTable[2][2] = {
2574     { AArch64CC::NE, AArch64CC::VC },
2575     { AArch64CC::PL, AArch64CC::LE }
2576   };
2577   unsigned *CondCodes = nullptr;
2578   switch (Predicate) {
2579   default:
2580     break;
2581   case CmpInst::FCMP_UEQ:
2582     CondCodes = &CondCodeTable[0][0];
2583     break;
2584   case CmpInst::FCMP_ONE:
2585     CondCodes = &CondCodeTable[1][0];
2586     break;
2587   }
2588 
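       // CSINC Wd, WZR, WZR, cc yields 0 when cc holds and 1 otherwise; chaining
       // two of them with the inverted codes above produces the logical OR of the
       // two desired conditions.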
2589   if (CondCodes) {
2590     Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2591     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2592             TmpReg1)
2593         .addReg(AArch64::WZR, getKillRegState(true))
2594         .addReg(AArch64::WZR, getKillRegState(true))
2595         .addImm(CondCodes[0]);
2596     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2597             ResultReg)
2598         .addReg(TmpReg1, getKillRegState(true))
2599         .addReg(AArch64::WZR, getKillRegState(true))
2600         .addImm(CondCodes[1]);
2601 
2602     updateValueMap(I, ResultReg);
2603     return true;
2604   }
2605 
2606   // Now set a register based on the comparison.
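       // CSINC Wd, WZR, WZR, invertedCC materializes 1 when CC holds and 0
       // otherwise.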
2607   AArch64CC::CondCode CC = getCompareCC(Predicate);
2608   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2609   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2610   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2611           ResultReg)
2612       .addReg(AArch64::WZR, getKillRegState(true))
2613       .addReg(AArch64::WZR, getKillRegState(true))
2614       .addImm(invertedCC);
2615 
2616   updateValueMap(I, ResultReg);
2617   return true;
2618 }
2619 
2620 /// Optimize selects of i1 where one of the operands is a constant 'true' or
2621 /// 'false' value.
2622 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2623   if (!SI->getType()->isIntegerTy(1))
2624     return false;
2625 
2626   const Value *Src1Val, *Src2Val;
2627   unsigned Opc = 0;
2628   bool NeedExtraOp = false;
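       // Folds used below: select(c, 1, f) == c | f, select(c, 0, f) == f & ~c
       // (BIC), select(c, t, 1) == ~c | t (invert c first), and
       // select(c, t, 0) == c & t.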
2629   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2630     if (CI->isOne()) {
2631       Src1Val = SI->getCondition();
2632       Src2Val = SI->getFalseValue();
2633       Opc = AArch64::ORRWrr;
2634     } else {
2635       assert(CI->isZero());
2636       Src1Val = SI->getFalseValue();
2637       Src2Val = SI->getCondition();
2638       Opc = AArch64::BICWrr;
2639     }
2640   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2641     if (CI->isOne()) {
2642       Src1Val = SI->getCondition();
2643       Src2Val = SI->getTrueValue();
2644       Opc = AArch64::ORRWrr;
2645       NeedExtraOp = true;
2646     } else {
2647       assert(CI->isZero());
2648       Src1Val = SI->getCondition();
2649       Src2Val = SI->getTrueValue();
2650       Opc = AArch64::ANDWrr;
2651     }
2652   }
2653 
2654   if (!Opc)
2655     return false;
2656 
2657   Register Src1Reg = getRegForValue(Src1Val);
2658   if (!Src1Reg)
2659     return false;
2660 
2661   Register Src2Reg = getRegForValue(Src2Val);
2662   if (!Src2Reg)
2663     return false;
2664 
2665   if (NeedExtraOp)
2666     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2667 
2668   Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2669                                        Src2Reg);
2670   updateValueMap(SI, ResultReg);
2671   return true;
2672 }
2673 
2674 bool AArch64FastISel::selectSelect(const Instruction *I) {
2675   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2676   MVT VT;
2677   if (!isTypeSupported(I->getType(), VT))
2678     return false;
2679 
2680   unsigned Opc;
2681   const TargetRegisterClass *RC;
2682   switch (VT.SimpleTy) {
2683   default:
2684     return false;
2685   case MVT::i1:
2686   case MVT::i8:
2687   case MVT::i16:
2688   case MVT::i32:
2689     Opc = AArch64::CSELWr;
2690     RC = &AArch64::GPR32RegClass;
2691     break;
2692   case MVT::i64:
2693     Opc = AArch64::CSELXr;
2694     RC = &AArch64::GPR64RegClass;
2695     break;
2696   case MVT::f32:
2697     Opc = AArch64::FCSELSrrr;
2698     RC = &AArch64::FPR32RegClass;
2699     break;
2700   case MVT::f64:
2701     Opc = AArch64::FCSELDrrr;
2702     RC = &AArch64::FPR64RegClass;
2703     break;
2704   }
2705 
2706   const SelectInst *SI = cast<SelectInst>(I);
2707   const Value *Cond = SI->getCondition();
2708   AArch64CC::CondCode CC = AArch64CC::NE;
2709   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2710 
2711   if (optimizeSelect(SI))
2712     return true;
2713 
2714   // Try to pick up the flags so we don't have to emit another compare.
2715   if (foldXALUIntrinsic(CC, I, Cond)) {
2716     // Fake-request the condition to force emission of the XALU intrinsic.
2717     Register CondReg = getRegForValue(Cond);
2718     if (!CondReg)
2719       return false;
2720   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2721              isValueAvailable(Cond)) {
2722     const auto *Cmp = cast<CmpInst>(Cond);
2723     // Try to optimize or fold the cmp.
2724     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2725     const Value *FoldSelect = nullptr;
2726     switch (Predicate) {
2727     default:
2728       break;
2729     case CmpInst::FCMP_FALSE:
2730       FoldSelect = SI->getFalseValue();
2731       break;
2732     case CmpInst::FCMP_TRUE:
2733       FoldSelect = SI->getTrueValue();
2734       break;
2735     }
2736 
2737     if (FoldSelect) {
2738       Register SrcReg = getRegForValue(FoldSelect);
2739       if (!SrcReg)
2740         return false;
2741 
2742       updateValueMap(I, SrcReg);
2743       return true;
2744     }
2745 
2746     // Emit the cmp.
2747     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2748       return false;
2749 
2750     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2751     CC = getCompareCC(Predicate);
2752     switch (Predicate) {
2753     default:
2754       break;
2755     case CmpInst::FCMP_UEQ:
2756       ExtraCC = AArch64CC::EQ;
2757       CC = AArch64CC::VS;
2758       break;
2759     case CmpInst::FCMP_ONE:
2760       ExtraCC = AArch64CC::MI;
2761       CC = AArch64CC::GT;
2762       break;
2763     }
2764     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2765   } else {
2766     Register CondReg = getRegForValue(Cond);
2767     if (!CondReg)
2768       return false;
2769 
2770     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2771     CondReg = constrainOperandRegClass(II, CondReg, 1);
2772 
2773     // Emit a TST instruction (ANDS wzr, reg, #imm).
2774     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2775             AArch64::WZR)
2776         .addReg(CondReg)
2777         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2778   }
2779 
2780   Register Src1Reg = getRegForValue(SI->getTrueValue());
2781   Register Src2Reg = getRegForValue(SI->getFalseValue());
2782 
2783   if (!Src1Reg || !Src2Reg)
2784     return false;
2785 
2786   if (ExtraCC != AArch64CC::AL)
2787     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2788 
2789   Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2790   updateValueMap(I, ResultReg);
2791   return true;
2792 }
2793 
2794 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2795   Value *V = I->getOperand(0);
2796   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2797     return false;
2798 
2799   Register Op = getRegForValue(V);
2800   if (Op == 0)
2801     return false;
2802 
2803   Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2804   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2805           ResultReg).addReg(Op);
2806   updateValueMap(I, ResultReg);
2807   return true;
2808 }
2809 
2810 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2811   Value *V = I->getOperand(0);
2812   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2813     return false;
2814 
2815   Register Op = getRegForValue(V);
2816   if (Op == 0)
2817     return false;
2818 
2819   Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2820   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2821           ResultReg).addReg(Op);
2822   updateValueMap(I, ResultReg);
2823   return true;
2824 }
2825 
2826 // FPToUI and FPToSI
2827 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2828   MVT DestVT;
2829   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2830     return false;
2831 
2832   Register SrcReg = getRegForValue(I->getOperand(0));
2833   if (SrcReg == 0)
2834     return false;
2835 
2836   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2837   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2838     return false;
2839 
2840   unsigned Opc;
2841   if (SrcVT == MVT::f64) {
2842     if (Signed)
2843       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2844     else
2845       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2846   } else {
2847     if (Signed)
2848       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2849     else
2850       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2851   }
2852   Register ResultReg = createResultReg(
2853       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2854   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2855       .addReg(SrcReg);
2856   updateValueMap(I, ResultReg);
2857   return true;
2858 }
2859 
2860 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2861   MVT DestVT;
2862   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2863     return false;
2864   // Let regular ISEL handle FP16
2865   if (DestVT == MVT::f16)
2866     return false;
2867 
2868   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2869          "Unexpected value type.");
2870 
2871   Register SrcReg = getRegForValue(I->getOperand(0));
2872   if (!SrcReg)
2873     return false;
2874 
2875   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2876 
2877   // Handle sign-extension.
2878   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2879     SrcReg =
2880         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2881     if (!SrcReg)
2882       return false;
2883   }
2884 
2885   unsigned Opc;
2886   if (SrcVT == MVT::i64) {
2887     if (Signed)
2888       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2889     else
2890       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2891   } else {
2892     if (Signed)
2893       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2894     else
2895       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2896   }
2897 
2898   Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2899   updateValueMap(I, ResultReg);
2900   return true;
2901 }
2902 
2903 bool AArch64FastISel::fastLowerArguments() {
2904   if (!FuncInfo.CanLowerReturn)
2905     return false;
2906 
2907   const Function *F = FuncInfo.Fn;
2908   if (F->isVarArg())
2909     return false;
2910 
2911   CallingConv::ID CC = F->getCallingConv();
2912   if (CC != CallingConv::C && CC != CallingConv::Swift)
2913     return false;
2914 
2915   if (Subtarget->hasCustomCallingConv())
2916     return false;
2917 
2918   // Only handle simple cases of up to 8 GPR and FPR each.
2919   unsigned GPRCnt = 0;
2920   unsigned FPRCnt = 0;
2921   for (auto const &Arg : F->args()) {
2922     if (Arg.hasAttribute(Attribute::ByVal) ||
2923         Arg.hasAttribute(Attribute::InReg) ||
2924         Arg.hasAttribute(Attribute::StructRet) ||
2925         Arg.hasAttribute(Attribute::SwiftSelf) ||
2926         Arg.hasAttribute(Attribute::SwiftAsync) ||
2927         Arg.hasAttribute(Attribute::SwiftError) ||
2928         Arg.hasAttribute(Attribute::Nest))
2929       return false;
2930 
2931     Type *ArgTy = Arg.getType();
2932     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2933       return false;
2934 
2935     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2936     if (!ArgVT.isSimple())
2937       return false;
2938 
2939     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2940     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2941       return false;
2942 
2943     if (VT.isVector() &&
2944         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2945       return false;
2946 
2947     if (VT >= MVT::i1 && VT <= MVT::i64)
2948       ++GPRCnt;
2949     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2950              VT.is128BitVector())
2951       ++FPRCnt;
2952     else
2953       return false;
2954 
2955     if (GPRCnt > 8 || FPRCnt > 8)
2956       return false;
2957   }
2958 
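       // Argument registers, one row per register class: W, X, H, S, D and Q.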
2959   static const MCPhysReg Registers[6][8] = {
2960     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2961       AArch64::W5, AArch64::W6, AArch64::W7 },
2962     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2963       AArch64::X5, AArch64::X6, AArch64::X7 },
2964     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2965       AArch64::H5, AArch64::H6, AArch64::H7 },
2966     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2967       AArch64::S5, AArch64::S6, AArch64::S7 },
2968     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2969       AArch64::D5, AArch64::D6, AArch64::D7 },
2970     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2971       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2972   };
2973 
2974   unsigned GPRIdx = 0;
2975   unsigned FPRIdx = 0;
2976   for (auto const &Arg : F->args()) {
2977     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2978     unsigned SrcReg;
2979     const TargetRegisterClass *RC;
2980     if (VT >= MVT::i1 && VT <= MVT::i32) {
2981       SrcReg = Registers[0][GPRIdx++];
2982       RC = &AArch64::GPR32RegClass;
2983       VT = MVT::i32;
2984     } else if (VT == MVT::i64) {
2985       SrcReg = Registers[1][GPRIdx++];
2986       RC = &AArch64::GPR64RegClass;
2987     } else if (VT == MVT::f16) {
2988       SrcReg = Registers[2][FPRIdx++];
2989       RC = &AArch64::FPR16RegClass;
2990     } else if (VT == MVT::f32) {
2991       SrcReg = Registers[3][FPRIdx++];
2992       RC = &AArch64::FPR32RegClass;
2993     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2994       SrcReg = Registers[4][FPRIdx++];
2995       RC = &AArch64::FPR64RegClass;
2996     } else if (VT.is128BitVector()) {
2997       SrcReg = Registers[5][FPRIdx++];
2998       RC = &AArch64::FPR128RegClass;
2999     } else
3000       llvm_unreachable("Unexpected value type.");
3001 
3002     Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3003     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3004     // Without this, EmitLiveInCopies may eliminate the livein if its only
3005     // use is a bitcast (which isn't turned into an instruction).
3006     Register ResultReg = createResultReg(RC);
3007     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3008             TII.get(TargetOpcode::COPY), ResultReg)
3009         .addReg(DstReg, getKillRegState(true));
3010     updateValueMap(&Arg, ResultReg);
3011   }
3012   return true;
3013 }
3014 
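/// Lower the outgoing call arguments: analyze their locations for the calling
/// convention, emit CALLSEQ_START, then either copy each argument into its
/// assigned register or store it at its offset from SP.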
3015 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3016                                       SmallVectorImpl<MVT> &OutVTs,
3017                                       unsigned &NumBytes) {
3018   CallingConv::ID CC = CLI.CallConv;
3019   SmallVector<CCValAssign, 16> ArgLocs;
3020   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3021   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3022 
3023   // Get a count of how many bytes are to be pushed on the stack.
3024   NumBytes = CCInfo.getNextStackOffset();
3025 
3026   // Issue CALLSEQ_START
3027   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3028   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3029     .addImm(NumBytes).addImm(0);
3030 
3031   // Process the args.
3032   for (CCValAssign &VA : ArgLocs) {
3033     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3034     MVT ArgVT = OutVTs[VA.getValNo()];
3035 
3036     Register ArgReg = getRegForValue(ArgVal);
3037     if (!ArgReg)
3038       return false;
3039 
3040     // Handle arg promotion: SExt, ZExt, AExt.
3041     switch (VA.getLocInfo()) {
3042     case CCValAssign::Full:
3043       break;
3044     case CCValAssign::SExt: {
3045       MVT DestVT = VA.getLocVT();
3046       MVT SrcVT = ArgVT;
3047       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3048       if (!ArgReg)
3049         return false;
3050       break;
3051     }
3052     case CCValAssign::AExt:
3053     // Intentional fall-through.
3054     case CCValAssign::ZExt: {
3055       MVT DestVT = VA.getLocVT();
3056       MVT SrcVT = ArgVT;
3057       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3058       if (!ArgReg)
3059         return false;
3060       break;
3061     }
3062     default:
3063       llvm_unreachable("Unknown arg promotion!");
3064     }
3065 
3066     // Now copy/store arg to correct locations.
3067     if (VA.isRegLoc() && !VA.needsCustom()) {
3068       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3069               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3070       CLI.OutRegs.push_back(VA.getLocReg());
3071     } else if (VA.needsCustom()) {
3072       // FIXME: Handle custom args.
3073       return false;
3074     } else {
3075       assert(VA.isMemLoc() && "Assuming store on stack.");
3076 
3077       // Don't emit stores for undef values.
3078       if (isa<UndefValue>(ArgVal))
3079         continue;
3080 
3081       // Need to store on the stack.
3082       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3083 
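      // Big-endian targets pass an argument smaller than 8 bytes in the
      // most-significant bytes of its stack slot, so bump the store offset.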
3084       unsigned BEAlign = 0;
3085       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3086         BEAlign = 8 - ArgSize;
3087 
3088       Address Addr;
3089       Addr.setKind(Address::RegBase);
3090       Addr.setReg(AArch64::SP);
3091       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3092 
3093       Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3094       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3095           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3096           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3097 
3098       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3099         return false;
3100     }
3101   }
3102   return true;
3103 }
3104 
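/// Finish the call sequence: emit CALLSEQ_END and, for non-void calls, copy
/// the single supported return value out of its location register.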
3105 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3106                                  unsigned NumBytes) {
3107   CallingConv::ID CC = CLI.CallConv;
3108 
3109   // Issue CALLSEQ_END
3110   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3111   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3112     .addImm(NumBytes).addImm(0);
3113 
3114   // Now the return value.
3115   if (RetVT != MVT::isVoid) {
3116     SmallVector<CCValAssign, 16> RVLocs;
3117     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3118     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3119 
3120     // Only handle a single return value.
3121     if (RVLocs.size() != 1)
3122       return false;
3123 
3124     // Copy all of the result registers out of their specified physreg.
3125     MVT CopyVT = RVLocs[0].getValVT();
3126 
3127     // TODO: Handle big-endian results
3128     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3129       return false;
3130 
3131     Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3132     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3133             TII.get(TargetOpcode::COPY), ResultReg)
3134         .addReg(RVLocs[0].getLocReg());
3135     CLI.InRegs.push_back(RVLocs[0].getLocReg());
3136 
3137     CLI.ResultReg = ResultReg;
3138     CLI.NumResultRegs = 1;
3139   }
3140 
3141   return true;
3142 }
3143 
3144 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3145   CallingConv::ID CC  = CLI.CallConv;
3146   bool IsTailCall     = CLI.IsTailCall;
3147   bool IsVarArg       = CLI.IsVarArg;
3148   const Value *Callee = CLI.Callee;
3149   MCSymbol *Symbol = CLI.Symbol;
3150 
3151   if (!Callee && !Symbol)
3152     return false;
3153 
3154   // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3155   // a bti instruction following the call.
3156   if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3157       !Subtarget->noBTIAtReturnTwice() &&
3158       MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3159     return false;
3160 
3161   // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3162   if (CLI.CB && CLI.CB->isIndirectCall() &&
3163       CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3164     return false;
3165 
3166   // Allow SelectionDAG isel to handle tail calls.
3167   if (IsTailCall)
3168     return false;
3169 
3170   // FIXME: we could and should support this, but for now correctness at -O0 is
3171   // more important.
3172   if (Subtarget->isTargetILP32())
3173     return false;
3174 
3175   CodeModel::Model CM = TM.getCodeModel();
3176   // Only support the small-addressing and large code models.
3177   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3178     return false;
3179 
3180   // FIXME: Add large code model support for ELF.
3181   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3182     return false;
3183 
3184   // Let SDISel handle vararg functions.
3185   if (IsVarArg)
3186     return false;
3187 
3188   // FIXME: Only handle *simple* calls for now.
3189   MVT RetVT;
3190   if (CLI.RetTy->isVoidTy())
3191     RetVT = MVT::isVoid;
3192   else if (!isTypeLegal(CLI.RetTy, RetVT))
3193     return false;
3194 
3195   for (auto Flag : CLI.OutFlags)
3196     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3197         Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3198       return false;
3199 
3200   // Set up the argument vectors.
3201   SmallVector<MVT, 16> OutVTs;
3202   OutVTs.reserve(CLI.OutVals.size());
3203 
3204   for (auto *Val : CLI.OutVals) {
3205     MVT VT;
3206     if (!isTypeLegal(Val->getType(), VT) &&
3207         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3208       return false;
3209 
3210     // We don't handle vector parameters yet.
3211     if (VT.isVector() || VT.getSizeInBits() > 64)
3212       return false;
3213 
3214     OutVTs.push_back(VT);
3215   }
3216 
3217   Address Addr;
3218   if (Callee && !computeCallAddress(Callee, Addr))
3219     return false;
3220 
3221   // The weak function target may be zero; in that case we must use indirect
3222   // addressing via a stub on Windows, as the target may be out of range for a
3223   // PC-relative jump.
3224   if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3225       Addr.getGlobalValue()->hasExternalWeakLinkage())
3226     return false;
3227 
3228   // Handle the arguments now that we've gotten them.
3229   unsigned NumBytes;
3230   if (!processCallArgs(CLI, OutVTs, NumBytes))
3231     return false;
3232 
3233   const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3234   if (RegInfo->isAnyArgRegReserved(*MF))
3235     RegInfo->emitReservedArgRegCallError(*MF);
3236 
3237   // Issue the call.
3238   MachineInstrBuilder MIB;
3239   if (Subtarget->useSmallAddressing()) {
3240     const MCInstrDesc &II =
3241         TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3242     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3243     if (Symbol)
3244       MIB.addSym(Symbol, 0);
3245     else if (Addr.getGlobalValue())
3246       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3247     else if (Addr.getReg()) {
3248       Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3249       MIB.addReg(Reg);
3250     } else
3251       return false;
3252   } else {
3253     unsigned CallReg = 0;
3254     if (Symbol) {
3255       Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3256       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3257               ADRPReg)
3258           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3259 
3260       CallReg = createResultReg(&AArch64::GPR64RegClass);
3261       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3262               TII.get(AArch64::LDRXui), CallReg)
3263           .addReg(ADRPReg)
3264           .addSym(Symbol,
3265                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3266     } else if (Addr.getGlobalValue())
3267       CallReg = materializeGV(Addr.getGlobalValue());
3268     else if (Addr.getReg())
3269       CallReg = Addr.getReg();
3270 
3271     if (!CallReg)
3272       return false;
3273 
3274     const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3275     CallReg = constrainOperandRegClass(II, CallReg, 0);
3276     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3277   }
3278 
3279   // Add implicit physical register uses to the call.
3280   for (auto Reg : CLI.OutRegs)
3281     MIB.addReg(Reg, RegState::Implicit);
3282 
3283   // Add a register mask with the call-preserved registers.
3284   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3285   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3286 
3287   CLI.Call = MIB;
3288 
3289   // Finish off the call including any return values.
3290   return finishCall(CLI, RetVT, NumBytes);
3291 }
3292 
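/// Heuristic for inlining memcpy: the copy is "small" if it takes at most four
/// loads/stores at the known alignment, or is under 32 bytes when the
/// alignment is unknown.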
3293 bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3294   if (Alignment)
3295     return Len / Alignment->value() <= 4;
3296   else
3297     return Len < 32;
3298 }
3299 
3300 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3301                                          uint64_t Len, MaybeAlign Alignment) {
3302   // Make sure we don't bloat code by inlining very large memcpy's.
3303   if (!isMemCpySmall(Len, Alignment))
3304     return false;
3305 
3306   int64_t UnscaledOffset = 0;
3307   Address OrigDest = Dest;
3308   Address OrigSrc = Src;
3309 
3310   while (Len) {
3311     MVT VT;
3312     if (!Alignment || *Alignment >= 8) {
3313       if (Len >= 8)
3314         VT = MVT::i64;
3315       else if (Len >= 4)
3316         VT = MVT::i32;
3317       else if (Len >= 2)
3318         VT = MVT::i16;
3319       else {
3320         VT = MVT::i8;
3321       }
3322     } else {
3323       assert(Alignment && "Alignment is set in this branch");
3324       // Bound based on alignment.
3325       if (Len >= 4 && *Alignment == 4)
3326         VT = MVT::i32;
3327       else if (Len >= 2 && *Alignment == 2)
3328         VT = MVT::i16;
3329       else {
3330         VT = MVT::i8;
3331       }
3332     }
3333 
3334     unsigned ResultReg = emitLoad(VT, VT, Src);
3335     if (!ResultReg)
3336       return false;
3337 
3338     if (!emitStore(VT, ResultReg, Dest))
3339       return false;
3340 
3341     int64_t Size = VT.getSizeInBits() / 8;
3342     Len -= Size;
3343     UnscaledOffset += Size;
3344 
3345     // We need to recompute the unscaled offset for each iteration.
3346     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3347     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3348   }
3349 
3350   return true;
3351 }
3352 
3353 /// Check if it is possible to fold the condition from the XALU intrinsic
3354 /// into the user. The condition code will only be updated on success.
3355 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3356                                         const Instruction *I,
3357                                         const Value *Cond) {
3358   if (!isa<ExtractValueInst>(Cond))
3359     return false;
3360 
3361   const auto *EV = cast<ExtractValueInst>(Cond);
3362   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3363     return false;
3364 
3365   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3366   MVT RetVT;
3367   const Function *Callee = II->getCalledFunction();
3368   Type *RetTy =
3369       cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3370   if (!isTypeLegal(RetTy, RetVT))
3371     return false;
3372 
3373   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3374     return false;
3375 
3376   const Value *LHS = II->getArgOperand(0);
3377   const Value *RHS = II->getArgOperand(1);
3378 
3379   // Canonicalize immediate to the RHS.
3380   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3381     std::swap(LHS, RHS);
3382 
3383   // Simplify multiplies.
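  // Multiplying by 2 overflows exactly when x + x does, so the overflow can be
  // read from the flags of an add (VS/HS) instead of the mul-based NE check.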
3384   Intrinsic::ID IID = II->getIntrinsicID();
3385   switch (IID) {
3386   default:
3387     break;
3388   case Intrinsic::smul_with_overflow:
3389     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3390       if (C->getValue() == 2)
3391         IID = Intrinsic::sadd_with_overflow;
3392     break;
3393   case Intrinsic::umul_with_overflow:
3394     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3395       if (C->getValue() == 2)
3396         IID = Intrinsic::uadd_with_overflow;
3397     break;
3398   }
3399 
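  // Map each intrinsic to the AArch64 condition that signals overflow: VS =
  // signed overflow, HS = carry set (unsigned add wrapped), LO = carry clear
  // (unsigned subtract borrowed), NE = set by the compare emitted for the
  // multiply overflow checks.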
3400   AArch64CC::CondCode TmpCC;
3401   switch (IID) {
3402   default:
3403     return false;
3404   case Intrinsic::sadd_with_overflow:
3405   case Intrinsic::ssub_with_overflow:
3406     TmpCC = AArch64CC::VS;
3407     break;
3408   case Intrinsic::uadd_with_overflow:
3409     TmpCC = AArch64CC::HS;
3410     break;
3411   case Intrinsic::usub_with_overflow:
3412     TmpCC = AArch64CC::LO;
3413     break;
3414   case Intrinsic::smul_with_overflow:
3415   case Intrinsic::umul_with_overflow:
3416     TmpCC = AArch64CC::NE;
3417     break;
3418   }
3419 
3420   // Check if both instructions are in the same basic block.
3421   if (!isValueAvailable(II))
3422     return false;
3423 
3424   // Make sure nothing else is in the way between the intrinsic and its use.
3425   BasicBlock::const_iterator Start(I);
3426   BasicBlock::const_iterator End(II);
3427   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3428     // We only expect extractvalue instructions between the intrinsic and the
3429     // instruction to be selected.
3430     if (!isa<ExtractValueInst>(Itr))
3431       return false;
3432 
3433     // Check that the extractvalue operand comes from the intrinsic.
3434     const auto *EVI = cast<ExtractValueInst>(Itr);
3435     if (EVI->getAggregateOperand() != II)
3436       return false;
3437   }
3438 
3439   CC = TmpCC;
3440   return true;
3441 }
3442 
3443 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3444   // FIXME: Handle more intrinsics.
3445   switch (II->getIntrinsicID()) {
3446   default: return false;
3447   case Intrinsic::frameaddress: {
3448     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3449     MFI.setFrameAddressIsTaken(true);
3450 
3451     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3452     Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3453     Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3454     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3455             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3456     // Recursively load frame address
3457     // ldr x0, [fp]
3458     // ldr x0, [x0]
3459     // ldr x0, [x0]
3460     // ...
3461     unsigned DestReg;
3462     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3463     while (Depth--) {
3464       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3465                                 SrcReg, 0);
3466       assert(DestReg && "Unexpected LDR instruction emission failure.");
3467       SrcReg = DestReg;
3468     }
3469 
3470     updateValueMap(II, SrcReg);
3471     return true;
3472   }
3473   case Intrinsic::sponentry: {
3474     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3475 
3476     // SP = FP + Fixed Object + 16
3477     int FI = MFI.CreateFixedObject(4, 0, false);
3478     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3479     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3480             TII.get(AArch64::ADDXri), ResultReg)
3481             .addFrameIndex(FI)
3482             .addImm(0)
3483             .addImm(0);
3484 
3485     updateValueMap(II, ResultReg);
3486     return true;
3487   }
3488   case Intrinsic::memcpy:
3489   case Intrinsic::memmove: {
3490     const auto *MTI = cast<MemTransferInst>(II);
3491     // Don't handle volatile.
3492     if (MTI->isVolatile())
3493       return false;
3494 
3495     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
3496     // we would emit dead code because we don't currently handle memmoves.
3497     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3498     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3499       // Small memcpy's are common enough that we want to do them without a call
3500       // if possible.
3501       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3502       MaybeAlign Alignment;
3503       if (MTI->getDestAlign() || MTI->getSourceAlign())
3504         Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3505                              MTI->getSourceAlign().valueOrOne());
3506       if (isMemCpySmall(Len, Alignment)) {
3507         Address Dest, Src;
3508         if (!computeAddress(MTI->getRawDest(), Dest) ||
3509             !computeAddress(MTI->getRawSource(), Src))
3510           return false;
3511         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3512           return true;
3513       }
3514     }
3515 
3516     if (!MTI->getLength()->getType()->isIntegerTy(64))
3517       return false;
3518 
3519     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3520       // Fast instruction selection doesn't support the special
3521       // address spaces.
3522       return false;
3523 
3524     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3525     return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3526   }
3527   case Intrinsic::memset: {
3528     const MemSetInst *MSI = cast<MemSetInst>(II);
3529     // Don't handle volatile.
3530     if (MSI->isVolatile())
3531       return false;
3532 
3533     if (!MSI->getLength()->getType()->isIntegerTy(64))
3534       return false;
3535 
3536     if (MSI->getDestAddressSpace() > 255)
3537       // Fast instruction selection doesn't support the special
3538       // address spaces.
3539       return false;
3540 
3541     return lowerCallTo(II, "memset", II->arg_size() - 1);
3542   }
3543   case Intrinsic::sin:
3544   case Intrinsic::cos:
3545   case Intrinsic::pow: {
3546     MVT RetVT;
3547     if (!isTypeLegal(II->getType(), RetVT))
3548       return false;
3549 
3550     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3551       return false;
3552 
3553     static const RTLIB::Libcall LibCallTable[3][2] = {
3554       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3555       { RTLIB::COS_F32, RTLIB::COS_F64 },
3556       { RTLIB::POW_F32, RTLIB::POW_F64 }
3557     };
3558     RTLIB::Libcall LC;
3559     bool Is64Bit = RetVT == MVT::f64;
3560     switch (II->getIntrinsicID()) {
3561     default:
3562       llvm_unreachable("Unexpected intrinsic.");
3563     case Intrinsic::sin:
3564       LC = LibCallTable[0][Is64Bit];
3565       break;
3566     case Intrinsic::cos:
3567       LC = LibCallTable[1][Is64Bit];
3568       break;
3569     case Intrinsic::pow:
3570       LC = LibCallTable[2][Is64Bit];
3571       break;
3572     }
3573 
3574     ArgListTy Args;
3575     Args.reserve(II->arg_size());
3576 
3577     // Populate the argument list.
3578     for (auto &Arg : II->args()) {
3579       ArgListEntry Entry;
3580       Entry.Val = Arg;
3581       Entry.Ty = Arg->getType();
3582       Args.push_back(Entry);
3583     }
3584 
3585     CallLoweringInfo CLI;
3586     MCContext &Ctx = MF->getContext();
3587     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3588                   TLI.getLibcallName(LC), std::move(Args));
3589     if (!lowerCallTo(CLI))
3590       return false;
3591     updateValueMap(II, CLI.ResultReg);
3592     return true;
3593   }
3594   case Intrinsic::fabs: {
3595     MVT VT;
3596     if (!isTypeLegal(II->getType(), VT))
3597       return false;
3598 
3599     unsigned Opc;
3600     switch (VT.SimpleTy) {
3601     default:
3602       return false;
3603     case MVT::f32:
3604       Opc = AArch64::FABSSr;
3605       break;
3606     case MVT::f64:
3607       Opc = AArch64::FABSDr;
3608       break;
3609     }
3610     Register SrcReg = getRegForValue(II->getOperand(0));
3611     if (!SrcReg)
3612       return false;
3613     Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3614     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3615       .addReg(SrcReg);
3616     updateValueMap(II, ResultReg);
3617     return true;
3618   }
3619   case Intrinsic::trap:
3620     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3621         .addImm(1);
3622     return true;
3623   case Intrinsic::debugtrap:
3624     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3625         .addImm(0xF000);
3626     return true;
3627 
3628   case Intrinsic::sqrt: {
3629     Type *RetTy = II->getCalledFunction()->getReturnType();
3630 
3631     MVT VT;
3632     if (!isTypeLegal(RetTy, VT))
3633       return false;
3634 
3635     Register Op0Reg = getRegForValue(II->getOperand(0));
3636     if (!Op0Reg)
3637       return false;
3638 
3639     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3640     if (!ResultReg)
3641       return false;
3642 
3643     updateValueMap(II, ResultReg);
3644     return true;
3645   }
3646   case Intrinsic::sadd_with_overflow:
3647   case Intrinsic::uadd_with_overflow:
3648   case Intrinsic::ssub_with_overflow:
3649   case Intrinsic::usub_with_overflow:
3650   case Intrinsic::smul_with_overflow:
3651   case Intrinsic::umul_with_overflow: {
3652     // This implements the basic lowering of the xalu with overflow intrinsics.
3653     const Function *Callee = II->getCalledFunction();
3654     auto *Ty = cast<StructType>(Callee->getReturnType());
3655     Type *RetTy = Ty->getTypeAtIndex(0U);
3656 
3657     MVT VT;
3658     if (!isTypeLegal(RetTy, VT))
3659       return false;
3660 
3661     if (VT != MVT::i32 && VT != MVT::i64)
3662       return false;
3663 
3664     const Value *LHS = II->getArgOperand(0);
3665     const Value *RHS = II->getArgOperand(1);
3666     // Canonicalize immediate to the RHS.
3667     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3668       std::swap(LHS, RHS);
3669 
3670     // Simplify multiplies.
3671     Intrinsic::ID IID = II->getIntrinsicID();
3672     switch (IID) {
3673     default:
3674       break;
3675     case Intrinsic::smul_with_overflow:
3676       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3677         if (C->getValue() == 2) {
3678           IID = Intrinsic::sadd_with_overflow;
3679           RHS = LHS;
3680         }
3681       break;
3682     case Intrinsic::umul_with_overflow:
3683       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3684         if (C->getValue() == 2) {
3685           IID = Intrinsic::uadd_with_overflow;
3686           RHS = LHS;
3687         }
3688       break;
3689     }
3690 
3691     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3692     AArch64CC::CondCode CC = AArch64CC::Invalid;
3693     switch (IID) {
3694     default: llvm_unreachable("Unexpected intrinsic!");
3695     case Intrinsic::sadd_with_overflow:
3696       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3697       CC = AArch64CC::VS;
3698       break;
3699     case Intrinsic::uadd_with_overflow:
3700       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3701       CC = AArch64CC::HS;
3702       break;
3703     case Intrinsic::ssub_with_overflow:
3704       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3705       CC = AArch64CC::VS;
3706       break;
3707     case Intrinsic::usub_with_overflow:
3708       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3709       CC = AArch64CC::LO;
3710       break;
3711     case Intrinsic::smul_with_overflow: {
3712       CC = AArch64CC::NE;
3713       Register LHSReg = getRegForValue(LHS);
3714       if (!LHSReg)
3715         return false;
3716 
3717       Register RHSReg = getRegForValue(RHS);
3718       if (!RHSReg)
3719         return false;
3720 
3721       if (VT == MVT::i32) {
3722         MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3723         Register MulSubReg =
3724             fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3725         // cmp xreg, wreg, sxtw
3726         emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3727                       AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3728                       /*WantResult=*/false);
3729         MulReg = MulSubReg;
3730       } else {
3731         assert(VT == MVT::i64 && "Unexpected value type.");
3732         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3733         // reused in the next instruction.
3734         MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3735         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3736         emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3737                     /*WantResult=*/false);
3738       }
3739       break;
3740     }
3741     case Intrinsic::umul_with_overflow: {
3742       CC = AArch64CC::NE;
3743       Register LHSReg = getRegForValue(LHS);
3744       if (!LHSReg)
3745         return false;
3746 
3747       Register RHSReg = getRegForValue(RHS);
3748       if (!RHSReg)
3749         return false;
3750 
3751       if (VT == MVT::i32) {
3752         MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3753         // tst xreg, #0xffffffff00000000
3754         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3755                 TII.get(AArch64::ANDSXri), AArch64::XZR)
3756             .addReg(MulReg)
3757             .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3758         MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3759       } else {
3760         assert(VT == MVT::i64 && "Unexpected value type.");
3761         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3762         // reused in the next instruction.
3763         MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3764         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3765         emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3766       }
3767       break;
3768     }
3769     }
3770 
3771     if (MulReg) {
3772       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3773       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3774               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3775     }
3776 
3777     if (!ResultReg1)
3778       return false;
3779 
3780     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3781                                   AArch64::WZR, AArch64::WZR,
3782                                   getInvertedCondCode(CC));
3783     (void)ResultReg2;
3784     assert((ResultReg1 + 1) == ResultReg2 &&
3785            "Nonconsecutive result registers.");
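    // updateValueMap with two results relies on the value and the overflow bit
    // living in consecutive virtual registers, as asserted above.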
3786     updateValueMap(II, ResultReg1, 2);
3787     return true;
3788   }
3789   }
3790   return false;
3791 }
3792 
3793 bool AArch64FastISel::selectRet(const Instruction *I) {
3794   const ReturnInst *Ret = cast<ReturnInst>(I);
3795   const Function &F = *I->getParent()->getParent();
3796 
3797   if (!FuncInfo.CanLowerReturn)
3798     return false;
3799 
3800   if (F.isVarArg())
3801     return false;
3802 
3803   if (TLI.supportSwiftError() &&
3804       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3805     return false;
3806 
3807   if (TLI.supportSplitCSR(FuncInfo.MF))
3808     return false;
3809 
3810   // Build a list of return value registers.
3811   SmallVector<unsigned, 4> RetRegs;
3812 
3813   if (Ret->getNumOperands() > 0) {
3814     CallingConv::ID CC = F.getCallingConv();
3815     SmallVector<ISD::OutputArg, 4> Outs;
3816     GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3817 
3818     // Analyze operands of the call, assigning locations to each operand.
3819     SmallVector<CCValAssign, 16> ValLocs;
3820     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3821     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3822                                                      : RetCC_AArch64_AAPCS;
3823     CCInfo.AnalyzeReturn(Outs, RetCC);
3824 
3825     // Only handle a single return value for now.
3826     if (ValLocs.size() != 1)
3827       return false;
3828 
3829     CCValAssign &VA = ValLocs[0];
3830     const Value *RV = Ret->getOperand(0);
3831 
3832     // Don't bother handling odd stuff for now.
3833     if ((VA.getLocInfo() != CCValAssign::Full) &&
3834         (VA.getLocInfo() != CCValAssign::BCvt))
3835       return false;
3836 
3837     // Only handle register returns for now.
3838     if (!VA.isRegLoc())
3839       return false;
3840 
3841     Register Reg = getRegForValue(RV);
3842     if (Reg == 0)
3843       return false;
3844 
3845     unsigned SrcReg = Reg + VA.getValNo();
3846     Register DestReg = VA.getLocReg();
3847     // Avoid a cross-class copy. This is very unlikely.
3848     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3849       return false;
3850 
3851     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3852     if (!RVEVT.isSimple())
3853       return false;
3854 
3855     // Vectors (of more than one lane) in big-endian mode need tricky handling.
3856     if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3857         !Subtarget->isLittleEndian())
3858       return false;
3859 
3860     MVT RVVT = RVEVT.getSimpleVT();
3861     if (RVVT == MVT::f128)
3862       return false;
3863 
3864     MVT DestVT = VA.getValVT();
3865     // Special handling for extended integers.
3866     if (RVVT != DestVT) {
3867       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3868         return false;
3869 
3870       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3871         return false;
3872 
3873       bool IsZExt = Outs[0].Flags.isZExt();
3874       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3875       if (SrcReg == 0)
3876         return false;
3877     }
3878 
3879     // The "callee" (i.e. the value producer) zero-extends pointers at the
3880     // function boundary.
3881     if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3882       SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3883 
3884     // Make the copy.
3885     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3886             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3887 
3888     // Add register to return instruction.
3889     RetRegs.push_back(VA.getLocReg());
3890   }
3891 
3892   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3893                                     TII.get(AArch64::RET_ReallyLR));
3894   for (unsigned RetReg : RetRegs)
3895     MIB.addReg(RetReg, RegState::Implicit);
3896   return true;
3897 }
3898 
3899 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3900   Type *DestTy = I->getType();
3901   Value *Op = I->getOperand(0);
3902   Type *SrcTy = Op->getType();
3903 
3904   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3905   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3906   if (!SrcEVT.isSimple())
3907     return false;
3908   if (!DestEVT.isSimple())
3909     return false;
3910 
3911   MVT SrcVT = SrcEVT.getSimpleVT();
3912   MVT DestVT = DestEVT.getSimpleVT();
3913 
3914   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3915       SrcVT != MVT::i8)
3916     return false;
3917   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3918       DestVT != MVT::i1)
3919     return false;
3920 
3921   Register SrcReg = getRegForValue(Op);
3922   if (!SrcReg)
3923     return false;
3924 
3925   // If we're truncating from i64 to a smaller non-legal type, then generate an
3926   // AND. Otherwise, we know the high bits are undefined and a truncate only
3927   // needs to generate a COPY. We cannot also mark the source register as the
3928   // result register, because this can incorrectly transfer the kill flag onto
3929   // the source register.
3930   unsigned ResultReg;
3931   if (SrcVT == MVT::i64) {
3932     uint64_t Mask = 0;
3933     switch (DestVT.SimpleTy) {
3934     default:
3935       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3936       return false;
3937     case MVT::i1:
3938       Mask = 0x1;
3939       break;
3940     case MVT::i8:
3941       Mask = 0xff;
3942       break;
3943     case MVT::i16:
3944       Mask = 0xffff;
3945       break;
3946     }
3947     // Issue an extract_subreg to get the lower 32-bits.
3948     Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3949                                                 AArch64::sub_32);
3950     // Create the AND instruction which performs the actual truncation.
3951     ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3952     assert(ResultReg && "Unexpected AND instruction emission failure.");
3953   } else {
3954     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3955     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3956             TII.get(TargetOpcode::COPY), ResultReg)
3957         .addReg(SrcReg);
3958   }
3959 
3960   updateValueMap(I, ResultReg);
3961   return true;
3962 }
3963 
3964 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3965   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3966           DestVT == MVT::i64) &&
3967          "Unexpected value type.");
3968   // Handle i8 and i16 as i32.
3969   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3970     DestVT = MVT::i32;
3971 
3972   if (IsZExt) {
3973     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
3974     assert(ResultReg && "Unexpected AND instruction emission failure.");
3975     if (DestVT == MVT::i64) {
3976       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3977       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3978       Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3979       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3980               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3981           .addImm(0)
3982           .addReg(ResultReg)
3983           .addImm(AArch64::sub_32);
3984       ResultReg = Reg64;
3985     }
3986     return ResultReg;
3987   } else {
3988     if (DestVT == MVT::i64) {
3989       // FIXME: Sign-extending i1 to i64 is not handled here yet.
3990       return 0;
3991     }
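    // SBFM Wd, Wn, #0, #0 replicates bit 0 across the 32-bit result, i.e. it
    // sign-extends the i1 value.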
3992     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3993                             0, 0);
3994   }
3995 }
3996 
3997 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3998   unsigned Opc, ZReg;
3999   switch (RetVT.SimpleTy) {
4000   default: return 0;
4001   case MVT::i8:
4002   case MVT::i16:
4003   case MVT::i32:
4004     RetVT = MVT::i32;
4005     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4006   case MVT::i64:
4007     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4008   }
4009 
4010   const TargetRegisterClass *RC =
4011       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
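  // MADD Rd, Rn, Rm, {W|X}ZR computes Rn * Rm + 0, i.e. a plain multiply.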
4012   return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4013 }
4014 
4015 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4016   if (RetVT != MVT::i64)
4017     return 0;
4018 
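  // SMADDL Xd, Wn, Wm, XZR yields the full signed 64-bit product of the two
  // 32-bit operands.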
4019   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4020                           Op0, Op1, AArch64::XZR);
4021 }
4022 
4023 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4024   if (RetVT != MVT::i64)
4025     return 0;
4026 
4027   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4028                           Op0, Op1, AArch64::XZR);
4029 }
4030 
4031 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4032                                      unsigned Op1Reg) {
4033   unsigned Opc = 0;
4034   bool NeedTrunc = false;
4035   uint64_t Mask = 0;
4036   switch (RetVT.SimpleTy) {
4037   default: return 0;
4038   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
4039   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4040   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4041   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4042   }
4043 
4044   const TargetRegisterClass *RC =
4045       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
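  // i8/i16 shifts are performed in a 32-bit register: mask the shift amount to
  // the narrow width first and truncate the result back down afterwards.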
4046   if (NeedTrunc)
4047     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4048 
4049   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4050   if (NeedTrunc)
4051     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4052   return ResultReg;
4053 }
4054 
4055 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4056                                      uint64_t Shift, bool IsZExt) {
4057   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4058          "Unexpected source/return type pair.");
4059   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4060           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4061          "Unexpected source value type.");
4062   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4063           RetVT == MVT::i64) && "Unexpected return value type.");
4064 
4065   bool Is64Bit = (RetVT == MVT::i64);
4066   unsigned RegSize = Is64Bit ? 64 : 32;
4067   unsigned DstBits = RetVT.getSizeInBits();
4068   unsigned SrcBits = SrcVT.getSizeInBits();
4069   const TargetRegisterClass *RC =
4070       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4071 
4072   // Just emit a copy for "zero" shifts.
4073   if (Shift == 0) {
4074     if (RetVT == SrcVT) {
4075       Register ResultReg = createResultReg(RC);
4076       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4077               TII.get(TargetOpcode::COPY), ResultReg)
4078           .addReg(Op0);
4079       return ResultReg;
4080     } else
4081       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4082   }
4083 
4084   // Don't deal with undefined shifts.
4085   if (Shift >= DstBits)
4086     return 0;
4087 
4088   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4089   // {S|U}BFM Wd, Wn, #r, #s
4090   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4091 
4092   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4093   // %2 = shl i16 %1, 4
4094   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4095   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4096   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4097   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4098 
4099   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4100   // %2 = shl i16 %1, 8
4101   // Wd<32+7-24,32-24> = Wn<7:0>
4102   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4103   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4104   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4105 
4106   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4107   // %2 = shl i16 %1, 12
4108   // Wd<32+3-20,32-20> = Wn<3:0>
4109   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4110   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4111   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4112 
4113   unsigned ImmR = RegSize - Shift;
4114   // Limit the width to the length of the source type.
4115   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4116   static const unsigned OpcTable[2][2] = {
4117     {AArch64::SBFMWri, AArch64::SBFMXri},
4118     {AArch64::UBFMWri, AArch64::UBFMXri}
4119   };
4120   unsigned Opc = OpcTable[IsZExt][Is64Bit];
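  // The 64-bit {S|U}BFMXri form needs a 64-bit source register, so wrap a
  // 32-bit input in SUBREG_TO_REG first.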
4121   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4122     Register TmpReg = MRI.createVirtualRegister(RC);
4123     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4124             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4125         .addImm(0)
4126         .addReg(Op0)
4127         .addImm(AArch64::sub_32);
4128     Op0 = TmpReg;
4129   }
4130   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4131 }
4132 
4133 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4134                                      unsigned Op1Reg) {
4135   unsigned Opc = 0;
4136   bool NeedTrunc = false;
4137   uint64_t Mask = 0;
4138   switch (RetVT.SimpleTy) {
4139   default: return 0;
4140   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4141   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4142   case MVT::i32: Opc = AArch64::LSRVWr; break;
4143   case MVT::i64: Opc = AArch64::LSRVXr; break;
4144   }
4145 
4146   const TargetRegisterClass *RC =
4147       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
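  // For i8/i16, zero the bits above the narrow value first so the logical
  // right shift pulls in zeros rather than stale upper bits; the shift amount
  // and the result are masked to the narrow width as well.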
4148   if (NeedTrunc) {
4149     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4150     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4151   }
4152   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4153   if (NeedTrunc)
4154     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4155   return ResultReg;
4156 }
4157 
4158 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4159                                      uint64_t Shift, bool IsZExt) {
4160   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4161          "Unexpected source/return type pair.");
4162   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4163           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4164          "Unexpected source value type.");
4165   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4166           RetVT == MVT::i64) && "Unexpected return value type.");
4167 
4168   bool Is64Bit = (RetVT == MVT::i64);
4169   unsigned RegSize = Is64Bit ? 64 : 32;
4170   unsigned DstBits = RetVT.getSizeInBits();
4171   unsigned SrcBits = SrcVT.getSizeInBits();
4172   const TargetRegisterClass *RC =
4173       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4174 
4175   // Just emit a copy for "zero" shifts.
4176   if (Shift == 0) {
4177     if (RetVT == SrcVT) {
4178       Register ResultReg = createResultReg(RC);
4179       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4180               TII.get(TargetOpcode::COPY), ResultReg)
4181           .addReg(Op0);
4182       return ResultReg;
4183     } else
4184       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4185   }
4186 
4187   // Don't deal with undefined shifts.
4188   if (Shift >= DstBits)
4189     return 0;
4190 
4191   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4192   // {S|U}BFM Wd, Wn, #r, #s
4193   // Wd<s-r:0> = Wn<s:r> when r <= s
4194 
4195   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4196   // %2 = lshr i16 %1, 4
4197   // Wd<7-4:0> = Wn<7:4>
4198   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4199   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4200   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4201 
4202   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4203   // %2 = lshr i16 %1, 8
4204   // Wd<7-7,0> = Wn<7:7>
4205   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4206   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4207   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4208 
4209   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4210   // %2 = lshr i16 %1, 12
4211   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4212   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4213   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4214   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4215 
4216   if (Shift >= SrcBits && IsZExt)
4217     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4218 
4219   // It is not possible to fold a sign-extend into the LShr instruction. In that
4220   // case, emit the sign-extend explicitly first.
4221   if (!IsZExt) {
4222     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4223     if (!Op0)
4224       return 0;
4225     SrcVT = RetVT;
4226     SrcBits = SrcVT.getSizeInBits();
4227     IsZExt = true;
4228   }
4229 
4230   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4231   unsigned ImmS = SrcBits - 1;
4232   static const unsigned OpcTable[2][2] = {
4233     {AArch64::SBFMWri, AArch64::SBFMXri},
4234     {AArch64::UBFMWri, AArch64::UBFMXri}
4235   };
4236   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4237   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4238     Register TmpReg = MRI.createVirtualRegister(RC);
4239     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4240             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4241         .addImm(0)
4242         .addReg(Op0)
4243         .addImm(AArch64::sub_32);
4244     Op0 = TmpReg;
4245   }
4246   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4247 }
4248 
4249 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4250                                      unsigned Op1Reg) {
4251   unsigned Opc = 0;
4252   bool NeedTrunc = false;
4253   uint64_t Mask = 0;
4254   switch (RetVT.SimpleTy) {
4255   default: return 0;
4256   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4257   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4258   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4259   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4260   }
4261 
4262   const TargetRegisterClass *RC =
4263       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
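  // For i8/i16, sign-extend the value to 32 bits so the arithmetic shift
  // replicates the correct sign bit, then truncate the result back to the
  // narrow width.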
4264   if (NeedTrunc) {
4265     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4266     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4267   }
4268   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4269   if (NeedTrunc)
4270     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4271   return ResultReg;
4272 }
4273 
4274 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4275                                      uint64_t Shift, bool IsZExt) {
4276   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4277          "Unexpected source/return type pair.");
4278   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4279           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4280          "Unexpected source value type.");
4281   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4282           RetVT == MVT::i64) && "Unexpected return value type.");
4283 
4284   bool Is64Bit = (RetVT == MVT::i64);
4285   unsigned RegSize = Is64Bit ? 64 : 32;
4286   unsigned DstBits = RetVT.getSizeInBits();
4287   unsigned SrcBits = SrcVT.getSizeInBits();
4288   const TargetRegisterClass *RC =
4289       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4290 
4291   // Just emit a copy for "zero" shifts.
4292   if (Shift == 0) {
4293     if (RetVT == SrcVT) {
4294       Register ResultReg = createResultReg(RC);
4295       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4296               TII.get(TargetOpcode::COPY), ResultReg)
4297           .addReg(Op0);
4298       return ResultReg;
4299     } else
4300       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4301   }
4302 
4303   // Don't deal with undefined shifts.
4304   if (Shift >= DstBits)
4305     return 0;
4306 
4307   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4308   // {S|U}BFM Wd, Wn, #r, #s
4309   // Wd<s-r:0> = Wn<s:r> when r <= s
4310 
4311   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4312   // %2 = ashr i16 %1, 4
4313   // Wd<7-4:0> = Wn<7:4>
4314   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4315   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4316   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4317 
4318   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4319   // %2 = ashr i16 %1, 8
4320   // Wd<7-7,0> = Wn<7:7>
4321   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4322   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4323   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4324 
4325   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4326   // %2 = ashr i16 %1, 12
4327   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4328   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4329   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4330   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4331 
4332   if (Shift >= SrcBits && IsZExt)
4333     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4334 
4335   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4336   unsigned ImmS = SrcBits - 1;
4337   static const unsigned OpcTable[2][2] = {
4338     {AArch64::SBFMWri, AArch64::SBFMXri},
4339     {AArch64::UBFMWri, AArch64::UBFMXri}
4340   };
4341   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4342   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4343     Register TmpReg = MRI.createVirtualRegister(RC);
4344     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4345             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4346         .addImm(0)
4347         .addReg(Op0)
4348         .addImm(AArch64::sub_32);
4349     Op0 = TmpReg;
4350   }
4351   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4352 }
4353 
4354 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4355                                      bool IsZExt) {
4356   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4357 
4358   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4359   // DestVT are odd things, so test to make sure that they are both types we can
4360   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4361   // bail out to SelectionDAG.
4362   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4363        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4364       ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4365        (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4366     return 0;
4367 
4368   unsigned Opc;
4369   unsigned Imm = 0;
4370 
4371   switch (SrcVT.SimpleTy) {
4372   default:
4373     return 0;
4374   case MVT::i1:
4375     return emiti1Ext(SrcReg, DestVT, IsZExt);
4376   case MVT::i8:
4377     if (DestVT == MVT::i64)
4378       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4379     else
4380       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4381     Imm = 7;
4382     break;
4383   case MVT::i16:
4384     if (DestVT == MVT::i64)
4385       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4386     else
4387       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4388     Imm = 15;
4389     break;
4390   case MVT::i32:
4391     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4392     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4393     Imm = 31;
4394     break;
4395   }
4396 
4397   // Handle i8 and i16 as i32.
4398   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4399     DestVT = MVT::i32;
4400   else if (DestVT == MVT::i64) {
4401     Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4402     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4403             TII.get(AArch64::SUBREG_TO_REG), Src64)
4404         .addImm(0)
4405         .addReg(SrcReg)
4406         .addImm(AArch64::sub_32);
4407     SrcReg = Src64;
4408   }
4409 
4410   const TargetRegisterClass *RC =
4411       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
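  // The {U|S}BFM immediates are immr = 0 and imms = Imm, so the extension
  // keeps bits [Imm:0] of the source.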
4412   return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4413 }
4414 
4415 static bool isZExtLoad(const MachineInstr *LI) {
4416   switch (LI->getOpcode()) {
4417   default:
4418     return false;
4419   case AArch64::LDURBBi:
4420   case AArch64::LDURHHi:
4421   case AArch64::LDURWi:
4422   case AArch64::LDRBBui:
4423   case AArch64::LDRHHui:
4424   case AArch64::LDRWui:
4425   case AArch64::LDRBBroX:
4426   case AArch64::LDRHHroX:
4427   case AArch64::LDRWroX:
4428   case AArch64::LDRBBroW:
4429   case AArch64::LDRHHroW:
4430   case AArch64::LDRWroW:
4431     return true;
4432   }
4433 }
4434 
4435 static bool isSExtLoad(const MachineInstr *LI) {
4436   switch (LI->getOpcode()) {
4437   default:
4438     return false;
4439   case AArch64::LDURSBWi:
4440   case AArch64::LDURSHWi:
4441   case AArch64::LDURSBXi:
4442   case AArch64::LDURSHXi:
4443   case AArch64::LDURSWi:
4444   case AArch64::LDRSBWui:
4445   case AArch64::LDRSHWui:
4446   case AArch64::LDRSBXui:
4447   case AArch64::LDRSHXui:
4448   case AArch64::LDRSWui:
4449   case AArch64::LDRSBWroX:
4450   case AArch64::LDRSHWroX:
4451   case AArch64::LDRSBXroX:
4452   case AArch64::LDRSHXroX:
4453   case AArch64::LDRSWroX:
4454   case AArch64::LDRSBWroW:
4455   case AArch64::LDRSHWroW:
4456   case AArch64::LDRSBXroW:
4457   case AArch64::LDRSHXroW:
4458   case AArch64::LDRSWroW:
4459     return true;
4460   }
4461 }
4462 
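/// Try to fold a sign-/zero-extend into the load that feeds it: if the load
/// was already emitted with the matching extending opcode, reuse its result
/// (widening with SUBREG_TO_REG when an i64 result is needed) instead of
/// emitting a separate extend.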
4463 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4464                                          MVT SrcVT) {
4465   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4466   if (!LI || !LI->hasOneUse())
4467     return false;
4468 
4469   // Check if the load instruction has already been selected.
4470   Register Reg = lookUpRegForValue(LI);
4471   if (!Reg)
4472     return false;
4473 
4474   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4475   if (!MI)
4476     return false;
4477 
4478   // Check that the right kind of load has been emitted - a zero-extending load
4479   // cannot be reused for a sign-extend, and vice versa.
4480   bool IsZExt = isa<ZExtInst>(I);
4481   const auto *LoadMI = MI;
4482   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4483       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4484     Register LoadReg = MI->getOperand(1).getReg();
4485     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4486     assert(LoadMI && "Expected valid instruction");
4487   }
4488   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4489     return false;
4490 
4491   // Nothing to be done.
4492   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4493     updateValueMap(I, Reg);
4494     return true;
4495   }
4496 
4497   if (IsZExt) {
4498     Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4499     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4500             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4501         .addImm(0)
4502         .addReg(Reg, getKillRegState(true))
4503         .addImm(AArch64::sub_32);
4504     Reg = Reg64;
4505   } else {
4506     assert((MI->getOpcode() == TargetOpcode::COPY &&
4507             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4508            "Expected copy instruction");
4509     Reg = MI->getOperand(1).getReg();
4510     MachineBasicBlock::iterator I(MI);
4511     removeDeadCode(I, std::next(I));
4512   }
4513   updateValueMap(I, Reg);
4514   return true;
4515 }
4516 
4517 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4518   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4519          "Unexpected integer extend instruction.");
4520   MVT RetVT;
4521   MVT SrcVT;
4522   if (!isTypeSupported(I->getType(), RetVT))
4523     return false;
4524 
4525   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4526     return false;
4527 
4528   // Try to optimize already sign-/zero-extended values from load instructions.
4529   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4530     return true;
4531 
4532   Register SrcReg = getRegForValue(I->getOperand(0));
4533   if (!SrcReg)
4534     return false;
4535 
4536   // Try to optimize already sign-/zero-extended values from function arguments.
4537   bool IsZExt = isa<ZExtInst>(I);
4538   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4539     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4540       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4541         Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4542         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4543                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4544             .addImm(0)
4545             .addReg(SrcReg)
4546             .addImm(AArch64::sub_32);
4547         SrcReg = ResultReg;
4548       }
4549 
4550       updateValueMap(I, SrcReg);
4551       return true;
4552     }
4553   }
4554 
4555   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4556   if (!ResultReg)
4557     return false;
4558 
4559   updateValueMap(I, ResultReg);
4560   return true;
4561 }
4562 
4563 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4564   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4565   if (!DestEVT.isSimple())
4566     return false;
4567 
4568   MVT DestVT = DestEVT.getSimpleVT();
4569   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4570     return false;
4571 
4572   unsigned DivOpc;
4573   bool Is64bit = (DestVT == MVT::i64);
4574   switch (ISDOpcode) {
4575   default:
4576     return false;
4577   case ISD::SREM:
4578     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4579     break;
4580   case ISD::UREM:
4581     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4582     break;
4583   }
4584   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4585   Register Src0Reg = getRegForValue(I->getOperand(0));
4586   if (!Src0Reg)
4587     return false;
4588 
4589   Register Src1Reg = getRegForValue(I->getOperand(1));
4590   if (!Src1Reg)
4591     return false;
4592 
4593   const TargetRegisterClass *RC =
4594       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4595   Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4596   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4597   // The remainder is computed as numerator - (quotient * denominator) using the
4598   // MSUB instruction.
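       // For a 32-bit srem this is expected to produce roughly (illustrative
       // register choices, not verbatim output):
       //   sdiv w8, w0, w1
       //   msub w0, w8, w1, w0    // w0 = w0 - w8 * w1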
4599   Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4600   updateValueMap(I, ResultReg);
4601   return true;
4602 }
4603 
4604 bool AArch64FastISel::selectMul(const Instruction *I) {
4605   MVT VT;
4606   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4607     return false;
4608 
4609   if (VT.isVector())
4610     return selectBinaryOp(I, ISD::MUL);
4611 
4612   const Value *Src0 = I->getOperand(0);
4613   const Value *Src1 = I->getOperand(1);
4614   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4615     if (C->getValue().isPowerOf2())
4616       std::swap(Src0, Src1);
4617 
4618   // Try to simplify to a shift instruction.
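       // E.g. a multiply by 8 becomes an "lsl #3"; a free zext/sext of the
       // non-constant operand can additionally be folded into that shift below.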
4619   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4620     if (C->getValue().isPowerOf2()) {
4621       uint64_t ShiftVal = C->getValue().logBase2();
4622       MVT SrcVT = VT;
4623       bool IsZExt = true;
4624       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4625         if (!isIntExtFree(ZExt)) {
4626           MVT TmpVT;
4627           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4628             SrcVT = TmpVT;
4629             IsZExt = true;
4630             Src0 = ZExt->getOperand(0);
4631           }
4632         }
4633       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4634         if (!isIntExtFree(SExt)) {
4635           MVT TmpVT;
4636           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4637             SrcVT = TmpVT;
4638             IsZExt = false;
4639             Src0 = SExt->getOperand(0);
4640           }
4641         }
4642       }
4643 
4644       Register Src0Reg = getRegForValue(Src0);
4645       if (!Src0Reg)
4646         return false;
4647 
4648       unsigned ResultReg =
4649           emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4650 
4651       if (ResultReg) {
4652         updateValueMap(I, ResultReg);
4653         return true;
4654       }
4655     }
4656 
4657   Register Src0Reg = getRegForValue(I->getOperand(0));
4658   if (!Src0Reg)
4659     return false;
4660 
4661   Register Src1Reg = getRegForValue(I->getOperand(1));
4662   if (!Src1Reg)
4663     return false;
4664 
4665   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4666 
4667   if (!ResultReg)
4668     return false;
4669 
4670   updateValueMap(I, ResultReg);
4671   return true;
4672 }
4673 
4674 bool AArch64FastISel::selectShift(const Instruction *I) {
4675   MVT RetVT;
4676   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4677     return false;
4678 
4679   if (RetVT.isVector())
4680     return selectOperator(I, I->getOpcode());
4681 
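       // For constant shift amounts, also try to fold a free zero-/sign-extend
       // of the shifted value into the shift itself (e.g. a shl of a
       // zero-extended i8 can be emitted as a single UBFIZ-style UBFM).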
4682   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4683     unsigned ResultReg = 0;
4684     uint64_t ShiftVal = C->getZExtValue();
4685     MVT SrcVT = RetVT;
4686     bool IsZExt = I->getOpcode() != Instruction::AShr;
4687     const Value *Op0 = I->getOperand(0);
4688     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4689       if (!isIntExtFree(ZExt)) {
4690         MVT TmpVT;
4691         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4692           SrcVT = TmpVT;
4693           IsZExt = true;
4694           Op0 = ZExt->getOperand(0);
4695         }
4696       }
4697     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4698       if (!isIntExtFree(SExt)) {
4699         MVT TmpVT;
4700         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4701           SrcVT = TmpVT;
4702           IsZExt = false;
4703           Op0 = SExt->getOperand(0);
4704         }
4705       }
4706     }
4707 
4708     Register Op0Reg = getRegForValue(Op0);
4709     if (!Op0Reg)
4710       return false;
4711 
4712     switch (I->getOpcode()) {
4713     default: llvm_unreachable("Unexpected instruction.");
4714     case Instruction::Shl:
4715       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4716       break;
4717     case Instruction::AShr:
4718       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4719       break;
4720     case Instruction::LShr:
4721       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4722       break;
4723     }
4724     if (!ResultReg)
4725       return false;
4726 
4727     updateValueMap(I, ResultReg);
4728     return true;
4729   }
4730 
4731   Register Op0Reg = getRegForValue(I->getOperand(0));
4732   if (!Op0Reg)
4733     return false;
4734 
4735   Register Op1Reg = getRegForValue(I->getOperand(1));
4736   if (!Op1Reg)
4737     return false;
4738 
4739   unsigned ResultReg = 0;
4740   switch (I->getOpcode()) {
4741   default: llvm_unreachable("Unexpected instruction.");
4742   case Instruction::Shl:
4743     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4744     break;
4745   case Instruction::AShr:
4746     ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4747     break;
4748   case Instruction::LShr:
4749     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4750     break;
4751   }
4752 
4753   if (!ResultReg)
4754     return false;
4755 
4756   updateValueMap(I, ResultReg);
4757   return true;
4758 }
4759 
4760 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4761   MVT RetVT, SrcVT;
4762 
4763   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4764     return false;
4765   if (!isTypeLegal(I->getType(), RetVT))
4766     return false;
4767 
4768   unsigned Opc;
4769   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4770     Opc = AArch64::FMOVWSr;
4771   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4772     Opc = AArch64::FMOVXDr;
4773   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4774     Opc = AArch64::FMOVSWr;
4775   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4776     Opc = AArch64::FMOVDXr;
4777   else
4778     return false;
4779 
4780   const TargetRegisterClass *RC = nullptr;
4781   switch (RetVT.SimpleTy) {
4782   default: llvm_unreachable("Unexpected value type.");
4783   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4784   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4785   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4786   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4787   }
4788   Register Op0Reg = getRegForValue(I->getOperand(0));
4789   if (!Op0Reg)
4790     return false;
4791 
4792   Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4793   if (!ResultReg)
4794     return false;
4795 
4796   updateValueMap(I, ResultReg);
4797   return true;
4798 }
4799 
4800 bool AArch64FastISel::selectFRem(const Instruction *I) {
4801   MVT RetVT;
4802   if (!isTypeLegal(I->getType(), RetVT))
4803     return false;
4804 
4805   RTLIB::Libcall LC;
4806   switch (RetVT.SimpleTy) {
4807   default:
4808     return false;
4809   case MVT::f32:
4810     LC = RTLIB::REM_F32;
4811     break;
4812   case MVT::f64:
4813     LC = RTLIB::REM_F64;
4814     break;
4815   }
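       // There is no AArch64 frem instruction, so lower this as a libcall
       // (fmodf/fmod for f32/f64 with the default runtime library names).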
4816 
4817   ArgListTy Args;
4818   Args.reserve(I->getNumOperands());
4819 
4820   // Populate the argument list.
4821   for (auto &Arg : I->operands()) {
4822     ArgListEntry Entry;
4823     Entry.Val = Arg;
4824     Entry.Ty = Arg->getType();
4825     Args.push_back(Entry);
4826   }
4827 
4828   CallLoweringInfo CLI;
4829   MCContext &Ctx = MF->getContext();
4830   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4831                 TLI.getLibcallName(LC), std::move(Args));
4832   if (!lowerCallTo(CLI))
4833     return false;
4834   updateValueMap(I, CLI.ResultReg);
4835   return true;
4836 }
4837 
4838 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4839   MVT VT;
4840   if (!isTypeLegal(I->getType(), VT))
4841     return false;
4842 
4843   if (!isa<ConstantInt>(I->getOperand(1)))
4844     return selectBinaryOp(I, ISD::SDIV);
4845 
4846   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4847   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4848       !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4849     return selectBinaryOp(I, ISD::SDIV);
4850 
4851   unsigned Lg2 = C.countTrailingZeros();
4852   Register Src0Reg = getRegForValue(I->getOperand(0));
4853   if (!Src0Reg)
4854     return false;
4855 
4856   if (cast<BinaryOperator>(I)->isExact()) {
4857     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4858     if (!ResultReg)
4859       return false;
4860     updateValueMap(I, ResultReg);
4861     return true;
4862   }
4863 
4864   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4865   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4866   if (!AddReg)
4867     return false;
4868 
4869   // (Src0 < 0) ? Pow2 - 1 : 0;
4870   if (!emitICmp_ri(VT, Src0Reg, 0))
4871     return false;
4872 
4873   unsigned SelectOpc;
4874   const TargetRegisterClass *RC;
4875   if (VT == MVT::i64) {
4876     SelectOpc = AArch64::CSELXr;
4877     RC = &AArch64::GPR64RegClass;
4878   } else {
4879     SelectOpc = AArch64::CSELWr;
4880     RC = &AArch64::GPR32RegClass;
4881   }
4882   Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4883                                         AArch64CC::LT);
4884   if (!SelectReg)
4885     return false;
4886 
4887   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4888   // negate the result.
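       // E.g. "sdiv i32 %a, 8" is expected to become roughly (illustrative
       // register choices):
       //   add  w8, w0, #7
       //   cmp  w0, #0
       //   csel w8, w8, w0, lt
       //   asr  w0, w8, #3
       // and with a divisor of -8 the final asr instead becomes
       // "neg w0, w8, asr #3".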
4889   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4890   unsigned ResultReg;
4891   if (C.isNegative())
4892     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4893                               AArch64_AM::ASR, Lg2);
4894   else
4895     ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4896 
4897   if (!ResultReg)
4898     return false;
4899 
4900   updateValueMap(I, ResultReg);
4901   return true;
4902 }
4903 
4904 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4905 /// have to duplicate it for AArch64, because otherwise we would fail during the
4906 /// sign-extend emission.
4907 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4908   Register IdxN = getRegForValue(Idx);
4909   if (IdxN == 0)
4910     // Unhandled operand. Halt "fast" selection and bail.
4911     return 0;
4912 
4913   // If the index is smaller or larger than intptr_t, truncate or extend it.
4914   MVT PtrVT = TLI.getPointerTy(DL);
4915   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4916   if (IdxVT.bitsLT(PtrVT)) {
4917     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4918   } else if (IdxVT.bitsGT(PtrVT))
4919     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4920   return IdxN;
4921 }
4922 
4923 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4924 /// duplicate it for AArch64, because otherwise we would bail out even for
4925 /// simple cases. This is because the standard fastEmit functions don't cover
4926 /// MUL at all and ADD is lowered very inefficiently.
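     ///
     /// Constant indices are accumulated into a single running offset that is
     /// applied with one add, while each variable index is scaled via an
     /// explicit constant materialization, MUL, and ADD.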
4927 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4928   if (Subtarget->isTargetILP32())
4929     return false;
4930 
4931   Register N = getRegForValue(I->getOperand(0));
4932   if (!N)
4933     return false;
4934 
4935   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4936   // into a single N = N + TotalOffset.
4937   uint64_t TotalOffs = 0;
4938   MVT VT = TLI.getPointerTy(DL);
4939   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4940        GTI != E; ++GTI) {
4941     const Value *Idx = GTI.getOperand();
4942     if (auto *StTy = GTI.getStructTypeOrNull()) {
4943       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4944       // N = N + Offset
4945       if (Field)
4946         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4947     } else {
4948       Type *Ty = GTI.getIndexedType();
4949 
4950       // If this is a constant subscript, handle it quickly.
4951       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4952         if (CI->isZero())
4953           continue;
4954         // N = N + Offset
4955         TotalOffs +=
4956         TotalOffs +=
4957             DL.getTypeAllocSize(Ty) * CI->getSExtValue();
4958       }
4959       if (TotalOffs) {
4960         N = emitAdd_ri_(VT, N, TotalOffs);
4961         if (!N)
4962           return false;
4963         TotalOffs = 0;
4964       }
4965 
4966       // N = N + Idx * ElementSize;
4967       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4968       unsigned IdxN = getRegForGEPIndex(Idx);
4969       if (!IdxN)
4970         return false;
4971 
4972       if (ElementSize != 1) {
4973         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4974         if (!C)
4975           return false;
4976         IdxN = emitMul_rr(VT, IdxN, C);
4977         if (!IdxN)
4978           return false;
4979       }
4980       N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
4981       if (!N)
4982         return false;
4983     }
4984   }
4985   if (TotalOffs) {
4986     N = emitAdd_ri_(VT, N, TotalOffs);
4987     if (!N)
4988       return false;
4989   }
4990   updateValueMap(I, N);
4991   return true;
4992 }
4993 
4994 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4995   assert(TM.getOptLevel() == CodeGenOpt::None &&
4996          "cmpxchg survived AtomicExpand at optlevel > -O0");
4997 
4998   auto *RetPairTy = cast<StructType>(I->getType());
4999   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5000   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5001          "cmpxchg has a non-i1 status result");
5002 
5003   MVT VT;
5004   if (!isTypeLegal(RetTy, VT))
5005     return false;
5006 
5007   const TargetRegisterClass *ResRC;
5008   unsigned Opc, CmpOpc;
5009   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5010   // extractvalue selection doesn't support that.
5011   if (VT == MVT::i32) {
5012     Opc = AArch64::CMP_SWAP_32;
5013     CmpOpc = AArch64::SUBSWrs;
5014     ResRC = &AArch64::GPR32RegClass;
5015   } else if (VT == MVT::i64) {
5016     Opc = AArch64::CMP_SWAP_64;
5017     CmpOpc = AArch64::SUBSXrs;
5018     ResRC = &AArch64::GPR64RegClass;
5019   } else {
5020     return false;
5021   }
5022 
5023   const MCInstrDesc &II = TII.get(Opc);
5024 
5025   const Register AddrReg = constrainOperandRegClass(
5026       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5027   const Register DesiredReg = constrainOperandRegClass(
5028       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5029   const Register NewReg = constrainOperandRegClass(
5030       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5031 
5032   const Register ResultReg1 = createResultReg(ResRC);
5033   const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5034   const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5035 
5036   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
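       // The CMP_SWAP_{32,64} pseudo is expanded into the real atomic sequence
       // after instruction selection; the SUBS/CSINC pair below materializes
       // the i1 success result (1 iff the loaded value equals the expected
       // value).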
5037   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5038       .addDef(ResultReg1)
5039       .addDef(ScratchReg)
5040       .addUse(AddrReg)
5041       .addUse(DesiredReg)
5042       .addUse(NewReg);
5043 
5044   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5045       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5046       .addUse(ResultReg1)
5047       .addUse(DesiredReg)
5048       .addImm(0);
5049 
5050   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5051       .addDef(ResultReg2)
5052       .addUse(AArch64::WZR)
5053       .addUse(AArch64::WZR)
5054       .addImm(AArch64CC::NE);
5055 
5056   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5057   updateValueMap(I, ResultReg1, 2);
5058   return true;
5059 }
5060 
5061 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5062   if (TLI.fallBackToDAGISel(*I))
5063     return false;
5064   switch (I->getOpcode()) {
5065   default:
5066     break;
5067   case Instruction::Add:
5068   case Instruction::Sub:
5069     return selectAddSub(I);
5070   case Instruction::Mul:
5071     return selectMul(I);
5072   case Instruction::SDiv:
5073     return selectSDiv(I);
5074   case Instruction::SRem:
5075     if (!selectBinaryOp(I, ISD::SREM))
5076       return selectRem(I, ISD::SREM);
5077     return true;
5078   case Instruction::URem:
5079     if (!selectBinaryOp(I, ISD::UREM))
5080       return selectRem(I, ISD::UREM);
5081     return true;
5082   case Instruction::Shl:
5083   case Instruction::LShr:
5084   case Instruction::AShr:
5085     return selectShift(I);
5086   case Instruction::And:
5087   case Instruction::Or:
5088   case Instruction::Xor:
5089     return selectLogicalOp(I);
5090   case Instruction::Br:
5091     return selectBranch(I);
5092   case Instruction::IndirectBr:
5093     return selectIndirectBr(I);
5094   case Instruction::BitCast:
5095     if (!FastISel::selectBitCast(I))
5096       return selectBitCast(I);
5097     return true;
5098   case Instruction::FPToSI:
5099     if (!selectCast(I, ISD::FP_TO_SINT))
5100       return selectFPToInt(I, /*Signed=*/true);
5101     return true;
5102   case Instruction::FPToUI:
5103     return selectFPToInt(I, /*Signed=*/false);
5104   case Instruction::ZExt:
5105   case Instruction::SExt:
5106     return selectIntExt(I);
5107   case Instruction::Trunc:
5108     if (!selectCast(I, ISD::TRUNCATE))
5109       return selectTrunc(I);
5110     return true;
5111   case Instruction::FPExt:
5112     return selectFPExt(I);
5113   case Instruction::FPTrunc:
5114     return selectFPTrunc(I);
5115   case Instruction::SIToFP:
5116     if (!selectCast(I, ISD::SINT_TO_FP))
5117       return selectIntToFP(I, /*Signed=*/true);
5118     return true;
5119   case Instruction::UIToFP:
5120     return selectIntToFP(I, /*Signed=*/false);
5121   case Instruction::Load:
5122     return selectLoad(I);
5123   case Instruction::Store:
5124     return selectStore(I);
5125   case Instruction::FCmp:
5126   case Instruction::ICmp:
5127     return selectCmp(I);
5128   case Instruction::Select:
5129     return selectSelect(I);
5130   case Instruction::Ret:
5131     return selectRet(I);
5132   case Instruction::FRem:
5133     return selectFRem(I);
5134   case Instruction::GetElementPtr:
5135     return selectGetElementPtr(I);
5136   case Instruction::AtomicCmpXchg:
5137     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5138   }
5139 
5140   // Fall back to target-independent instruction selection.
5141   return selectOperator(I, I->getOpcode());
5142 }
5143 
5144 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5145                                         const TargetLibraryInfo *LibInfo) {
5146 
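       // Functions with ZA state or a locally-streaming body need SME-specific
       // lowering that FastISel does not implement, so fall back to
       // SelectionDAG for them.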
5147   SMEAttrs CallerAttrs(*FuncInfo.Fn);
5148   if (CallerAttrs.hasZAState() ||
5149       (!CallerAttrs.hasStreamingInterface() && CallerAttrs.hasStreamingBody()))
5150     return nullptr;
5151   return new AArch64FastISel(FuncInfo, LibInfo);
5152 }
5153