xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp (revision 6966ac055c3b7a39266fb982493330df7a097997)
1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
17 #include "AArch64RegisterInfo.h"
18 #include "AArch64Subtarget.h"
19 #include "MCTargetDesc/AArch64AddressingModes.h"
20 #include "Utils/AArch64BaseInfo.h"
21 #include "llvm/ADT/APFloat.h"
22 #include "llvm/ADT/APInt.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/Analysis/BranchProbabilityInfo.h"
26 #include "llvm/CodeGen/CallingConvLower.h"
27 #include "llvm/CodeGen/FastISel.h"
28 #include "llvm/CodeGen/FunctionLoweringInfo.h"
29 #include "llvm/CodeGen/ISDOpcodes.h"
30 #include "llvm/CodeGen/MachineBasicBlock.h"
31 #include "llvm/CodeGen/MachineConstantPool.h"
32 #include "llvm/CodeGen/MachineFrameInfo.h"
33 #include "llvm/CodeGen/MachineInstr.h"
34 #include "llvm/CodeGen/MachineInstrBuilder.h"
35 #include "llvm/CodeGen/MachineMemOperand.h"
36 #include "llvm/CodeGen/MachineRegisterInfo.h"
37 #include "llvm/CodeGen/RuntimeLibcalls.h"
38 #include "llvm/CodeGen/ValueTypes.h"
39 #include "llvm/IR/Argument.h"
40 #include "llvm/IR/Attributes.h"
41 #include "llvm/IR/BasicBlock.h"
42 #include "llvm/IR/CallingConv.h"
43 #include "llvm/IR/Constant.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DataLayout.h"
46 #include "llvm/IR/DerivedTypes.h"
47 #include "llvm/IR/Function.h"
48 #include "llvm/IR/GetElementPtrTypeIterator.h"
49 #include "llvm/IR/GlobalValue.h"
50 #include "llvm/IR/InstrTypes.h"
51 #include "llvm/IR/Instruction.h"
52 #include "llvm/IR/Instructions.h"
53 #include "llvm/IR/IntrinsicInst.h"
54 #include "llvm/IR/Intrinsics.h"
55 #include "llvm/IR/Operator.h"
56 #include "llvm/IR/Type.h"
57 #include "llvm/IR/User.h"
58 #include "llvm/IR/Value.h"
59 #include "llvm/MC/MCInstrDesc.h"
60 #include "llvm/MC/MCRegisterInfo.h"
61 #include "llvm/MC/MCSymbol.h"
62 #include "llvm/Support/AtomicOrdering.h"
63 #include "llvm/Support/Casting.h"
64 #include "llvm/Support/CodeGen.h"
65 #include "llvm/Support/Compiler.h"
66 #include "llvm/Support/ErrorHandling.h"
67 #include "llvm/Support/MachineValueType.h"
68 #include "llvm/Support/MathExtras.h"
69 #include <algorithm>
70 #include <cassert>
71 #include <cstdint>
72 #include <iterator>
73 #include <utility>
74 
75 using namespace llvm;
76 
77 namespace {
78 
79 class AArch64FastISel final : public FastISel {
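  // Describes an addressing mode built up by computeAddress(): either a base
  // register or a frame index, plus an optional (possibly extended and
  // shifted) offset register, an immediate offset, and an optional global.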
80   class Address {
81   public:
82     using BaseKind = enum {
83       RegBase,
84       FrameIndexBase
85     };
86 
87   private:
88     BaseKind Kind = RegBase;
89     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
90     union {
91       unsigned Reg;
92       int FI;
93     } Base;
94     unsigned OffsetReg = 0;
95     unsigned Shift = 0;
96     int64_t Offset = 0;
97     const GlobalValue *GV = nullptr;
98 
99   public:
100     Address() { Base.Reg = 0; }
101 
102     void setKind(BaseKind K) { Kind = K; }
103     BaseKind getKind() const { return Kind; }
104     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
105     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
106     bool isRegBase() const { return Kind == RegBase; }
107     bool isFIBase() const { return Kind == FrameIndexBase; }
108 
109     void setReg(unsigned Reg) {
110       assert(isRegBase() && "Invalid base register access!");
111       Base.Reg = Reg;
112     }
113 
114     unsigned getReg() const {
115       assert(isRegBase() && "Invalid base register access!");
116       return Base.Reg;
117     }
118 
119     void setOffsetReg(unsigned Reg) {
120       OffsetReg = Reg;
121     }
122 
123     unsigned getOffsetReg() const {
124       return OffsetReg;
125     }
126 
127     void setFI(unsigned FI) {
128       assert(isFIBase() && "Invalid base frame index access!");
129       Base.FI = FI;
130     }
131 
132     unsigned getFI() const {
133       assert(isFIBase() && "Invalid base frame index access!");
134       return Base.FI;
135     }
136 
137     void setOffset(int64_t O) { Offset = O; }
138     int64_t getOffset() { return Offset; }
139     void setShift(unsigned S) { Shift = S; }
140     unsigned getShift() { return Shift; }
141 
142     void setGlobalValue(const GlobalValue *G) { GV = G; }
143     const GlobalValue *getGlobalValue() { return GV; }
144   };
145 
146   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
147   /// make the right decision when generating code for different targets.
148   const AArch64Subtarget *Subtarget;
149   LLVMContext *Context;
150 
151   bool fastLowerArguments() override;
152   bool fastLowerCall(CallLoweringInfo &CLI) override;
153   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
154 
155 private:
156   // Selection routines.
157   bool selectAddSub(const Instruction *I);
158   bool selectLogicalOp(const Instruction *I);
159   bool selectLoad(const Instruction *I);
160   bool selectStore(const Instruction *I);
161   bool selectBranch(const Instruction *I);
162   bool selectIndirectBr(const Instruction *I);
163   bool selectCmp(const Instruction *I);
164   bool selectSelect(const Instruction *I);
165   bool selectFPExt(const Instruction *I);
166   bool selectFPTrunc(const Instruction *I);
167   bool selectFPToInt(const Instruction *I, bool Signed);
168   bool selectIntToFP(const Instruction *I, bool Signed);
169   bool selectRem(const Instruction *I, unsigned ISDOpcode);
170   bool selectRet(const Instruction *I);
171   bool selectTrunc(const Instruction *I);
172   bool selectIntExt(const Instruction *I);
173   bool selectMul(const Instruction *I);
174   bool selectShift(const Instruction *I);
175   bool selectBitCast(const Instruction *I);
176   bool selectFRem(const Instruction *I);
177   bool selectSDiv(const Instruction *I);
178   bool selectGetElementPtr(const Instruction *I);
179   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
180 
181   // Utility helper routines.
182   bool isTypeLegal(Type *Ty, MVT &VT);
183   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
184   bool isValueAvailable(const Value *V) const;
185   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
186   bool computeCallAddress(const Value *V, Address &Addr);
187   bool simplifyAddress(Address &Addr, MVT VT);
188   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
189                             MachineMemOperand::Flags Flags,
190                             unsigned ScaleFactor, MachineMemOperand *MMO);
191   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
192   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
193                           unsigned Alignment);
194   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
195                          const Value *Cond);
196   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
197   bool optimizeSelect(const SelectInst *SI);
198   std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);
199 
200   // Emit helper routines.
201   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
202                       const Value *RHS, bool SetFlags = false,
203                       bool WantResult = true, bool IsZExt = false);
204   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
205                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
206                          bool SetFlags = false, bool WantResult = true);
207   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
208                          bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
209                          bool WantResult = true);
210   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
211                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
212                          AArch64_AM::ShiftExtendType ShiftType,
213                          uint64_t ShiftImm, bool SetFlags = false,
214                          bool WantResult = true);
215   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216                          bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
217                          AArch64_AM::ShiftExtendType ExtType,
218                          uint64_t ShiftImm, bool SetFlags = false,
219                          bool WantResult = true);
220 
221   // Emit functions.
222   bool emitCompareAndBranch(const BranchInst *BI);
223   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
224   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
225   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
226   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
227   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
228                     MachineMemOperand *MMO = nullptr);
229   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
230                  MachineMemOperand *MMO = nullptr);
231   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
232                         MachineMemOperand *MMO = nullptr);
233   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
234   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
235   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
236                    bool SetFlags = false, bool WantResult = true,
237                    bool IsZExt = false);
238   unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
239   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
240                    bool SetFlags = false, bool WantResult = true,
241                    bool IsZExt = false);
242   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
243                        unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
244   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
245                        unsigned RHSReg, bool RHSIsKill,
246                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
247                        bool WantResult = true);
248   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
249                          const Value *RHS);
250   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251                             bool LHSIsKill, uint64_t Imm);
252   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
253                             bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
254                             uint64_t ShiftImm);
255   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
256   unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
257                       unsigned Op1, bool Op1IsKill);
258   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
259                         unsigned Op1, bool Op1IsKill);
260   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
261                         unsigned Op1, bool Op1IsKill);
262   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
263                       unsigned Op1Reg, bool Op1IsKill);
264   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
265                       uint64_t Imm, bool IsZExt = true);
266   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
267                       unsigned Op1Reg, bool Op1IsKill);
268   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
269                       uint64_t Imm, bool IsZExt = true);
270   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
271                       unsigned Op1Reg, bool Op1IsKill);
272   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
273                       uint64_t Imm, bool IsZExt = false);
274 
275   unsigned materializeInt(const ConstantInt *CI, MVT VT);
276   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
277   unsigned materializeGV(const GlobalValue *GV);
278 
279   // Call handling routines.
280 private:
281   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
282   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
283                        unsigned &NumBytes);
284   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
285 
286 public:
287   // Backend specific FastISel code.
288   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
289   unsigned fastMaterializeConstant(const Constant *C) override;
290   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
291 
292   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
293                            const TargetLibraryInfo *LibInfo)
294       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
295     Subtarget =
296         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
297     Context = &FuncInfo.Fn->getContext();
298   }
299 
300   bool fastSelectInstruction(const Instruction *I) override;
301 
302 #include "AArch64GenFastISel.inc"
303 };
304 
305 } // end anonymous namespace
306 
307 /// Check if the sign-/zero-extend will be a noop.
308 static bool isIntExtFree(const Instruction *I) {
309   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
310          "Unexpected integer extend instruction.");
311   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
312          "Unexpected value type.");
313   bool IsZExt = isa<ZExtInst>(I);
314 
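  // A load with a single use can be folded into a sign-/zero-extending load,
  // so the extend itself is free.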
315   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
316     if (LI->hasOneUse())
317       return true;
318 
319   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
320     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
321       return true;
322 
323   return false;
324 }
325 
326 /// Determine the implicit scale factor that is applied by a memory
327 /// operation for a given value type.
328 static unsigned getImplicitScaleFactor(MVT VT) {
329   switch (VT.SimpleTy) {
330   default:
331     return 0;    // invalid
332   case MVT::i1:  // fall-through
333   case MVT::i8:
334     return 1;
335   case MVT::i16:
336     return 2;
337   case MVT::i32: // fall-through
338   case MVT::f32:
339     return 4;
340   case MVT::i64: // fall-through
341   case MVT::f64:
342     return 8;
343   }
344 }
345 
346 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
347   if (CC == CallingConv::WebKit_JS)
348     return CC_AArch64_WebKit_JS;
349   if (CC == CallingConv::GHC)
350     return CC_AArch64_GHC;
351   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
352 }
353 
354 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
355   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
356          "Alloca should always return a pointer.");
357 
358   // Don't handle dynamic allocas.
359   if (!FuncInfo.StaticAllocaMap.count(AI))
360     return 0;
361 
362   DenseMap<const AllocaInst *, int>::iterator SI =
363       FuncInfo.StaticAllocaMap.find(AI);
364 
365   if (SI != FuncInfo.StaticAllocaMap.end()) {
366     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
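    // Emit ADD Xd, <frame index>, #0 (offset 0, shift 0); once the frame
    // index is resolved this yields the address of the stack object.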
367     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
368             ResultReg)
369         .addFrameIndex(SI->second)
370         .addImm(0)
371         .addImm(0);
372     return ResultReg;
373   }
374 
375   return 0;
376 }
377 
378 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
379   if (VT > MVT::i64)
380     return 0;
381 
382   if (!CI->isZero())
383     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
384 
385   // Create a copy from the zero register to materialize a "0" value.
386   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
387                                                    : &AArch64::GPR32RegClass;
388   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
389   unsigned ResultReg = createResultReg(RC);
390   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
391           ResultReg).addReg(ZeroReg, getKillRegState(true));
392   return ResultReg;
393 }
394 
395 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
396   // Positive zero (+0.0) has to be materialized with a fmov from the zero
397   // register, because the immediate version of fmov cannot encode zero.
398   if (CFP->isNullValue())
399     return fastMaterializeFloatZero(CFP);
400 
401   if (VT != MVT::f32 && VT != MVT::f64)
402     return 0;
403 
404   const APFloat Val = CFP->getValueAPF();
405   bool Is64Bit = (VT == MVT::f64);
406   // Check whether the constant can be materialized with an FMOV immediate;
407   // otherwise it has to be loaded from the constant pool.
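  // (FMOV's floating-point immediate is an 8-bit encoding that covers only a
  // small set of values; getFP32Imm/getFP64Imm return -1 when the constant
  // cannot be encoded that way.)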
408   int Imm =
409       Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
410   if (Imm != -1) {
411     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
412     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
413   }
414 
415   // For the MachO large code model materialize the FP constant in code.
416   if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
417     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
418     const TargetRegisterClass *RC = Is64Bit ?
419         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
420 
421     unsigned TmpReg = createResultReg(RC);
422     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
423         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
424 
425     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
426     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
427             TII.get(TargetOpcode::COPY), ResultReg)
428         .addReg(TmpReg, getKillRegState(true));
429 
430     return ResultReg;
431   }
432 
433   // Materialize via constant pool.  MachineConstantPool wants an explicit
434   // alignment.
435   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
436   if (Align == 0)
437     Align = DL.getTypeAllocSize(CFP->getType());
438 
439   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
440   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
441   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
442           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
443 
444   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
445   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
446   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
447       .addReg(ADRPReg)
448       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
449   return ResultReg;
450 }
451 
452 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
453   // We can't handle thread-local variables quickly yet.
454   if (GV->isThreadLocal())
455     return 0;
456 
457   // MachO still uses GOT for large code-model accesses, but ELF requires
458   // movz/movk sequences, which FastISel doesn't handle yet.
459   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
460     return 0;
461 
462   unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
463 
464   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
465   if (!DestEVT.isSimple())
466     return 0;
467 
468   unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
469   unsigned ResultReg;
470 
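  // ADRP computes the 4 KiB page address of the symbol; the following
  // LDRXui/ADDXri supplies the low 12 bits via the page-offset relocation.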
471   if (OpFlags & AArch64II::MO_GOT) {
472     // ADRP + LDRX
473     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
474             ADRPReg)
475         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
476 
477     ResultReg = createResultReg(&AArch64::GPR64RegClass);
478     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui),
479             ResultReg)
480         .addReg(ADRPReg)
481         .addGlobalAddress(GV, 0,
482                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags);
483   } else {
484     // ADRP + ADDX
485     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
486             ADRPReg)
487         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
488 
489     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
490     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
491             ResultReg)
492         .addReg(ADRPReg)
493         .addGlobalAddress(GV, 0,
494                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
495         .addImm(0);
496   }
497   return ResultReg;
498 }
499 
500 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
501   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
502 
503   // Only handle simple types.
504   if (!CEVT.isSimple())
505     return 0;
506   MVT VT = CEVT.getSimpleVT();
507 
508   if (const auto *CI = dyn_cast<ConstantInt>(C))
509     return materializeInt(CI, VT);
510   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
511     return materializeFP(CFP, VT);
512   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
513     return materializeGV(GV);
514 
515   return 0;
516 }
517 
518 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
519   assert(CFP->isNullValue() &&
520          "Floating-point constant is not a positive zero.");
521   MVT VT;
522   if (!isTypeLegal(CFP->getType(), VT))
523     return 0;
524 
525   if (VT != MVT::f32 && VT != MVT::f64)
526     return 0;
527 
528   bool Is64Bit = (VT == MVT::f64);
529   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
530   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
531   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true);
532 }
533 
534 /// Check if the multiply is by a power-of-2 constant.
535 static bool isMulPowOf2(const Value *I) {
536   if (const auto *MI = dyn_cast<MulOperator>(I)) {
537     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
538       if (C->getValue().isPowerOf2())
539         return true;
540     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
541       if (C->getValue().isPowerOf2())
542         return true;
543   }
544   return false;
545 }
546 
547 // Computes the address to get to an object.
548 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
549 {
550   const User *U = nullptr;
551   unsigned Opcode = Instruction::UserOp1;
552   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
553     // Don't walk into other basic blocks unless the object is an alloca from
554     // another block; otherwise it may not have a virtual register assigned.
555     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
556         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
557       Opcode = I->getOpcode();
558       U = I;
559     }
560   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
561     Opcode = C->getOpcode();
562     U = C;
563   }
564 
565   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
566     if (Ty->getAddressSpace() > 255)
567       // Fast instruction selection doesn't support the special
568       // address spaces.
569       return false;
570 
571   switch (Opcode) {
572   default:
573     break;
574   case Instruction::BitCast:
575     // Look through bitcasts.
576     return computeAddress(U->getOperand(0), Addr, Ty);
577 
578   case Instruction::IntToPtr:
579     // Look past no-op inttoptrs.
580     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
581         TLI.getPointerTy(DL))
582       return computeAddress(U->getOperand(0), Addr, Ty);
583     break;
584 
585   case Instruction::PtrToInt:
586     // Look past no-op ptrtoints.
587     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
588       return computeAddress(U->getOperand(0), Addr, Ty);
589     break;
590 
591   case Instruction::GetElementPtr: {
592     Address SavedAddr = Addr;
593     uint64_t TmpOffset = Addr.getOffset();
594 
595     // Iterate through the GEP, folding the constants into offsets where
596     // we can.
597     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
598          GTI != E; ++GTI) {
599       const Value *Op = GTI.getOperand();
600       if (StructType *STy = GTI.getStructTypeOrNull()) {
601         const StructLayout *SL = DL.getStructLayout(STy);
602         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
603         TmpOffset += SL->getElementOffset(Idx);
604       } else {
605         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
606         while (true) {
607           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
608             // Constant-offset addressing.
609             TmpOffset += CI->getSExtValue() * S;
610             break;
611           }
612           if (canFoldAddIntoGEP(U, Op)) {
613             // A compatible add with a constant operand. Fold the constant.
614             ConstantInt *CI =
615                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
616             TmpOffset += CI->getSExtValue() * S;
617             // Iterate on the other operand.
618             Op = cast<AddOperator>(Op)->getOperand(0);
619             continue;
620           }
621           // Unsupported
622           goto unsupported_gep;
623         }
624       }
625     }
626 
627     // Try to grab the base operand now.
628     Addr.setOffset(TmpOffset);
629     if (computeAddress(U->getOperand(0), Addr, Ty))
630       return true;
631 
632     // We failed, restore everything and try the other options.
633     Addr = SavedAddr;
634 
635   unsupported_gep:
636     break;
637   }
638   case Instruction::Alloca: {
639     const AllocaInst *AI = cast<AllocaInst>(Obj);
640     DenseMap<const AllocaInst *, int>::iterator SI =
641         FuncInfo.StaticAllocaMap.find(AI);
642     if (SI != FuncInfo.StaticAllocaMap.end()) {
643       Addr.setKind(Address::FrameIndexBase);
644       Addr.setFI(SI->second);
645       return true;
646     }
647     break;
648   }
649   case Instruction::Add: {
650     // Adds of constants are common and easy enough.
651     const Value *LHS = U->getOperand(0);
652     const Value *RHS = U->getOperand(1);
653 
654     if (isa<ConstantInt>(LHS))
655       std::swap(LHS, RHS);
656 
657     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
658       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
659       return computeAddress(LHS, Addr, Ty);
660     }
661 
662     Address Backup = Addr;
663     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
664       return true;
665     Addr = Backup;
666 
667     break;
668   }
669   case Instruction::Sub: {
670     // Subs of constants are common and easy enough.
671     const Value *LHS = U->getOperand(0);
672     const Value *RHS = U->getOperand(1);
673 
674     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
675       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
676       return computeAddress(LHS, Addr, Ty);
677     }
678     break;
679   }
680   case Instruction::Shl: {
681     if (Addr.getOffsetReg())
682       break;
683 
684     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
685     if (!CI)
686       break;
687 
688     unsigned Val = CI->getZExtValue();
689     if (Val < 1 || Val > 3)
690       break;
691 
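    // The register-offset addressing mode can only scale by the access size,
    // so the shift amount must equal log2 of the number of bytes accessed.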
692     uint64_t NumBytes = 0;
693     if (Ty && Ty->isSized()) {
694       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
695       NumBytes = NumBits / 8;
696       if (!isPowerOf2_64(NumBits))
697         NumBytes = 0;
698     }
699 
700     if (NumBytes != (1ULL << Val))
701       break;
702 
703     Addr.setShift(Val);
704     Addr.setExtendType(AArch64_AM::LSL);
705 
706     const Value *Src = U->getOperand(0);
707     if (const auto *I = dyn_cast<Instruction>(Src)) {
708       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
709         // Fold the zext or sext when it won't become a noop.
710         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
711           if (!isIntExtFree(ZE) &&
712               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
713             Addr.setExtendType(AArch64_AM::UXTW);
714             Src = ZE->getOperand(0);
715           }
716         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
717           if (!isIntExtFree(SE) &&
718               SE->getOperand(0)->getType()->isIntegerTy(32)) {
719             Addr.setExtendType(AArch64_AM::SXTW);
720             Src = SE->getOperand(0);
721           }
722         }
723       }
724     }
725 
726     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
727       if (AI->getOpcode() == Instruction::And) {
728         const Value *LHS = AI->getOperand(0);
729         const Value *RHS = AI->getOperand(1);
730 
731         if (const auto *C = dyn_cast<ConstantInt>(LHS))
732           if (C->getValue() == 0xffffffff)
733             std::swap(LHS, RHS);
734 
735         if (const auto *C = dyn_cast<ConstantInt>(RHS))
736           if (C->getValue() == 0xffffffff) {
737             Addr.setExtendType(AArch64_AM::UXTW);
738             unsigned Reg = getRegForValue(LHS);
739             if (!Reg)
740               return false;
741             bool RegIsKill = hasTrivialKill(LHS);
742             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
743                                              AArch64::sub_32);
744             Addr.setOffsetReg(Reg);
745             return true;
746           }
747       }
748 
749     unsigned Reg = getRegForValue(Src);
750     if (!Reg)
751       return false;
752     Addr.setOffsetReg(Reg);
753     return true;
754   }
755   case Instruction::Mul: {
756     if (Addr.getOffsetReg())
757       break;
758 
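    // A multiply by 2, 4, or 8 is treated like a left shift so it can use the
    // same scaled register-offset addressing mode.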
759     if (!isMulPowOf2(U))
760       break;
761 
762     const Value *LHS = U->getOperand(0);
763     const Value *RHS = U->getOperand(1);
764 
765     // Canonicalize power-of-2 value to the RHS.
766     if (const auto *C = dyn_cast<ConstantInt>(LHS))
767       if (C->getValue().isPowerOf2())
768         std::swap(LHS, RHS);
769 
770     assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
771     const auto *C = cast<ConstantInt>(RHS);
772     unsigned Val = C->getValue().logBase2();
773     if (Val < 1 || Val > 3)
774       break;
775 
776     uint64_t NumBytes = 0;
777     if (Ty && Ty->isSized()) {
778       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
779       NumBytes = NumBits / 8;
780       if (!isPowerOf2_64(NumBits))
781         NumBytes = 0;
782     }
783 
784     if (NumBytes != (1ULL << Val))
785       break;
786 
787     Addr.setShift(Val);
788     Addr.setExtendType(AArch64_AM::LSL);
789 
790     const Value *Src = LHS;
791     if (const auto *I = dyn_cast<Instruction>(Src)) {
792       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
793         // Fold the zext or sext when it won't become a noop.
794         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
795           if (!isIntExtFree(ZE) &&
796               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
797             Addr.setExtendType(AArch64_AM::UXTW);
798             Src = ZE->getOperand(0);
799           }
800         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
801           if (!isIntExtFree(SE) &&
802               SE->getOperand(0)->getType()->isIntegerTy(32)) {
803             Addr.setExtendType(AArch64_AM::SXTW);
804             Src = SE->getOperand(0);
805           }
806         }
807       }
808     }
809 
810     unsigned Reg = getRegForValue(Src);
811     if (!Reg)
812       return false;
813     Addr.setOffsetReg(Reg);
814     return true;
815   }
816   case Instruction::And: {
817     if (Addr.getOffsetReg())
818       break;
819 
820     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
821       break;
822 
823     const Value *LHS = U->getOperand(0);
824     const Value *RHS = U->getOperand(1);
825 
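    // An AND with 0xffffffff keeps only the low 32 bits, which is exactly what
    // the UXTW register-offset extend does, so the mask can be folded away.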
826     if (const auto *C = dyn_cast<ConstantInt>(LHS))
827       if (C->getValue() == 0xffffffff)
828         std::swap(LHS, RHS);
829 
830     if (const auto *C = dyn_cast<ConstantInt>(RHS))
831       if (C->getValue() == 0xffffffff) {
832         Addr.setShift(0);
833         Addr.setExtendType(AArch64_AM::LSL);
834         Addr.setExtendType(AArch64_AM::UXTW);
835 
836         unsigned Reg = getRegForValue(LHS);
837         if (!Reg)
838           return false;
839         bool RegIsKill = hasTrivialKill(LHS);
840         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill,
841                                          AArch64::sub_32);
842         Addr.setOffsetReg(Reg);
843         return true;
844       }
845     break;
846   }
847   case Instruction::SExt:
848   case Instruction::ZExt: {
849     if (!Addr.getReg() || Addr.getOffsetReg())
850       break;
851 
852     const Value *Src = nullptr;
853     // Fold the zext or sext when it won't become a noop.
854     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
855       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
856         Addr.setExtendType(AArch64_AM::UXTW);
857         Src = ZE->getOperand(0);
858       }
859     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
860       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
861         Addr.setExtendType(AArch64_AM::SXTW);
862         Src = SE->getOperand(0);
863       }
864     }
865 
866     if (!Src)
867       break;
868 
869     Addr.setShift(0);
870     unsigned Reg = getRegForValue(Src);
871     if (!Reg)
872       return false;
873     Addr.setOffsetReg(Reg);
874     return true;
875   }
876   } // end switch
877 
878   if (Addr.isRegBase() && !Addr.getReg()) {
879     unsigned Reg = getRegForValue(Obj);
880     if (!Reg)
881       return false;
882     Addr.setReg(Reg);
883     return true;
884   }
885 
886   if (!Addr.getOffsetReg()) {
887     unsigned Reg = getRegForValue(Obj);
888     if (!Reg)
889       return false;
890     Addr.setOffsetReg(Reg);
891     return true;
892   }
893 
894   return false;
895 }
896 
897 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
898   const User *U = nullptr;
899   unsigned Opcode = Instruction::UserOp1;
900   bool InMBB = true;
901 
902   if (const auto *I = dyn_cast<Instruction>(V)) {
903     Opcode = I->getOpcode();
904     U = I;
905     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
906   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
907     Opcode = C->getOpcode();
908     U = C;
909   }
910 
911   switch (Opcode) {
912   default: break;
913   case Instruction::BitCast:
914     // Look past bitcasts if the operand is in the same BB.
915     if (InMBB)
916       return computeCallAddress(U->getOperand(0), Addr);
917     break;
918   case Instruction::IntToPtr:
919     // Look past no-op inttoptrs if the operand is in the same BB.
920     if (InMBB &&
921         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
922             TLI.getPointerTy(DL))
923       return computeCallAddress(U->getOperand(0), Addr);
924     break;
925   case Instruction::PtrToInt:
926     // Look past no-op ptrtoints if the operand is in the same BB.
927     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
928       return computeCallAddress(U->getOperand(0), Addr);
929     break;
930   }
931 
932   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
933     Addr.setGlobalValue(GV);
934     return true;
935   }
936 
937   // If all else fails, try to materialize the value in a register.
938   if (!Addr.getGlobalValue()) {
939     Addr.setReg(getRegForValue(V));
940     return Addr.getReg() != 0;
941   }
942 
943   return false;
944 }
945 
946 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
947   EVT evt = TLI.getValueType(DL, Ty, true);
948 
949   // Only handle simple types.
950   if (evt == MVT::Other || !evt.isSimple())
951     return false;
952   VT = evt.getSimpleVT();
953 
954   // This is a legal type, but it's not something we handle in fast-isel.
955   if (VT == MVT::f128)
956     return false;
957 
958   // Handle all other legal types, i.e. a register that will directly hold this
959   // value.
960   return TLI.isTypeLegal(VT);
961 }
962 
963 /// Determine if the value type is supported by FastISel.
964 ///
965 /// FastISel for AArch64 can handle more value types than are legal. This adds
966 /// simple value types such as i1, i8, and i16.
967 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
968   if (Ty->isVectorTy() && !IsVectorAllowed)
969     return false;
970 
971   if (isTypeLegal(Ty, VT))
972     return true;
973 
974   // If this is a type that can be sign- or zero-extended to a basic operation
975   // go ahead and accept it now.
976   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
977     return true;
978 
979   return false;
980 }
981 
982 bool AArch64FastISel::isValueAvailable(const Value *V) const {
983   if (!isa<Instruction>(V))
984     return true;
985 
986   const auto *I = cast<Instruction>(V);
987   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
988 }
989 
990 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
991   unsigned ScaleFactor = getImplicitScaleFactor(VT);
992   if (!ScaleFactor)
993     return false;
994 
995   bool ImmediateOffsetNeedsLowering = false;
996   bool RegisterOffsetNeedsLowering = false;
997   int64_t Offset = Addr.getOffset();
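  // Negative or unscaled offsets must fit the 9-bit signed immediate of the
  // unscaled load/store forms; scaled offsets must fit the 12-bit unsigned
  // immediate of the scaled forms.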
998   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
999     ImmediateOffsetNeedsLowering = true;
1000   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1001            !isUInt<12>(Offset / ScaleFactor))
1002     ImmediateOffsetNeedsLowering = true;
1003 
1004   // Cannot encode an offset register and an immediate offset in the same
1005   // instruction. Fold the immediate offset into the load/store instruction and
1006   // emit an additional add to take care of the offset register.
1007   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1008     RegisterOffsetNeedsLowering = true;
1009 
1010   // Cannot encode zero register as base.
1011   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1012     RegisterOffsetNeedsLowering = true;
1013 
1013   // If the address is based on a frame index and the offset needs to be
1014   // simplified, put the alloca address into a register, set the base kind
1015   // back to register, and continue. This should almost never happen.
1017   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1018   {
1019     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1020     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1021             ResultReg)
1022       .addFrameIndex(Addr.getFI())
1023       .addImm(0)
1024       .addImm(0);
1025     Addr.setKind(Address::RegBase);
1026     Addr.setReg(ResultReg);
1027   }
1028 
1029   if (RegisterOffsetNeedsLowering) {
1030     unsigned ResultReg = 0;
1031     if (Addr.getReg()) {
1032       if (Addr.getExtendType() == AArch64_AM::SXTW ||
1033           Addr.getExtendType() == AArch64_AM::UXTW   )
1034         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1035                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1036                                   /*TODO:IsKill=*/false, Addr.getExtendType(),
1037                                   Addr.getShift());
1038       else
1039         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1040                                   /*TODO:IsKill=*/false, Addr.getOffsetReg(),
1041                                   /*TODO:IsKill=*/false, AArch64_AM::LSL,
1042                                   Addr.getShift());
1043     } else {
1044       if (Addr.getExtendType() == AArch64_AM::UXTW)
1045         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1046                                /*Op0IsKill=*/false, Addr.getShift(),
1047                                /*IsZExt=*/true);
1048       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1049         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1050                                /*Op0IsKill=*/false, Addr.getShift(),
1051                                /*IsZExt=*/false);
1052       else
1053         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1054                                /*Op0IsKill=*/false, Addr.getShift());
1055     }
1056     if (!ResultReg)
1057       return false;
1058 
1059     Addr.setReg(ResultReg);
1060     Addr.setOffsetReg(0);
1061     Addr.setShift(0);
1062     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1063   }
1064 
1065   // Since the offset is too large for the load/store instruction get the
1066   // reg+offset into a register.
1067   if (ImmediateOffsetNeedsLowering) {
1068     unsigned ResultReg;
1069     if (Addr.getReg())
1070       // Try to fold the immediate into the add instruction.
1071       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset);
1072     else
1073       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1074 
1075     if (!ResultReg)
1076       return false;
1077     Addr.setReg(ResultReg);
1078     Addr.setOffset(0);
1079   }
1080   return true;
1081 }
1082 
1083 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1084                                            const MachineInstrBuilder &MIB,
1085                                            MachineMemOperand::Flags Flags,
1086                                            unsigned ScaleFactor,
1087                                            MachineMemOperand *MMO) {
1088   int64_t Offset = Addr.getOffset() / ScaleFactor;
1089   // Frame base works a bit differently. Handle it separately.
1090   if (Addr.isFIBase()) {
1091     int FI = Addr.getFI();
1092     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1093     // and alignment should be based on the VT.
1094     MMO = FuncInfo.MF->getMachineMemOperand(
1095         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1096         MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
1097     // Now add the rest of the operands.
1098     MIB.addFrameIndex(FI).addImm(Offset);
1099   } else {
1100     assert(Addr.isRegBase() && "Unexpected address kind.");
1101     const MCInstrDesc &II = MIB->getDesc();
1102     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1103     Addr.setReg(
1104       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1105     Addr.setOffsetReg(
1106       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1107     if (Addr.getOffsetReg()) {
1108       assert(Addr.getOffset() == 0 && "Unexpected offset");
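      // The register-offset forms take two extra immediate operands: whether
      // the offset register is sign-extended and whether it is shifted by the
      // access size.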
1109       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1110                       Addr.getExtendType() == AArch64_AM::SXTX;
1111       MIB.addReg(Addr.getReg());
1112       MIB.addReg(Addr.getOffsetReg());
1113       MIB.addImm(IsSigned);
1114       MIB.addImm(Addr.getShift() != 0);
1115     } else
1116       MIB.addReg(Addr.getReg()).addImm(Offset);
1117   }
1118 
1119   if (MMO)
1120     MIB.addMemOperand(MMO);
1121 }
1122 
1123 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1124                                      const Value *RHS, bool SetFlags,
1125                                      bool WantResult, bool IsZExt) {
1126   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1127   bool NeedExtend = false;
1128   switch (RetVT.SimpleTy) {
1129   default:
1130     return 0;
1131   case MVT::i1:
1132     NeedExtend = true;
1133     break;
1134   case MVT::i8:
1135     NeedExtend = true;
1136     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1137     break;
1138   case MVT::i16:
1139     NeedExtend = true;
1140     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1141     break;
1142   case MVT::i32:  // fall-through
1143   case MVT::i64:
1144     break;
1145   }
1146   MVT SrcVT = RetVT;
1147   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1148 
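  // The code below tries the most specific forms first: fold an immediate, an
  // extended register, or a shifted register (from a mul or shift) into the
  // add/sub, and only then fall back to a plain register-register form.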
1149   // Canonicalize immediates to the RHS first.
1150   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1151     std::swap(LHS, RHS);
1152 
1153   // Canonicalize mul by power of 2 to the RHS.
1154   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1155     if (isMulPowOf2(LHS))
1156       std::swap(LHS, RHS);
1157 
1158   // Canonicalize shift immediate to the RHS.
1159   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1160     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1161       if (isa<ConstantInt>(SI->getOperand(1)))
1162         if (SI->getOpcode() == Instruction::Shl  ||
1163             SI->getOpcode() == Instruction::LShr ||
1164             SI->getOpcode() == Instruction::AShr   )
1165           std::swap(LHS, RHS);
1166 
1167   unsigned LHSReg = getRegForValue(LHS);
1168   if (!LHSReg)
1169     return 0;
1170   bool LHSIsKill = hasTrivialKill(LHS);
1171 
1172   if (NeedExtend)
1173     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1174 
1175   unsigned ResultReg = 0;
1176   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1177     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1178     if (C->isNegative())
1179       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm,
1180                                 SetFlags, WantResult);
1181     else
1182       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags,
1183                                 WantResult);
1184   } else if (const auto *C = dyn_cast<Constant>(RHS))
1185     if (C->isNullValue())
1186       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags,
1187                                 WantResult);
1188 
1189   if (ResultReg)
1190     return ResultReg;
1191 
1192   // Only extend the RHS within the instruction if there is a valid extend type.
1193   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1194       isValueAvailable(RHS)) {
1195     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1196       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1197         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1198           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1199           if (!RHSReg)
1200             return 0;
1201           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1202           return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1203                                RHSIsKill, ExtendType, C->getZExtValue(),
1204                                SetFlags, WantResult);
1205         }
1206     unsigned RHSReg = getRegForValue(RHS);
1207     if (!RHSReg)
1208       return 0;
1209     bool RHSIsKill = hasTrivialKill(RHS);
1210     return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1211                          ExtendType, 0, SetFlags, WantResult);
1212   }
1213 
1214   // Check if the mul can be folded into the instruction.
1215   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1216     if (isMulPowOf2(RHS)) {
1217       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1218       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1219 
1220       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1221         if (C->getValue().isPowerOf2())
1222           std::swap(MulLHS, MulRHS);
1223 
1224       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1225       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1226       unsigned RHSReg = getRegForValue(MulLHS);
1227       if (!RHSReg)
1228         return 0;
1229       bool RHSIsKill = hasTrivialKill(MulLHS);
1230       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1231                                 RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags,
1232                                 WantResult);
1233       if (ResultReg)
1234         return ResultReg;
1235     }
1236   }
1237 
1238   // Check if the shift can be folded into the instruction.
1239   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1240     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1241       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1242         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1243         switch (SI->getOpcode()) {
1244         default: break;
1245         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1246         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1247         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1248         }
1249         uint64_t ShiftVal = C->getZExtValue();
1250         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1251           unsigned RHSReg = getRegForValue(SI->getOperand(0));
1252           if (!RHSReg)
1253             return 0;
1254           bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1255           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg,
1256                                     RHSIsKill, ShiftType, ShiftVal, SetFlags,
1257                                     WantResult);
1258           if (ResultReg)
1259             return ResultReg;
1260         }
1261       }
1262     }
1263   }
1264 
1265   unsigned RHSReg = getRegForValue(RHS);
1266   if (!RHSReg)
1267     return 0;
1268   bool RHSIsKill = hasTrivialKill(RHS);
1269 
1270   if (NeedExtend)
1271     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1272 
1273   return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1274                        SetFlags, WantResult);
1275 }
1276 
1277 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1278                                         bool LHSIsKill, unsigned RHSReg,
1279                                         bool RHSIsKill, bool SetFlags,
1280                                         bool WantResult) {
1281   assert(LHSReg && RHSReg && "Invalid register number.");
1282 
1283   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1284       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1285     return 0;
1286 
1287   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1288     return 0;
1289 
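  // Opcode table indexed as [SetFlags][UseAdd][Is64Bit].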
1290   static const unsigned OpcTable[2][2][2] = {
1291     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1292       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1293     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1294       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1295   };
1296   bool Is64Bit = RetVT == MVT::i64;
1297   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1298   const TargetRegisterClass *RC =
1299       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1300   unsigned ResultReg;
1301   if (WantResult)
1302     ResultReg = createResultReg(RC);
1303   else
1304     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1305 
1306   const MCInstrDesc &II = TII.get(Opc);
1307   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1308   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1309   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1310       .addReg(LHSReg, getKillRegState(LHSIsKill))
1311       .addReg(RHSReg, getKillRegState(RHSIsKill));
1312   return ResultReg;
1313 }
1314 
1315 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1316                                         bool LHSIsKill, uint64_t Imm,
1317                                         bool SetFlags, bool WantResult) {
1318   assert(LHSReg && "Invalid register number.");
1319 
1320   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1321     return 0;
1322 
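  // ADD/SUB immediates are 12 bits wide, optionally shifted left by 12, so
  // accept values in [0, 0xfff] or values whose low 12 bits are clear and
  // that fit in 24 bits.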
1323   unsigned ShiftImm;
1324   if (isUInt<12>(Imm))
1325     ShiftImm = 0;
1326   else if ((Imm & 0xfff000) == Imm) {
1327     ShiftImm = 12;
1328     Imm >>= 12;
1329   } else
1330     return 0;
1331 
1332   static const unsigned OpcTable[2][2][2] = {
1333     { { AArch64::SUBWri,  AArch64::SUBXri  },
1334       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1335     { { AArch64::SUBSWri, AArch64::SUBSXri },
1336       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1337   };
1338   bool Is64Bit = RetVT == MVT::i64;
1339   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1340   const TargetRegisterClass *RC;
1341   if (SetFlags)
1342     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1343   else
1344     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1345   unsigned ResultReg;
1346   if (WantResult)
1347     ResultReg = createResultReg(RC);
1348   else
1349     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1350 
1351   const MCInstrDesc &II = TII.get(Opc);
1352   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1353   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1354       .addReg(LHSReg, getKillRegState(LHSIsKill))
1355       .addImm(Imm)
1356       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1357   return ResultReg;
1358 }
1359 
1360 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1361                                         bool LHSIsKill, unsigned RHSReg,
1362                                         bool RHSIsKill,
1363                                         AArch64_AM::ShiftExtendType ShiftType,
1364                                         uint64_t ShiftImm, bool SetFlags,
1365                                         bool WantResult) {
1366   assert(LHSReg && RHSReg && "Invalid register number.");
1367   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1368          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1369 
1370   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1371     return 0;
1372 
1373   // Don't deal with undefined shifts.
1374   if (ShiftImm >= RetVT.getSizeInBits())
1375     return 0;
1376 
1377   static const unsigned OpcTable[2][2][2] = {
1378     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1379       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1380     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1381       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1382   };
1383   bool Is64Bit = RetVT == MVT::i64;
1384   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1385   const TargetRegisterClass *RC =
1386       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1387   unsigned ResultReg;
1388   if (WantResult)
1389     ResultReg = createResultReg(RC);
1390   else
1391     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1392 
1393   const MCInstrDesc &II = TII.get(Opc);
1394   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1395   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1396   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1397       .addReg(LHSReg, getKillRegState(LHSIsKill))
1398       .addReg(RHSReg, getKillRegState(RHSIsKill))
1399       .addImm(getShifterImm(ShiftType, ShiftImm));
1400   return ResultReg;
1401 }
1402 
1403 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1404                                         bool LHSIsKill, unsigned RHSReg,
1405                                         bool RHSIsKill,
1406                                         AArch64_AM::ShiftExtendType ExtType,
1407                                         uint64_t ShiftImm, bool SetFlags,
1408                                         bool WantResult) {
1409   assert(LHSReg && RHSReg && "Invalid register number.");
1410   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1411          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1412 
1413   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1414     return 0;
1415 
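  // Only small left-shift amounts (0-3) applied after the extension are
  // handled here; anything larger is rejected.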
1416   if (ShiftImm >= 4)
1417     return 0;
1418 
1419   static const unsigned OpcTable[2][2][2] = {
1420     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1421       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1422     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1423       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1424   };
1425   bool Is64Bit = RetVT == MVT::i64;
1426   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1427   const TargetRegisterClass *RC = nullptr;
1428   if (SetFlags)
1429     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1430   else
1431     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1432   unsigned ResultReg;
1433   if (WantResult)
1434     ResultReg = createResultReg(RC);
1435   else
1436     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1437 
1438   const MCInstrDesc &II = TII.get(Opc);
1439   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1440   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1441   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1442       .addReg(LHSReg, getKillRegState(LHSIsKill))
1443       .addReg(RHSReg, getKillRegState(RHSIsKill))
1444       .addImm(getArithExtendImm(ExtType, ShiftImm));
1445   return ResultReg;
1446 }
1447 
1448 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1449   Type *Ty = LHS->getType();
1450   EVT EVT = TLI.getValueType(DL, Ty, true);
1451   if (!EVT.isSimple())
1452     return false;
1453   MVT VT = EVT.getSimpleVT();
1454 
1455   switch (VT.SimpleTy) {
1456   default:
1457     return false;
1458   case MVT::i1:
1459   case MVT::i8:
1460   case MVT::i16:
1461   case MVT::i32:
1462   case MVT::i64:
1463     return emitICmp(VT, LHS, RHS, IsZExt);
1464   case MVT::f32:
1465   case MVT::f64:
1466     return emitFCmp(VT, LHS, RHS);
1467   }
1468 }
1469 
1470 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1471                                bool IsZExt) {
1472   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1473                  IsZExt) != 0;
1474 }
1475 
1476 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1477                                   uint64_t Imm) {
1478   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm,
1479                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1480 }
1481 
1482 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1483   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1484     return false;
1485 
1486   // Check to see if the 2nd operand is a constant that we can encode directly
1487   // in the compare.
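  // The FCMP immediate form compares against +0.0, so only a non-negative
  // floating-point zero qualifies.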
1488   bool UseImm = false;
1489   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1490     if (CFP->isZero() && !CFP->isNegative())
1491       UseImm = true;
1492 
1493   unsigned LHSReg = getRegForValue(LHS);
1494   if (!LHSReg)
1495     return false;
1496   bool LHSIsKill = hasTrivialKill(LHS);
1497 
1498   if (UseImm) {
1499     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1500     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1501         .addReg(LHSReg, getKillRegState(LHSIsKill));
1502     return true;
1503   }
1504 
1505   unsigned RHSReg = getRegForValue(RHS);
1506   if (!RHSReg)
1507     return false;
1508   bool RHSIsKill = hasTrivialKill(RHS);
1509 
1510   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1511   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1512       .addReg(LHSReg, getKillRegState(LHSIsKill))
1513       .addReg(RHSReg, getKillRegState(RHSIsKill));
1514   return true;
1515 }
1516 
1517 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1518                                   bool SetFlags, bool WantResult, bool IsZExt) {
1519   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1520                     IsZExt);
1521 }
1522 
1523 /// This method is a wrapper to simplify add emission.
1524 ///
1525 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1526 /// that fails, then try to materialize the immediate into a register and use
1527 /// emitAddSub_rr instead.
1528 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill,
1529                                       int64_t Imm) {
1530   unsigned ResultReg;
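  // A negative immediate is emitted as a subtraction of its magnitude; if the
  // immediate cannot be encoded either way, fall back to materializing it in
  // a register below.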
1531   if (Imm < 0)
1532     ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm);
1533   else
1534     ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm);
1535 
1536   if (ResultReg)
1537     return ResultReg;
1538 
1539   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1540   if (!CReg)
1541     return 0;
1542 
1543   ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true);
1544   return ResultReg;
1545 }
1546 
1547 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1548                                   bool SetFlags, bool WantResult, bool IsZExt) {
1549   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1550                     IsZExt);
1551 }
1552 
1553 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1554                                       bool LHSIsKill, unsigned RHSReg,
1555                                       bool RHSIsKill, bool WantResult) {
1556   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1557                        RHSIsKill, /*SetFlags=*/true, WantResult);
1558 }
1559 
1560 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1561                                       bool LHSIsKill, unsigned RHSReg,
1562                                       bool RHSIsKill,
1563                                       AArch64_AM::ShiftExtendType ShiftType,
1564                                       uint64_t ShiftImm, bool WantResult) {
1565   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg,
1566                        RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true,
1567                        WantResult);
1568 }
1569 
1570 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1571                                         const Value *LHS, const Value *RHS) {
1572   // Canonicalize immediates to the RHS first.
1573   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1574     std::swap(LHS, RHS);
1575 
1576   // Canonicalize mul by power-of-2 to the RHS.
1577   if (LHS->hasOneUse() && isValueAvailable(LHS))
1578     if (isMulPowOf2(LHS))
1579       std::swap(LHS, RHS);
1580 
1581   // Canonicalize shift immediate to the RHS.
1582   if (LHS->hasOneUse() && isValueAvailable(LHS))
1583     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1584       if (isa<ConstantInt>(SI->getOperand(1)))
1585         std::swap(LHS, RHS);
1586 
1587   unsigned LHSReg = getRegForValue(LHS);
1588   if (!LHSReg)
1589     return 0;
1590   bool LHSIsKill = hasTrivialKill(LHS);
1591 
1592   unsigned ResultReg = 0;
1593   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1594     uint64_t Imm = C->getZExtValue();
1595     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm);
1596   }
1597   if (ResultReg)
1598     return ResultReg;
1599 
1600   // Check if the mul can be folded into the instruction.
1601   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1602     if (isMulPowOf2(RHS)) {
1603       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1604       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1605 
1606       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1607         if (C->getValue().isPowerOf2())
1608           std::swap(MulLHS, MulRHS);
1609 
1610       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1611       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1612 
1613       unsigned RHSReg = getRegForValue(MulLHS);
1614       if (!RHSReg)
1615         return 0;
1616       bool RHSIsKill = hasTrivialKill(MulLHS);
1617       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1618                                    RHSIsKill, ShiftVal);
1619       if (ResultReg)
1620         return ResultReg;
1621     }
1622   }
1623 
1624   // Check if the shift can be folded into the instruction.
1625   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1626     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1627       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1628         uint64_t ShiftVal = C->getZExtValue();
1629         unsigned RHSReg = getRegForValue(SI->getOperand(0));
1630         if (!RHSReg)
1631           return 0;
1632         bool RHSIsKill = hasTrivialKill(SI->getOperand(0));
1633         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg,
1634                                      RHSIsKill, ShiftVal);
1635         if (ResultReg)
1636           return ResultReg;
1637       }
1638   }
1639 
1640   unsigned RHSReg = getRegForValue(RHS);
1641   if (!RHSReg)
1642     return 0;
1643   bool RHSIsKill = hasTrivialKill(RHS);
1644 
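  // Sub-32-bit operations are performed on the full 32-bit register and the
  // result is masked back down to the original width below.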
1645   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1646   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
1647   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1648     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1649     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1650   }
1651   return ResultReg;
1652 }
1653 
1654 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1655                                            unsigned LHSReg, bool LHSIsKill,
1656                                            uint64_t Imm) {
1657   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1658                 "ISD nodes are not consecutive!");
1659   static const unsigned OpcTable[3][2] = {
1660     { AArch64::ANDWri, AArch64::ANDXri },
1661     { AArch64::ORRWri, AArch64::ORRXri },
1662     { AArch64::EORWri, AArch64::EORXri }
1663   };
1664   const TargetRegisterClass *RC;
1665   unsigned Opc;
1666   unsigned RegSize;
1667   switch (RetVT.SimpleTy) {
1668   default:
1669     return 0;
1670   case MVT::i1:
1671   case MVT::i8:
1672   case MVT::i16:
1673   case MVT::i32: {
1674     unsigned Idx = ISDOpc - ISD::AND;
1675     Opc = OpcTable[Idx][0];
1676     RC = &AArch64::GPR32spRegClass;
1677     RegSize = 32;
1678     break;
1679   }
1680   case MVT::i64:
1681     Opc = OpcTable[ISDOpc - ISD::AND][1];
1682     RC = &AArch64::GPR64spRegClass;
1683     RegSize = 64;
1684     break;
1685   }
1686 
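  // Only immediates encodable as an AArch64 logical (bitmask) immediate are
  // handled here.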
1687   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1688     return 0;
1689 
1690   unsigned ResultReg =
1691       fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill,
1692                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1693   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1694     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1695     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1696   }
1697   return ResultReg;
1698 }
1699 
1700 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1701                                            unsigned LHSReg, bool LHSIsKill,
1702                                            unsigned RHSReg, bool RHSIsKill,
1703                                            uint64_t ShiftImm) {
1704   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1705                 "ISD nodes are not consecutive!");
1706   static const unsigned OpcTable[3][2] = {
1707     { AArch64::ANDWrs, AArch64::ANDXrs },
1708     { AArch64::ORRWrs, AArch64::ORRXrs },
1709     { AArch64::EORWrs, AArch64::EORXrs }
1710   };
1711 
1712   // Don't deal with undefined shifts.
1713   if (ShiftImm >= RetVT.getSizeInBits())
1714     return 0;
1715 
1716   const TargetRegisterClass *RC;
1717   unsigned Opc;
1718   switch (RetVT.SimpleTy) {
1719   default:
1720     return 0;
1721   case MVT::i1:
1722   case MVT::i8:
1723   case MVT::i16:
1724   case MVT::i32:
1725     Opc = OpcTable[ISDOpc - ISD::AND][0];
1726     RC = &AArch64::GPR32RegClass;
1727     break;
1728   case MVT::i64:
1729     Opc = OpcTable[ISDOpc - ISD::AND][1];
1730     RC = &AArch64::GPR64RegClass;
1731     break;
1732   }
1733   unsigned ResultReg =
1734       fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill,
1735                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1736   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1737     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1738     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
1739   }
1740   return ResultReg;
1741 }
1742 
1743 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
1744                                      uint64_t Imm) {
1745   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm);
1746 }
1747 
1748 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1749                                    bool WantZExt, MachineMemOperand *MMO) {
1750   if (!TLI.allowsMisalignedMemoryAccesses(VT))
1751     return 0;
1752 
1753   // Simplify this down to something we can handle.
1754   if (!simplifyAddress(Addr, VT))
1755     return 0;
1756 
1757   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1758   if (!ScaleFactor)
1759     llvm_unreachable("Unexpected value type.");
1760 
1761   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1762   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1763   bool UseScaled = true;
1764   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1765     UseScaled = false;
1766     ScaleFactor = 1;
1767   }
1768 
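  // Each sign-/zero-extend block below holds four addressing-mode pairs
  // (unscaled immediate, scaled immediate, register offset X, register offset
  // W with extend); within each pair the first row yields a 32-bit result and
  // the second a 64-bit result, matching the
  // GPOpcTable[WantZExt][2 * Idx + IsRet64Bit] lookup further down.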
1769   static const unsigned GPOpcTable[2][8][4] = {
1770     // Sign-extend.
1771     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1772         AArch64::LDURXi  },
1773       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1774         AArch64::LDURXi  },
1775       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1776         AArch64::LDRXui  },
1777       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1778         AArch64::LDRXui  },
1779       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1780         AArch64::LDRXroX },
1781       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1782         AArch64::LDRXroX },
1783       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1784         AArch64::LDRXroW },
1785       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1786         AArch64::LDRXroW }
1787     },
1788     // Zero-extend.
1789     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1790         AArch64::LDURXi  },
1791       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1792         AArch64::LDURXi  },
1793       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1794         AArch64::LDRXui  },
1795       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1796         AArch64::LDRXui  },
1797       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1798         AArch64::LDRXroX },
1799       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1800         AArch64::LDRXroX },
1801       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1802         AArch64::LDRXroW },
1803       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1804         AArch64::LDRXroW }
1805     }
1806   };
1807 
1808   static const unsigned FPOpcTable[4][2] = {
1809     { AArch64::LDURSi,  AArch64::LDURDi  },
1810     { AArch64::LDRSui,  AArch64::LDRDui  },
1811     { AArch64::LDRSroX, AArch64::LDRDroX },
1812     { AArch64::LDRSroW, AArch64::LDRDroW }
1813   };
1814 
1815   unsigned Opc;
1816   const TargetRegisterClass *RC;
1817   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1818                       Addr.getOffsetReg();
1819   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1820   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1821       Addr.getExtendType() == AArch64_AM::SXTW)
1822     Idx++;
1823 
1824   bool IsRet64Bit = RetVT == MVT::i64;
1825   switch (VT.SimpleTy) {
1826   default:
1827     llvm_unreachable("Unexpected value type.");
1828   case MVT::i1: // Intentional fall-through.
1829   case MVT::i8:
1830     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1831     RC = (IsRet64Bit && !WantZExt) ?
1832              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1833     break;
1834   case MVT::i16:
1835     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1836     RC = (IsRet64Bit && !WantZExt) ?
1837              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1838     break;
1839   case MVT::i32:
1840     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1841     RC = (IsRet64Bit && !WantZExt) ?
1842              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1843     break;
1844   case MVT::i64:
1845     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1846     RC = &AArch64::GPR64RegClass;
1847     break;
1848   case MVT::f32:
1849     Opc = FPOpcTable[Idx][0];
1850     RC = &AArch64::FPR32RegClass;
1851     break;
1852   case MVT::f64:
1853     Opc = FPOpcTable[Idx][1];
1854     RC = &AArch64::FPR64RegClass;
1855     break;
1856   }
1857 
1858   // Create the base instruction, then add the operands.
1859   unsigned ResultReg = createResultReg(RC);
1860   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1861                                     TII.get(Opc), ResultReg);
1862   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1863 
1864   // Loading an i1 requires special handling.
1865   if (VT == MVT::i1) {
1866     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1);
1867     assert(ANDReg && "Unexpected AND instruction emission failure.");
1868     ResultReg = ANDReg;
1869   }
1870 
1871   // For zero-extending loads to 64 bits we emit a 32-bit load and then
1872   // convert the 32-bit register to a 64-bit register.
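  // A 32-bit load already zeroes the upper 32 bits of the X register, so a
  // SUBREG_TO_REG is sufficient and no explicit extend is needed.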
1873   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1874     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
1875     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1876             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1877         .addImm(0)
1878         .addReg(ResultReg, getKillRegState(true))
1879         .addImm(AArch64::sub_32);
1880     ResultReg = Reg64;
1881   }
1882   return ResultReg;
1883 }
1884 
1885 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1886   MVT VT;
1887   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1888     return false;
1889 
1890   if (VT.isVector())
1891     return selectOperator(I, I->getOpcode());
1892 
1893   unsigned ResultReg;
1894   switch (I->getOpcode()) {
1895   default:
1896     llvm_unreachable("Unexpected instruction.");
1897   case Instruction::Add:
1898     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1899     break;
1900   case Instruction::Sub:
1901     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1902     break;
1903   }
1904   if (!ResultReg)
1905     return false;
1906 
1907   updateValueMap(I, ResultReg);
1908   return true;
1909 }
1910 
1911 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1912   MVT VT;
1913   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1914     return false;
1915 
1916   if (VT.isVector())
1917     return selectOperator(I, I->getOpcode());
1918 
1919   unsigned ResultReg;
1920   switch (I->getOpcode()) {
1921   default:
1922     llvm_unreachable("Unexpected instruction.");
1923   case Instruction::And:
1924     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1925     break;
1926   case Instruction::Or:
1927     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1928     break;
1929   case Instruction::Xor:
1930     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1931     break;
1932   }
1933   if (!ResultReg)
1934     return false;
1935 
1936   updateValueMap(I, ResultReg);
1937   return true;
1938 }
1939 
1940 bool AArch64FastISel::selectLoad(const Instruction *I) {
1941   MVT VT;
1942   // Verify we have a legal type before going any further.  Currently, we handle
1943   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1944   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1945   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1946       cast<LoadInst>(I)->isAtomic())
1947     return false;
1948 
1949   const Value *SV = I->getOperand(0);
1950   if (TLI.supportSwiftError()) {
1951     // Swifterror values can come from either a function parameter with
1952     // swifterror attribute or an alloca with swifterror attribute.
1953     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1954       if (Arg->hasSwiftErrorAttr())
1955         return false;
1956     }
1957 
1958     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1959       if (Alloca->isSwiftError())
1960         return false;
1961     }
1962   }
1963 
1964   // See if we can handle this address.
1965   Address Addr;
1966   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1967     return false;
1968 
1969   // Fold the following sign-/zero-extend into the load instruction.
1970   bool WantZExt = true;
1971   MVT RetVT = VT;
1972   const Value *IntExtVal = nullptr;
1973   if (I->hasOneUse()) {
1974     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1975       if (isTypeSupported(ZE->getType(), RetVT))
1976         IntExtVal = ZE;
1977       else
1978         RetVT = VT;
1979     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1980       if (isTypeSupported(SE->getType(), RetVT))
1981         IntExtVal = SE;
1982       else
1983         RetVT = VT;
1984       WantZExt = false;
1985     }
1986   }
1987 
1988   unsigned ResultReg =
1989       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1990   if (!ResultReg)
1991     return false;
1992 
1993   // There are a few different cases we have to handle, because the load or the
1994   // sign-/zero-extend might not be selected by FastISel if we fall-back to
1995   // SelectionDAG. There is also an ordering issue when both instructions are in
1996   // different basic blocks.
1997   // 1.) The load instruction is selected by FastISel, but the integer extend is
1998   //     not. This usually happens when the integer extend is in a different
1999   //     basic block and SelectionDAG took over for that basic block.
2000   // 2.) The load instruction is selected before the integer extend. This only
2001   //     happens when the integer extend is in a different basic block.
2002   // 3.) The load instruction is selected by SelectionDAG and the integer extend
2003   //     by FastISel. This happens if there are instructions between the load
2004   //     and the integer extend that couldn't be selected by FastISel.
2005   if (IntExtVal) {
2006     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2007     // could select it. Emit a copy to subreg if necessary. FastISel will remove
2008     // it when it selects the integer extend.
2009     unsigned Reg = lookUpRegForValue(IntExtVal);
2010     auto *MI = MRI.getUniqueVRegDef(Reg);
2011     if (!MI) {
2012       if (RetVT == MVT::i64 && VT <= MVT::i32) {
2013         if (WantZExt) {
2014           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2015           MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2016           ResultReg = std::prev(I)->getOperand(0).getReg();
2017           removeDeadCode(I, std::next(I));
2018         } else
2019           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2020                                                  /*IsKill=*/true,
2021                                                  AArch64::sub_32);
2022       }
2023       updateValueMap(I, ResultReg);
2024       return true;
2025     }
2026 
2027     // The integer extend has already been emitted - delete all the instructions
2028     // that have been emitted by the integer extend lowering code and use the
2029     // result from the load instruction directly.
2030     while (MI) {
2031       Reg = 0;
2032       for (auto &Opnd : MI->uses()) {
2033         if (Opnd.isReg()) {
2034           Reg = Opnd.getReg();
2035           break;
2036         }
2037       }
2038       MachineBasicBlock::iterator I(MI);
2039       removeDeadCode(I, std::next(I));
2040       MI = nullptr;
2041       if (Reg)
2042         MI = MRI.getUniqueVRegDef(Reg);
2043     }
2044     updateValueMap(IntExtVal, ResultReg);
2045     return true;
2046   }
2047 
2048   updateValueMap(I, ResultReg);
2049   return true;
2050 }
2051 
2052 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2053                                        unsigned AddrReg,
2054                                        MachineMemOperand *MMO) {
2055   unsigned Opc;
2056   switch (VT.SimpleTy) {
2057   default: return false;
2058   case MVT::i8:  Opc = AArch64::STLRB; break;
2059   case MVT::i16: Opc = AArch64::STLRH; break;
2060   case MVT::i32: Opc = AArch64::STLRW; break;
2061   case MVT::i64: Opc = AArch64::STLRX; break;
2062   }
2063 
2064   const MCInstrDesc &II = TII.get(Opc);
2065   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2066   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2067   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2068       .addReg(SrcReg)
2069       .addReg(AddrReg)
2070       .addMemOperand(MMO);
2071   return true;
2072 }
2073 
2074 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2075                                 MachineMemOperand *MMO) {
2076   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2077     return false;
2078 
2079   // Simplify this down to something we can handle.
2080   if (!simplifyAddress(Addr, VT))
2081     return false;
2082 
2083   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2084   if (!ScaleFactor)
2085     llvm_unreachable("Unexpected value type.");
2086 
2087   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2088   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2089   bool UseScaled = true;
2090   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2091     UseScaled = false;
2092     ScaleFactor = 1;
2093   }
2094 
2095   static const unsigned OpcTable[4][6] = {
2096     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2097       AArch64::STURSi,   AArch64::STURDi },
2098     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2099       AArch64::STRSui,   AArch64::STRDui },
2100     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2101       AArch64::STRSroX,  AArch64::STRDroX },
2102     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2103       AArch64::STRSroW,  AArch64::STRDroW }
2104   };
2105 
2106   unsigned Opc;
2107   bool VTIsi1 = false;
2108   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2109                       Addr.getOffsetReg();
2110   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2111   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2112       Addr.getExtendType() == AArch64_AM::SXTW)
2113     Idx++;
2114 
2115   switch (VT.SimpleTy) {
2116   default: llvm_unreachable("Unexpected value type.");
2117   case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
2118   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2119   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2120   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2121   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2122   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2123   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2124   }
2125 
2126   // Storing an i1 requires special handling.
2127   if (VTIsi1 && SrcReg != AArch64::WZR) {
2128     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
2129     assert(ANDReg && "Unexpected AND instruction emission failure.");
2130     SrcReg = ANDReg;
2131   }
2132   // Create the base instruction, then add the operands.
2133   const MCInstrDesc &II = TII.get(Opc);
2134   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2135   MachineInstrBuilder MIB =
2136       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2137   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2138 
2139   return true;
2140 }
2141 
2142 bool AArch64FastISel::selectStore(const Instruction *I) {
2143   MVT VT;
2144   const Value *Op0 = I->getOperand(0);
2145   // Verify we have a legal type before going any further.  Currently, we handle
2146   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2147   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2148   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2149     return false;
2150 
2151   const Value *PtrV = I->getOperand(1);
2152   if (TLI.supportSwiftError()) {
2153     // Swifterror values can come from either a function parameter with
2154     // swifterror attribute or an alloca with swifterror attribute.
2155     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2156       if (Arg->hasSwiftErrorAttr())
2157         return false;
2158     }
2159 
2160     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2161       if (Alloca->isSwiftError())
2162         return false;
2163     }
2164   }
2165 
2166   // Get the value to be stored into a register. Use the zero register directly
2167   // when possible to avoid an unnecessary copy and a wasted register.
2168   unsigned SrcReg = 0;
2169   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2170     if (CI->isZero())
2171       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2172   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2173     if (CF->isZero() && !CF->isNegative()) {
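      // +0.0 has an all-zero bit pattern, so the store can reuse the integer
      // zero register of the matching width.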
2174       VT = MVT::getIntegerVT(VT.getSizeInBits());
2175       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2176     }
2177   }
2178 
2179   if (!SrcReg)
2180     SrcReg = getRegForValue(Op0);
2181 
2182   if (!SrcReg)
2183     return false;
2184 
2185   auto *SI = cast<StoreInst>(I);
2186 
2187   // Try to emit a STLR for seq_cst/release.
2188   if (SI->isAtomic()) {
2189     AtomicOrdering Ord = SI->getOrdering();
2190     // The non-atomic instructions are sufficient for relaxed stores.
2191     if (isReleaseOrStronger(Ord)) {
2192       // The STLR addressing mode only supports a base reg; pass that directly.
2193       unsigned AddrReg = getRegForValue(PtrV);
2194       return emitStoreRelease(VT, SrcReg, AddrReg,
2195                               createMachineMemOperandFor(I));
2196     }
2197   }
2198 
2199   // See if we can handle this address.
2200   Address Addr;
2201   if (!computeAddress(PtrV, Addr, Op0->getType()))
2202     return false;
2203 
2204   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2205     return false;
2206   return true;
2207 }
2208 
2209 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2210   switch (Pred) {
2211   case CmpInst::FCMP_ONE:
2212   case CmpInst::FCMP_UEQ:
2213   default:
2214     // AL is our "false" for now. The other two need more compares.
2215     return AArch64CC::AL;
2216   case CmpInst::ICMP_EQ:
2217   case CmpInst::FCMP_OEQ:
2218     return AArch64CC::EQ;
2219   case CmpInst::ICMP_SGT:
2220   case CmpInst::FCMP_OGT:
2221     return AArch64CC::GT;
2222   case CmpInst::ICMP_SGE:
2223   case CmpInst::FCMP_OGE:
2224     return AArch64CC::GE;
2225   case CmpInst::ICMP_UGT:
2226   case CmpInst::FCMP_UGT:
2227     return AArch64CC::HI;
2228   case CmpInst::FCMP_OLT:
2229     return AArch64CC::MI;
2230   case CmpInst::ICMP_ULE:
2231   case CmpInst::FCMP_OLE:
2232     return AArch64CC::LS;
2233   case CmpInst::FCMP_ORD:
2234     return AArch64CC::VC;
2235   case CmpInst::FCMP_UNO:
2236     return AArch64CC::VS;
2237   case CmpInst::FCMP_UGE:
2238     return AArch64CC::PL;
2239   case CmpInst::ICMP_SLT:
2240   case CmpInst::FCMP_ULT:
2241     return AArch64CC::LT;
2242   case CmpInst::ICMP_SLE:
2243   case CmpInst::FCMP_ULE:
2244     return AArch64CC::LE;
2245   case CmpInst::FCMP_UNE:
2246   case CmpInst::ICMP_NE:
2247     return AArch64CC::NE;
2248   case CmpInst::ICMP_UGE:
2249     return AArch64CC::HS;
2250   case CmpInst::ICMP_ULT:
2251     return AArch64CC::LO;
2252   }
2253 }
2254 
2255 /// Try to emit a combined compare-and-branch instruction.
2256 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2257   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2258   // will not be produced, as they are conditional branch instructions that do
2259   // not set flags.
2260   if (FuncInfo.MF->getFunction().hasFnAttribute(
2261           Attribute::SpeculativeLoadHardening))
2262     return false;
2263 
2264   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2265   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2266   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2267 
2268   const Value *LHS = CI->getOperand(0);
2269   const Value *RHS = CI->getOperand(1);
2270 
2271   MVT VT;
2272   if (!isTypeSupported(LHS->getType(), VT))
2273     return false;
2274 
2275   unsigned BW = VT.getSizeInBits();
2276   if (BW > 64)
2277     return false;
2278 
2279   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2280   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2281 
2282   // Try to take advantage of fallthrough opportunities.
2283   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2284     std::swap(TBB, FBB);
2285     Predicate = CmpInst::getInversePredicate(Predicate);
2286   }
2287 
2288   int TestBit = -1;
2289   bool IsCmpNE;
2290   switch (Predicate) {
2291   default:
2292     return false;
2293   case CmpInst::ICMP_EQ:
2294   case CmpInst::ICMP_NE:
2295     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2296       std::swap(LHS, RHS);
2297 
2298     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2299       return false;
2300 
2301     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2302       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2303         const Value *AndLHS = AI->getOperand(0);
2304         const Value *AndRHS = AI->getOperand(1);
2305 
2306         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2307           if (C->getValue().isPowerOf2())
2308             std::swap(AndLHS, AndRHS);
2309 
2310         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2311           if (C->getValue().isPowerOf2()) {
2312             TestBit = C->getValue().logBase2();
2313             LHS = AndLHS;
2314           }
2315       }
2316 
2317     if (VT == MVT::i1)
2318       TestBit = 0;
2319 
2320     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2321     break;
2322   case CmpInst::ICMP_SLT:
2323   case CmpInst::ICMP_SGE:
2324     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2325       return false;
2326 
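    // A signed comparison against zero is equivalent to testing the sign bit.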
2327     TestBit = BW - 1;
2328     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2329     break;
2330   case CmpInst::ICMP_SGT:
2331   case CmpInst::ICMP_SLE:
2332     if (!isa<ConstantInt>(RHS))
2333       return false;
2334 
2335     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2336       return false;
2337 
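    // Similarly, comparing against -1 with SGT/SLE reduces to testing the
    // sign bit.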
2338     TestBit = BW - 1;
2339     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2340     break;
2341   } // end switch
2342 
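  // Indexed as OpcTable[IsBitTest][IsCmpNE][Is64Bit]: CBZ/CBNZ compare the
  // whole register against zero, TBZ/TBNZ test a single bit.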
2343   static const unsigned OpcTable[2][2][2] = {
2344     { {AArch64::CBZW,  AArch64::CBZX },
2345       {AArch64::CBNZW, AArch64::CBNZX} },
2346     { {AArch64::TBZW,  AArch64::TBZX },
2347       {AArch64::TBNZW, AArch64::TBNZX} }
2348   };
2349 
2350   bool IsBitTest = TestBit != -1;
2351   bool Is64Bit = BW == 64;
2352   if (TestBit < 32 && TestBit >= 0)
2353     Is64Bit = false;
2354 
2355   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2356   const MCInstrDesc &II = TII.get(Opc);
2357 
2358   unsigned SrcReg = getRegForValue(LHS);
2359   if (!SrcReg)
2360     return false;
2361   bool SrcIsKill = hasTrivialKill(LHS);
2362 
2363   if (BW == 64 && !Is64Bit)
2364     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
2365                                         AArch64::sub_32);
2366 
2367   if ((BW < 32) && !IsBitTest)
2368     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2369 
2370   // Emit the combined compare and branch instruction.
2371   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2372   MachineInstrBuilder MIB =
2373       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2374           .addReg(SrcReg, getKillRegState(SrcIsKill));
2375   if (IsBitTest)
2376     MIB.addImm(TestBit);
2377   MIB.addMBB(TBB);
2378 
2379   finishCondBranch(BI->getParent(), TBB, FBB);
2380   return true;
2381 }
2382 
2383 bool AArch64FastISel::selectBranch(const Instruction *I) {
2384   const BranchInst *BI = cast<BranchInst>(I);
2385   if (BI->isUnconditional()) {
2386     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2387     fastEmitBranch(MSucc, BI->getDebugLoc());
2388     return true;
2389   }
2390 
2391   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2392   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2393 
2394   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2395     if (CI->hasOneUse() && isValueAvailable(CI)) {
2396       // Try to optimize or fold the cmp.
2397       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2398       switch (Predicate) {
2399       default:
2400         break;
2401       case CmpInst::FCMP_FALSE:
2402         fastEmitBranch(FBB, DbgLoc);
2403         return true;
2404       case CmpInst::FCMP_TRUE:
2405         fastEmitBranch(TBB, DbgLoc);
2406         return true;
2407       }
2408 
2409       // Try to emit a combined compare-and-branch first.
2410       if (emitCompareAndBranch(BI))
2411         return true;
2412 
2413       // Try to take advantage of fallthrough opportunities.
2414       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2415         std::swap(TBB, FBB);
2416         Predicate = CmpInst::getInversePredicate(Predicate);
2417       }
2418 
2419       // Emit the cmp.
2420       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2421         return false;
2422 
2423       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2424       // instruction.
2425       AArch64CC::CondCode CC = getCompareCC(Predicate);
2426       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2427       switch (Predicate) {
2428       default:
2429         break;
2430       case CmpInst::FCMP_UEQ:
2431         ExtraCC = AArch64CC::EQ;
2432         CC = AArch64CC::VS;
2433         break;
2434       case CmpInst::FCMP_ONE:
2435         ExtraCC = AArch64CC::MI;
2436         CC = AArch64CC::GT;
2437         break;
2438       }
2439       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2440 
2441       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2442       if (ExtraCC != AArch64CC::AL) {
2443         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2444             .addImm(ExtraCC)
2445             .addMBB(TBB);
2446       }
2447 
2448       // Emit the branch.
2449       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2450           .addImm(CC)
2451           .addMBB(TBB);
2452 
2453       finishCondBranch(BI->getParent(), TBB, FBB);
2454       return true;
2455     }
2456   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2457     uint64_t Imm = CI->getZExtValue();
2458     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2459     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2460         .addMBB(Target);
2461 
2462     // Obtain the branch probability and add the target to the successor list.
2463     if (FuncInfo.BPI) {
2464       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2465           BI->getParent(), Target->getBasicBlock());
2466       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2467     } else
2468       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2469     return true;
2470   } else {
2471     AArch64CC::CondCode CC = AArch64CC::NE;
2472     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2473       // Fake request the condition; otherwise the intrinsic might be completely
2474       // optimized away.
2475       unsigned CondReg = getRegForValue(BI->getCondition());
2476       if (!CondReg)
2477         return false;
2478 
2479       // Emit the branch.
2480       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2481         .addImm(CC)
2482         .addMBB(TBB);
2483 
2484       finishCondBranch(BI->getParent(), TBB, FBB);
2485       return true;
2486     }
2487   }
2488 
2489   unsigned CondReg = getRegForValue(BI->getCondition());
2490   if (CondReg == 0)
2491     return false;
2492   bool CondRegIsKill = hasTrivialKill(BI->getCondition());
2493 
2494   // i1 conditions come in as i32 values; test the lowest bit with tb(n)z.
2495   unsigned Opcode = AArch64::TBNZW;
2496   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2497     std::swap(TBB, FBB);
2498     Opcode = AArch64::TBZW;
2499   }
2500 
2501   const MCInstrDesc &II = TII.get(Opcode);
2502   unsigned ConstrainedCondReg
2503     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2504   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2505       .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill))
2506       .addImm(0)
2507       .addMBB(TBB);
2508 
2509   finishCondBranch(BI->getParent(), TBB, FBB);
2510   return true;
2511 }
2512 
2513 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2514   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2515   unsigned AddrReg = getRegForValue(BI->getOperand(0));
2516   if (AddrReg == 0)
2517     return false;
2518 
2519   // Emit the indirect branch.
2520   const MCInstrDesc &II = TII.get(AArch64::BR);
2521   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2522   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2523 
2524   // Make sure the CFG is up-to-date.
2525   for (auto *Succ : BI->successors())
2526     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2527 
2528   return true;
2529 }
2530 
2531 bool AArch64FastISel::selectCmp(const Instruction *I) {
2532   const CmpInst *CI = cast<CmpInst>(I);
2533 
2534   // Vectors of i1 are weird: bail out.
2535   if (CI->getType()->isVectorTy())
2536     return false;
2537 
2538   // Try to optimize or fold the cmp.
2539   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2540   unsigned ResultReg = 0;
2541   switch (Predicate) {
2542   default:
2543     break;
2544   case CmpInst::FCMP_FALSE:
2545     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2546     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2547             TII.get(TargetOpcode::COPY), ResultReg)
2548         .addReg(AArch64::WZR, getKillRegState(true));
2549     break;
2550   case CmpInst::FCMP_TRUE:
2551     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2552     break;
2553   }
2554 
2555   if (ResultReg) {
2556     updateValueMap(I, ResultReg);
2557     return true;
2558   }
2559 
2560   // Emit the cmp.
2561   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2562     return false;
2563 
2564   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2565 
2566   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2567   // condition codes are inverted, because they are used by CSINC.
2568   static unsigned CondCodeTable[2][2] = {
2569     { AArch64CC::NE, AArch64CC::VC },
2570     { AArch64CC::PL, AArch64CC::LE }
2571   };
2572   unsigned *CondCodes = nullptr;
2573   switch (Predicate) {
2574   default:
2575     break;
2576   case CmpInst::FCMP_UEQ:
2577     CondCodes = &CondCodeTable[0][0];
2578     break;
2579   case CmpInst::FCMP_ONE:
2580     CondCodes = &CondCodeTable[1][0];
2581     break;
2582   }
2583 
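  // The two CSINC instructions below effectively OR the two required condition
  // checks together (EQ || VS for FCMP_UEQ, GT || MI for FCMP_ONE).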
2584   if (CondCodes) {
2585     unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2586     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2587             TmpReg1)
2588         .addReg(AArch64::WZR, getKillRegState(true))
2589         .addReg(AArch64::WZR, getKillRegState(true))
2590         .addImm(CondCodes[0]);
2591     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2592             ResultReg)
2593         .addReg(TmpReg1, getKillRegState(true))
2594         .addReg(AArch64::WZR, getKillRegState(true))
2595         .addImm(CondCodes[1]);
2596 
2597     updateValueMap(I, ResultReg);
2598     return true;
2599   }
2600 
2601   // Now set a register based on the comparison.
2602   AArch64CC::CondCode CC = getCompareCC(Predicate);
2603   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2604   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
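  // CSINC with WZR sources and the inverted condition materializes 1 when CC
  // holds and 0 otherwise.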
2605   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2606           ResultReg)
2607       .addReg(AArch64::WZR, getKillRegState(true))
2608       .addReg(AArch64::WZR, getKillRegState(true))
2609       .addImm(invertedCC);
2610 
2611   updateValueMap(I, ResultReg);
2612   return true;
2613 }
2614 
2615 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2616 /// value.
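/// Such a select collapses to a single ORR, BIC, or AND of the condition with
/// the other operand, plus an extra EOR to invert the condition when the
/// false operand is the constant 1.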
2617 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2618   if (!SI->getType()->isIntegerTy(1))
2619     return false;
2620 
2621   const Value *Src1Val, *Src2Val;
2622   unsigned Opc = 0;
2623   bool NeedExtraOp = false;
2624   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2625     if (CI->isOne()) {
2626       Src1Val = SI->getCondition();
2627       Src2Val = SI->getFalseValue();
2628       Opc = AArch64::ORRWrr;
2629     } else {
2630       assert(CI->isZero());
2631       Src1Val = SI->getFalseValue();
2632       Src2Val = SI->getCondition();
2633       Opc = AArch64::BICWrr;
2634     }
2635   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2636     if (CI->isOne()) {
2637       Src1Val = SI->getCondition();
2638       Src2Val = SI->getTrueValue();
2639       Opc = AArch64::ORRWrr;
2640       NeedExtraOp = true;
2641     } else {
2642       assert(CI->isZero());
2643       Src1Val = SI->getCondition();
2644       Src2Val = SI->getTrueValue();
2645       Opc = AArch64::ANDWrr;
2646     }
2647   }
2648 
2649   if (!Opc)
2650     return false;
2651 
2652   unsigned Src1Reg = getRegForValue(Src1Val);
2653   if (!Src1Reg)
2654     return false;
2655   bool Src1IsKill = hasTrivialKill(Src1Val);
2656 
2657   unsigned Src2Reg = getRegForValue(Src2Val);
2658   if (!Src2Reg)
2659     return false;
2660   bool Src2IsKill = hasTrivialKill(Src2Val);
2661 
2662   if (NeedExtraOp) {
2663     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1);
2664     Src1IsKill = true;
2665   }
2666   unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2667                                        Src1IsKill, Src2Reg, Src2IsKill);
2668   updateValueMap(SI, ResultReg);
2669   return true;
2670 }
2671 
2672 bool AArch64FastISel::selectSelect(const Instruction *I) {
2673   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2674   MVT VT;
2675   if (!isTypeSupported(I->getType(), VT))
2676     return false;
2677 
2678   unsigned Opc;
2679   const TargetRegisterClass *RC;
2680   switch (VT.SimpleTy) {
2681   default:
2682     return false;
2683   case MVT::i1:
2684   case MVT::i8:
2685   case MVT::i16:
2686   case MVT::i32:
2687     Opc = AArch64::CSELWr;
2688     RC = &AArch64::GPR32RegClass;
2689     break;
2690   case MVT::i64:
2691     Opc = AArch64::CSELXr;
2692     RC = &AArch64::GPR64RegClass;
2693     break;
2694   case MVT::f32:
2695     Opc = AArch64::FCSELSrrr;
2696     RC = &AArch64::FPR32RegClass;
2697     break;
2698   case MVT::f64:
2699     Opc = AArch64::FCSELDrrr;
2700     RC = &AArch64::FPR64RegClass;
2701     break;
2702   }
2703 
2704   const SelectInst *SI = cast<SelectInst>(I);
2705   const Value *Cond = SI->getCondition();
2706   AArch64CC::CondCode CC = AArch64CC::NE;
2707   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2708 
2709   if (optimizeSelect(SI))
2710     return true;
2711 
2712   // Try to pick up the flags, so we don't have to emit another compare.
2713   if (foldXALUIntrinsic(CC, I, Cond)) {
2714     // Fake request the condition to force emission of the XALU intrinsic.
2715     unsigned CondReg = getRegForValue(Cond);
2716     if (!CondReg)
2717       return false;
2718   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2719              isValueAvailable(Cond)) {
2720     const auto *Cmp = cast<CmpInst>(Cond);
2721     // Try to optimize or fold the cmp.
2722     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2723     const Value *FoldSelect = nullptr;
2724     switch (Predicate) {
2725     default:
2726       break;
2727     case CmpInst::FCMP_FALSE:
2728       FoldSelect = SI->getFalseValue();
2729       break;
2730     case CmpInst::FCMP_TRUE:
2731       FoldSelect = SI->getTrueValue();
2732       break;
2733     }
2734 
2735     if (FoldSelect) {
2736       unsigned SrcReg = getRegForValue(FoldSelect);
2737       if (!SrcReg)
2738         return false;
2739       unsigned UseReg = lookUpRegForValue(SI);
2740       if (UseReg)
2741         MRI.clearKillFlags(UseReg);
2742 
2743       updateValueMap(I, SrcReg);
2744       return true;
2745     }
2746 
2747     // Emit the cmp.
2748     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2749       return false;
2750 
2751     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2752     CC = getCompareCC(Predicate);
2753     switch (Predicate) {
2754     default:
2755       break;
2756     case CmpInst::FCMP_UEQ:
2757       ExtraCC = AArch64CC::EQ;
2758       CC = AArch64CC::VS;
2759       break;
2760     case CmpInst::FCMP_ONE:
2761       ExtraCC = AArch64CC::MI;
2762       CC = AArch64CC::GT;
2763       break;
2764     }
2765     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2766   } else {
2767     unsigned CondReg = getRegForValue(Cond);
2768     if (!CondReg)
2769       return false;
2770     bool CondIsKill = hasTrivialKill(Cond);
2771 
2772     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2773     CondReg = constrainOperandRegClass(II, CondReg, 1);
2774 
2775     // Emit a TST instruction (ANDS wzr, reg, #imm).
2776     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2777             AArch64::WZR)
2778         .addReg(CondReg, getKillRegState(CondIsKill))
2779         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2780   }
2781 
2782   unsigned Src1Reg = getRegForValue(SI->getTrueValue());
2783   bool Src1IsKill = hasTrivialKill(SI->getTrueValue());
2784 
2785   unsigned Src2Reg = getRegForValue(SI->getFalseValue());
2786   bool Src2IsKill = hasTrivialKill(SI->getFalseValue());
2787 
2788   if (!Src1Reg || !Src2Reg)
2789     return false;
2790 
2791   if (ExtraCC != AArch64CC::AL) {
2792     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2793                                Src2IsKill, ExtraCC);
2794     Src2IsKill = true;
2795   }
2796   unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg,
2797                                         Src2IsKill, CC);
2798   updateValueMap(I, ResultReg);
2799   return true;
2800 }
2801 
2802 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2803   Value *V = I->getOperand(0);
2804   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2805     return false;
2806 
2807   unsigned Op = getRegForValue(V);
2808   if (Op == 0)
2809     return false;
2810 
2811   unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass);
2812   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2813           ResultReg).addReg(Op);
2814   updateValueMap(I, ResultReg);
2815   return true;
2816 }
2817 
2818 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2819   Value *V = I->getOperand(0);
2820   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2821     return false;
2822 
2823   unsigned Op = getRegForValue(V);
2824   if (Op == 0)
2825     return false;
2826 
2827   unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass);
2828   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2829           ResultReg).addReg(Op);
2830   updateValueMap(I, ResultReg);
2831   return true;
2832 }
2833 
2834 // FPToUI and FPToSI
2835 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2836   MVT DestVT;
2837   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2838     return false;
2839 
2840   unsigned SrcReg = getRegForValue(I->getOperand(0));
2841   if (SrcReg == 0)
2842     return false;
2843 
2844   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2845   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2846     return false;
2847 
2848   unsigned Opc;
2849   if (SrcVT == MVT::f64) {
2850     if (Signed)
2851       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2852     else
2853       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2854   } else {
2855     if (Signed)
2856       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2857     else
2858       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2859   }
2860   unsigned ResultReg = createResultReg(
2861       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2862   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2863       .addReg(SrcReg);
2864   updateValueMap(I, ResultReg);
2865   return true;
2866 }
2867 
2868 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2869   MVT DestVT;
2870   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2871     return false;
2872   // Let regular ISEL handle FP16
2873   if (DestVT == MVT::f16)
2874     return false;
2875 
2876   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2877          "Unexpected value type.");
2878 
2879   unsigned SrcReg = getRegForValue(I->getOperand(0));
2880   if (!SrcReg)
2881     return false;
2882   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
2883 
2884   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2885 
2886   // Handle sign-extension.
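  // The conversion instructions only take W or X sources, so narrower integers
  // are widened to i32 first, zero- or sign-extending to match the conversion.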
2887   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2888     SrcReg =
2889         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2890     if (!SrcReg)
2891       return false;
2892     SrcIsKill = true;
2893   }
2894 
2895   unsigned Opc;
2896   if (SrcVT == MVT::i64) {
2897     if (Signed)
2898       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2899     else
2900       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2901   } else {
2902     if (Signed)
2903       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2904     else
2905       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2906   }
2907 
2908   unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg,
2909                                       SrcIsKill);
2910   updateValueMap(I, ResultReg);
2911   return true;
2912 }
2913 
2914 bool AArch64FastISel::fastLowerArguments() {
2915   if (!FuncInfo.CanLowerReturn)
2916     return false;
2917 
2918   const Function *F = FuncInfo.Fn;
2919   if (F->isVarArg())
2920     return false;
2921 
2922   CallingConv::ID CC = F->getCallingConv();
2923   if (CC != CallingConv::C && CC != CallingConv::Swift)
2924     return false;
2925 
2926   if (Subtarget->hasCustomCallingConv())
2927     return false;
2928 
2929   // Only handle simple cases of up to 8 GPR and FPR each.
2930   unsigned GPRCnt = 0;
2931   unsigned FPRCnt = 0;
2932   for (auto const &Arg : F->args()) {
2933     if (Arg.hasAttribute(Attribute::ByVal) ||
2934         Arg.hasAttribute(Attribute::InReg) ||
2935         Arg.hasAttribute(Attribute::StructRet) ||
2936         Arg.hasAttribute(Attribute::SwiftSelf) ||
2937         Arg.hasAttribute(Attribute::SwiftError) ||
2938         Arg.hasAttribute(Attribute::Nest))
2939       return false;
2940 
2941     Type *ArgTy = Arg.getType();
2942     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2943       return false;
2944 
2945     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2946     if (!ArgVT.isSimple())
2947       return false;
2948 
2949     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2950     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2951       return false;
2952 
2953     if (VT.isVector() &&
2954         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2955       return false;
2956 
2957     if (VT >= MVT::i1 && VT <= MVT::i64)
2958       ++GPRCnt;
2959     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2960              VT.is128BitVector())
2961       ++FPRCnt;
2962     else
2963       return false;
2964 
2965     if (GPRCnt > 8 || FPRCnt > 8)
2966       return false;
2967   }
2968 
2969   static const MCPhysReg Registers[6][8] = {
2970     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2971       AArch64::W5, AArch64::W6, AArch64::W7 },
2972     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2973       AArch64::X5, AArch64::X6, AArch64::X7 },
2974     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2975       AArch64::H5, AArch64::H6, AArch64::H7 },
2976     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2977       AArch64::S5, AArch64::S6, AArch64::S7 },
2978     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2979       AArch64::D5, AArch64::D6, AArch64::D7 },
2980     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2981       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2982   };
2983 
2984   unsigned GPRIdx = 0;
2985   unsigned FPRIdx = 0;
2986   for (auto const &Arg : F->args()) {
2987     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2988     unsigned SrcReg;
2989     const TargetRegisterClass *RC;
2990     if (VT >= MVT::i1 && VT <= MVT::i32) {
2991       SrcReg = Registers[0][GPRIdx++];
2992       RC = &AArch64::GPR32RegClass;
2993       VT = MVT::i32;
2994     } else if (VT == MVT::i64) {
2995       SrcReg = Registers[1][GPRIdx++];
2996       RC = &AArch64::GPR64RegClass;
2997     } else if (VT == MVT::f16) {
2998       SrcReg = Registers[2][FPRIdx++];
2999       RC = &AArch64::FPR16RegClass;
3000     } else if (VT == MVT::f32) {
3001       SrcReg = Registers[3][FPRIdx++];
3002       RC = &AArch64::FPR32RegClass;
3003     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
3004       SrcReg = Registers[4][FPRIdx++];
3005       RC = &AArch64::FPR64RegClass;
3006     } else if (VT.is128BitVector()) {
3007       SrcReg = Registers[5][FPRIdx++];
3008       RC = &AArch64::FPR128RegClass;
3009     } else
3010       llvm_unreachable("Unexpected value type.");
3011 
3012     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3013     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3014     // Without this, EmitLiveInCopies may eliminate the livein if its only
3015     // use is a bitcast (which isn't turned into an instruction).
3016     unsigned ResultReg = createResultReg(RC);
3017     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3018             TII.get(TargetOpcode::COPY), ResultReg)
3019         .addReg(DstReg, getKillRegState(true));
3020     updateValueMap(&Arg, ResultReg);
3021   }
3022   return true;
3023 }
3024 
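// Lower the outgoing arguments of a call: run the calling-convention analysis,
// emit CALLSEQ_START, promote each argument as required (SExt/ZExt/AExt), and
// either copy it into its argument register or store it to its stack slot.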
3025 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3026                                       SmallVectorImpl<MVT> &OutVTs,
3027                                       unsigned &NumBytes) {
3028   CallingConv::ID CC = CLI.CallConv;
3029   SmallVector<CCValAssign, 16> ArgLocs;
3030   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3031   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3032 
3033   // Get a count of how many bytes are to be pushed on the stack.
3034   NumBytes = CCInfo.getNextStackOffset();
3035 
3036   // Issue CALLSEQ_START
3037   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3038   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3039     .addImm(NumBytes).addImm(0);
3040 
3041   // Process the args.
3042   for (CCValAssign &VA : ArgLocs) {
3043     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3044     MVT ArgVT = OutVTs[VA.getValNo()];
3045 
3046     unsigned ArgReg = getRegForValue(ArgVal);
3047     if (!ArgReg)
3048       return false;
3049 
3050     // Handle arg promotion: SExt, ZExt, AExt.
3051     switch (VA.getLocInfo()) {
3052     case CCValAssign::Full:
3053       break;
3054     case CCValAssign::SExt: {
3055       MVT DestVT = VA.getLocVT();
3056       MVT SrcVT = ArgVT;
3057       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3058       if (!ArgReg)
3059         return false;
3060       break;
3061     }
3062     case CCValAssign::AExt:
3063     // Intentional fall-through.
3064     case CCValAssign::ZExt: {
3065       MVT DestVT = VA.getLocVT();
3066       MVT SrcVT = ArgVT;
3067       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3068       if (!ArgReg)
3069         return false;
3070       break;
3071     }
3072     default:
3073       llvm_unreachable("Unknown arg promotion!");
3074     }
3075 
3076     // Now copy/store arg to correct locations.
3077     if (VA.isRegLoc() && !VA.needsCustom()) {
3078       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3079               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3080       CLI.OutRegs.push_back(VA.getLocReg());
3081     } else if (VA.needsCustom()) {
3082       // FIXME: Handle custom args.
3083       return false;
3084     } else {
3085       assert(VA.isMemLoc() && "Assuming store on stack.");
3086 
3087       // Don't emit stores for undef values.
3088       if (isa<UndefValue>(ArgVal))
3089         continue;
3090 
3091       // Need to store on the stack.
3092       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3093 
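      // Big-endian adjustment: arguments smaller than a stack slot are stored
      // at the far end of their 8-byte slot, so bump the offset by the unused
      // bytes.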
3094       unsigned BEAlign = 0;
3095       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3096         BEAlign = 8 - ArgSize;
3097 
3098       Address Addr;
3099       Addr.setKind(Address::RegBase);
3100       Addr.setReg(AArch64::SP);
3101       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3102 
3103       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
3104       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3105           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3106           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3107 
3108       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3109         return false;
3110     }
3111   }
3112   return true;
3113 }
3114 
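// Finish a call: emit CALLSEQ_END and, for non-void calls, copy the single
// supported return value out of its physical register.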
3115 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3116                                  unsigned NumBytes) {
3117   CallingConv::ID CC = CLI.CallConv;
3118 
3119   // Issue CALLSEQ_END
3120   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3121   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3122     .addImm(NumBytes).addImm(0);
3123 
3124   // Now the return value.
3125   if (RetVT != MVT::isVoid) {
3126     SmallVector<CCValAssign, 16> RVLocs;
3127     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3128     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3129 
3130     // Only handle a single return value.
3131     if (RVLocs.size() != 1)
3132       return false;
3133 
3134     // Copy all of the result registers out of their specified physreg.
3135     MVT CopyVT = RVLocs[0].getValVT();
3136 
3137     // TODO: Handle big-endian results
3138     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3139       return false;
3140 
3141     unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3142     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3143             TII.get(TargetOpcode::COPY), ResultReg)
3144         .addReg(RVLocs[0].getLocReg());
3145     CLI.InRegs.push_back(RVLocs[0].getLocReg());
3146 
3147     CLI.ResultReg = ResultReg;
3148     CLI.NumResultRegs = 1;
3149   }
3150 
3151   return true;
3152 }
3153 
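// Fast-path call lowering. Bails out to SelectionDAG for tail calls, varargs,
// unsupported code models, and argument or return types/attributes that are
// not handled here.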
3154 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3155   CallingConv::ID CC  = CLI.CallConv;
3156   bool IsTailCall     = CLI.IsTailCall;
3157   bool IsVarArg       = CLI.IsVarArg;
3158   const Value *Callee = CLI.Callee;
3159   MCSymbol *Symbol = CLI.Symbol;
3160 
3161   if (!Callee && !Symbol)
3162     return false;
3163 
3164   // Allow SelectionDAG isel to handle tail calls.
3165   if (IsTailCall)
3166     return false;
3167 
3168   CodeModel::Model CM = TM.getCodeModel();
3169   // Only support the small-addressing and large code models.
3170   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3171     return false;
3172 
3173   // FIXME: Add large code model support for ELF.
3174   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3175     return false;
3176 
3177   // Let SDISel handle vararg functions.
3178   if (IsVarArg)
3179     return false;
3180 
3181   // FIXME: Only handle *simple* calls for now.
3182   MVT RetVT;
3183   if (CLI.RetTy->isVoidTy())
3184     RetVT = MVT::isVoid;
3185   else if (!isTypeLegal(CLI.RetTy, RetVT))
3186     return false;
3187 
3188   for (auto Flag : CLI.OutFlags)
3189     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3190         Flag.isSwiftSelf() || Flag.isSwiftError())
3191       return false;
3192 
3193   // Set up the argument vectors.
3194   SmallVector<MVT, 16> OutVTs;
3195   OutVTs.reserve(CLI.OutVals.size());
3196 
3197   for (auto *Val : CLI.OutVals) {
3198     MVT VT;
3199     if (!isTypeLegal(Val->getType(), VT) &&
3200         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3201       return false;
3202 
3203     // We don't handle vector parameters yet.
3204     if (VT.isVector() || VT.getSizeInBits() > 64)
3205       return false;
3206 
3207     OutVTs.push_back(VT);
3208   }
3209 
3210   Address Addr;
3211   if (Callee && !computeCallAddress(Callee, Addr))
3212     return false;
3213 
3214   // Handle the arguments now that we've gotten them.
3215   unsigned NumBytes;
3216   if (!processCallArgs(CLI, OutVTs, NumBytes))
3217     return false;
3218 
3219   const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3220   if (RegInfo->isAnyArgRegReserved(*MF))
3221     RegInfo->emitReservedArgRegCallError(*MF);
3222 
3223   // Issue the call.
3224   MachineInstrBuilder MIB;
3225   if (Subtarget->useSmallAddressing()) {
3226     const MCInstrDesc &II = TII.get(Addr.getReg() ? AArch64::BLR : AArch64::BL);
3227     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3228     if (Symbol)
3229       MIB.addSym(Symbol, 0);
3230     else if (Addr.getGlobalValue())
3231       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3232     else if (Addr.getReg()) {
3233       unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3234       MIB.addReg(Reg);
3235     } else
3236       return false;
3237   } else {
3238     unsigned CallReg = 0;
3239     if (Symbol) {
3240       unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3241       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3242               ADRPReg)
3243           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3244 
3245       CallReg = createResultReg(&AArch64::GPR64RegClass);
3246       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3247               TII.get(AArch64::LDRXui), CallReg)
3248           .addReg(ADRPReg)
3249           .addSym(Symbol,
3250                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3251     } else if (Addr.getGlobalValue())
3252       CallReg = materializeGV(Addr.getGlobalValue());
3253     else if (Addr.getReg())
3254       CallReg = Addr.getReg();
3255 
3256     if (!CallReg)
3257       return false;
3258 
3259     const MCInstrDesc &II = TII.get(AArch64::BLR);
3260     CallReg = constrainOperandRegClass(II, CallReg, 0);
3261     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3262   }
3263 
3264   // Add implicit physical register uses to the call.
3265   for (auto Reg : CLI.OutRegs)
3266     MIB.addReg(Reg, RegState::Implicit);
3267 
3268   // Add a register mask with the call-preserved registers.
3269   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3270   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3271 
3272   CLI.Call = MIB;
3273 
3274   // Finish off the call including any return values.
3275   return finishCall(CLI, RetVT, NumBytes);
3276 }
3277 
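// Heuristic for inlining a constant-length memcpy: for example, a 16-byte copy
// with 4-byte alignment gives 16 / 4 = 4 <= 4 and is inlined, while a copy of
// unknown alignment is only inlined when it is shorter than 32 bytes.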
3278 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3279   if (Alignment)
3280     return Len / Alignment <= 4;
3281   else
3282     return Len < 32;
3283 }
3284 
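// Expand a small memcpy into load/store pairs, using the widest type the
// remaining length and alignment allow. For example, 15 bytes with 8-byte (or
// unknown) alignment become i64 + i32 + i16 + i8 copies.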
3285 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3286                                          uint64_t Len, unsigned Alignment) {
3287   // Make sure we don't bloat code by inlining very large memcpy's.
3288   if (!isMemCpySmall(Len, Alignment))
3289     return false;
3290 
3291   int64_t UnscaledOffset = 0;
3292   Address OrigDest = Dest;
3293   Address OrigSrc = Src;
3294 
3295   while (Len) {
3296     MVT VT;
3297     if (!Alignment || Alignment >= 8) {
3298       if (Len >= 8)
3299         VT = MVT::i64;
3300       else if (Len >= 4)
3301         VT = MVT::i32;
3302       else if (Len >= 2)
3303         VT = MVT::i16;
3304       else {
3305         VT = MVT::i8;
3306       }
3307     } else {
3308       // Bound based on alignment.
3309       if (Len >= 4 && Alignment == 4)
3310         VT = MVT::i32;
3311       else if (Len >= 2 && Alignment == 2)
3312         VT = MVT::i16;
3313       else {
3314         VT = MVT::i8;
3315       }
3316     }
3317 
3318     unsigned ResultReg = emitLoad(VT, VT, Src);
3319     if (!ResultReg)
3320       return false;
3321 
3322     if (!emitStore(VT, ResultReg, Dest))
3323       return false;
3324 
3325     int64_t Size = VT.getSizeInBits() / 8;
3326     Len -= Size;
3327     UnscaledOffset += Size;
3328 
3329     // We need to recompute the unscaled offset for each iteration.
3330     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3331     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3332   }
3333 
3334   return true;
3335 }
3336 
3337 /// Check if it is possible to fold the condition from the XALU intrinsic
3338 /// into the user. The condition code will only be updated on success.
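///
/// Illustrative IR pattern this matches (shown here for i32):
///   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %val = extractvalue { i32, i1 } %res, 0
///   %ovf = extractvalue { i32, i1 } %res, 1
///   br i1 %ovf, label %overflow, label %cont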
3339 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3340                                         const Instruction *I,
3341                                         const Value *Cond) {
3342   if (!isa<ExtractValueInst>(Cond))
3343     return false;
3344 
3345   const auto *EV = cast<ExtractValueInst>(Cond);
3346   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3347     return false;
3348 
3349   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3350   MVT RetVT;
3351   const Function *Callee = II->getCalledFunction();
3352   Type *RetTy =
3353       cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3354   if (!isTypeLegal(RetTy, RetVT))
3355     return false;
3356 
3357   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3358     return false;
3359 
3360   const Value *LHS = II->getArgOperand(0);
3361   const Value *RHS = II->getArgOperand(1);
3362 
3363   // Canonicalize immediate to the RHS.
3364   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3365       isCommutativeIntrinsic(II))
3366     std::swap(LHS, RHS);
3367 
3368   // Simplify multiplies.
3369   Intrinsic::ID IID = II->getIntrinsicID();
3370   switch (IID) {
3371   default:
3372     break;
3373   case Intrinsic::smul_with_overflow:
3374     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3375       if (C->getValue() == 2)
3376         IID = Intrinsic::sadd_with_overflow;
3377     break;
3378   case Intrinsic::umul_with_overflow:
3379     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3380       if (C->getValue() == 2)
3381         IID = Intrinsic::uadd_with_overflow;
3382     break;
3383   }
3384 
3385   AArch64CC::CondCode TmpCC;
3386   switch (IID) {
3387   default:
3388     return false;
3389   case Intrinsic::sadd_with_overflow:
3390   case Intrinsic::ssub_with_overflow:
3391     TmpCC = AArch64CC::VS;
3392     break;
3393   case Intrinsic::uadd_with_overflow:
3394     TmpCC = AArch64CC::HS;
3395     break;
3396   case Intrinsic::usub_with_overflow:
3397     TmpCC = AArch64CC::LO;
3398     break;
3399   case Intrinsic::smul_with_overflow:
3400   case Intrinsic::umul_with_overflow:
3401     TmpCC = AArch64CC::NE;
3402     break;
3403   }
3404 
3405   // Check if both instructions are in the same basic block.
3406   if (!isValueAvailable(II))
3407     return false;
3408 
3409   // Make sure nothing is in the way between the intrinsic and its user.
3410   BasicBlock::const_iterator Start(I);
3411   BasicBlock::const_iterator End(II);
3412   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3413     // We only expect extractvalue instructions between the intrinsic and the
3414     // instruction to be selected.
3415     if (!isa<ExtractValueInst>(Itr))
3416       return false;
3417 
3418     // Check that the extractvalue operand comes from the intrinsic.
3419     const auto *EVI = cast<ExtractValueInst>(Itr);
3420     if (EVI->getAggregateOperand() != II)
3421       return false;
3422   }
3423 
3424   CC = TmpCC;
3425   return true;
3426 }
3427 
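// Lower a small set of intrinsics directly: frameaddress, sponentry,
// memcpy/memmove/memset, sin/cos/pow (as libcalls), fabs, trap/debugtrap,
// sqrt, and the *.with.overflow family.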
3428 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3429   // FIXME: Handle more intrinsics.
3430   switch (II->getIntrinsicID()) {
3431   default: return false;
3432   case Intrinsic::frameaddress: {
3433     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3434     MFI.setFrameAddressIsTaken(true);
3435 
3436     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3437     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3438     unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3439     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3440             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3441     // Recursively load frame address
3442     // ldr x0, [fp]
3443     // ldr x0, [x0]
3444     // ldr x0, [x0]
3445     // ...
3446     unsigned DestReg;
3447     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3448     while (Depth--) {
3449       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3450                                 SrcReg, /*IsKill=*/true, 0);
3451       assert(DestReg && "Unexpected LDR instruction emission failure.");
3452       SrcReg = DestReg;
3453     }
3454 
3455     updateValueMap(II, SrcReg);
3456     return true;
3457   }
3458   case Intrinsic::sponentry: {
3459     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3460 
3461     // SP = FP + Fixed Object + 16
3462     int FI = MFI.CreateFixedObject(4, 0, false);
3463     unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3464     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3465             TII.get(AArch64::ADDXri), ResultReg)
3466             .addFrameIndex(FI)
3467             .addImm(0)
3468             .addImm(0);
3469 
3470     updateValueMap(II, ResultReg);
3471     return true;
3472   }
3473   case Intrinsic::memcpy:
3474   case Intrinsic::memmove: {
3475     const auto *MTI = cast<MemTransferInst>(II);
3476     // Don't handle volatile.
3477     if (MTI->isVolatile())
3478       return false;
3479 
3480     // Disable inlining for memmove before calls to computeAddress. Otherwise,
3481     // we would emit dead code because we don't currently handle memmoves.
3482     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3483     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3484       // Small memcpy's are common enough that we want to do them without a call
3485       // if possible.
3486       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3487       unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3488                                     MTI->getSourceAlignment());
3489       if (isMemCpySmall(Len, Alignment)) {
3490         Address Dest, Src;
3491         if (!computeAddress(MTI->getRawDest(), Dest) ||
3492             !computeAddress(MTI->getRawSource(), Src))
3493           return false;
3494         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3495           return true;
3496       }
3497     }
3498 
3499     if (!MTI->getLength()->getType()->isIntegerTy(64))
3500       return false;
3501 
3502     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3503       // Fast instruction selection doesn't support the special
3504       // address spaces.
3505       return false;
3506 
3507     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3508     return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1);
3509   }
3510   case Intrinsic::memset: {
3511     const MemSetInst *MSI = cast<MemSetInst>(II);
3512     // Don't handle volatile.
3513     if (MSI->isVolatile())
3514       return false;
3515 
3516     if (!MSI->getLength()->getType()->isIntegerTy(64))
3517       return false;
3518 
3519     if (MSI->getDestAddressSpace() > 255)
3520       // Fast instruction selection doesn't support the special
3521       // address spaces.
3522       return false;
3523 
3524     return lowerCallTo(II, "memset", II->getNumArgOperands() - 1);
3525   }
3526   case Intrinsic::sin:
3527   case Intrinsic::cos:
3528   case Intrinsic::pow: {
3529     MVT RetVT;
3530     if (!isTypeLegal(II->getType(), RetVT))
3531       return false;
3532 
3533     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3534       return false;
3535 
3536     static const RTLIB::Libcall LibCallTable[3][2] = {
3537       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3538       { RTLIB::COS_F32, RTLIB::COS_F64 },
3539       { RTLIB::POW_F32, RTLIB::POW_F64 }
3540     };
3541     RTLIB::Libcall LC;
3542     bool Is64Bit = RetVT == MVT::f64;
3543     switch (II->getIntrinsicID()) {
3544     default:
3545       llvm_unreachable("Unexpected intrinsic.");
3546     case Intrinsic::sin:
3547       LC = LibCallTable[0][Is64Bit];
3548       break;
3549     case Intrinsic::cos:
3550       LC = LibCallTable[1][Is64Bit];
3551       break;
3552     case Intrinsic::pow:
3553       LC = LibCallTable[2][Is64Bit];
3554       break;
3555     }
3556 
3557     ArgListTy Args;
3558     Args.reserve(II->getNumArgOperands());
3559 
3560     // Populate the argument list.
3561     for (auto &Arg : II->arg_operands()) {
3562       ArgListEntry Entry;
3563       Entry.Val = Arg;
3564       Entry.Ty = Arg->getType();
3565       Args.push_back(Entry);
3566     }
3567 
3568     CallLoweringInfo CLI;
3569     MCContext &Ctx = MF->getContext();
3570     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3571                   TLI.getLibcallName(LC), std::move(Args));
3572     if (!lowerCallTo(CLI))
3573       return false;
3574     updateValueMap(II, CLI.ResultReg);
3575     return true;
3576   }
3577   case Intrinsic::fabs: {
3578     MVT VT;
3579     if (!isTypeLegal(II->getType(), VT))
3580       return false;
3581 
3582     unsigned Opc;
3583     switch (VT.SimpleTy) {
3584     default:
3585       return false;
3586     case MVT::f32:
3587       Opc = AArch64::FABSSr;
3588       break;
3589     case MVT::f64:
3590       Opc = AArch64::FABSDr;
3591       break;
3592     }
3593     unsigned SrcReg = getRegForValue(II->getOperand(0));
3594     if (!SrcReg)
3595       return false;
3596     bool SrcRegIsKill = hasTrivialKill(II->getOperand(0));
3597     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
3598     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3599       .addReg(SrcReg, getKillRegState(SrcRegIsKill));
3600     updateValueMap(II, ResultReg);
3601     return true;
3602   }
3603   case Intrinsic::trap:
3604     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3605         .addImm(1);
3606     return true;
3607   case Intrinsic::debugtrap: {
3608     if (Subtarget->isTargetWindows()) {
3609       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3610           .addImm(0xF000);
3611       return true;
3612     }
3613     break;
3614   }
3615 
3616   case Intrinsic::sqrt: {
3617     Type *RetTy = II->getCalledFunction()->getReturnType();
3618 
3619     MVT VT;
3620     if (!isTypeLegal(RetTy, VT))
3621       return false;
3622 
3623     unsigned Op0Reg = getRegForValue(II->getOperand(0));
3624     if (!Op0Reg)
3625       return false;
3626     bool Op0IsKill = hasTrivialKill(II->getOperand(0));
3627 
3628     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill);
3629     if (!ResultReg)
3630       return false;
3631 
3632     updateValueMap(II, ResultReg);
3633     return true;
3634   }
3635   case Intrinsic::sadd_with_overflow:
3636   case Intrinsic::uadd_with_overflow:
3637   case Intrinsic::ssub_with_overflow:
3638   case Intrinsic::usub_with_overflow:
3639   case Intrinsic::smul_with_overflow:
3640   case Intrinsic::umul_with_overflow: {
3641     // This implements the basic lowering of the xalu with overflow intrinsics.
3642     const Function *Callee = II->getCalledFunction();
3643     auto *Ty = cast<StructType>(Callee->getReturnType());
3644     Type *RetTy = Ty->getTypeAtIndex(0U);
3645 
3646     MVT VT;
3647     if (!isTypeLegal(RetTy, VT))
3648       return false;
3649 
3650     if (VT != MVT::i32 && VT != MVT::i64)
3651       return false;
3652 
3653     const Value *LHS = II->getArgOperand(0);
3654     const Value *RHS = II->getArgOperand(1);
3655     // Canonicalize immediate to the RHS.
3656     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
3657         isCommutativeIntrinsic(II))
3658       std::swap(LHS, RHS);
3659 
3660     // Simplify multiplies.
3661     Intrinsic::ID IID = II->getIntrinsicID();
3662     switch (IID) {
3663     default:
3664       break;
3665     case Intrinsic::smul_with_overflow:
3666       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3667         if (C->getValue() == 2) {
3668           IID = Intrinsic::sadd_with_overflow;
3669           RHS = LHS;
3670         }
3671       break;
3672     case Intrinsic::umul_with_overflow:
3673       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3674         if (C->getValue() == 2) {
3675           IID = Intrinsic::uadd_with_overflow;
3676           RHS = LHS;
3677         }
3678       break;
3679     }
3680 
3681     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3682     AArch64CC::CondCode CC = AArch64CC::Invalid;
3683     switch (IID) {
3684     default: llvm_unreachable("Unexpected intrinsic!");
3685     case Intrinsic::sadd_with_overflow:
3686       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3687       CC = AArch64CC::VS;
3688       break;
3689     case Intrinsic::uadd_with_overflow:
3690       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3691       CC = AArch64CC::HS;
3692       break;
3693     case Intrinsic::ssub_with_overflow:
3694       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3695       CC = AArch64CC::VS;
3696       break;
3697     case Intrinsic::usub_with_overflow:
3698       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3699       CC = AArch64CC::LO;
3700       break;
3701     case Intrinsic::smul_with_overflow: {
3702       CC = AArch64CC::NE;
3703       unsigned LHSReg = getRegForValue(LHS);
3704       if (!LHSReg)
3705         return false;
3706       bool LHSIsKill = hasTrivialKill(LHS);
3707 
3708       unsigned RHSReg = getRegForValue(RHS);
3709       if (!RHSReg)
3710         return false;
3711       bool RHSIsKill = hasTrivialKill(RHS);
3712 
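      // Signed overflow check: for i32, form the full 64-bit product with
      // SMULL and compare its high half against the sign-replicated low half
      // (low half ASR #31); for i64, compare SMULH of the operands against the
      // product ASR #63. A mismatch (NE) means the multiply overflowed.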
3713       if (VT == MVT::i32) {
3714         MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3715         unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg,
3716                                        /*IsKill=*/false, 32);
3717         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3718                                             AArch64::sub_32);
3719         ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true,
3720                                               AArch64::sub_32);
3721         emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3722                     AArch64_AM::ASR, 31, /*WantResult=*/false);
3723       } else {
3724         assert(VT == MVT::i64 && "Unexpected value type.");
3725         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3726         // reused in the next instruction.
3727         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3728                             /*IsKill=*/false);
3729         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill,
3730                                         RHSReg, RHSIsKill);
3731         emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false,
3732                     AArch64_AM::ASR, 63, /*WantResult=*/false);
3733       }
3734       break;
3735     }
3736     case Intrinsic::umul_with_overflow: {
3737       CC = AArch64CC::NE;
3738       unsigned LHSReg = getRegForValue(LHS);
3739       if (!LHSReg)
3740         return false;
3741       bool LHSIsKill = hasTrivialKill(LHS);
3742 
3743       unsigned RHSReg = getRegForValue(RHS);
3744       if (!RHSReg)
3745         return false;
3746       bool RHSIsKill = hasTrivialKill(RHS);
3747 
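      // Unsigned overflow check: for i32, form the 64-bit product with UMULL
      // and test that its top 32 bits are zero (SUBS against the product LSR
      // #32); for i64, UMULH of the operands must be zero. NE means overflow.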
3748       if (VT == MVT::i32) {
3749         MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill);
3750         emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg,
3751                     /*IsKill=*/false, AArch64_AM::LSR, 32,
3752                     /*WantResult=*/false);
3753         MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true,
3754                                             AArch64::sub_32);
3755       } else {
3756         assert(VT == MVT::i64 && "Unexpected value type.");
3757         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3758         // reused in the next instruction.
3759         MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg,
3760                             /*IsKill=*/false);
3761         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill,
3762                                         RHSReg, RHSIsKill);
3763         emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg,
3764                     /*IsKill=*/false, /*WantResult=*/false);
3765       }
3766       break;
3767     }
3768     }
3769 
3770     if (MulReg) {
3771       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3772       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3773               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3774     }
3775 
3776     if (!ResultReg1)
3777       return false;
3778 
3779     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3780                                   AArch64::WZR, /*IsKill=*/true, AArch64::WZR,
3781                                   /*IsKill=*/true, getInvertedCondCode(CC));
3782     (void)ResultReg2;
3783     assert((ResultReg1 + 1) == ResultReg2 &&
3784            "Nonconsecutive result registers.");
3785     updateValueMap(II, ResultReg1, 2);
3786     return true;
3787   }
3788   }
3789   return false;
3790 }
3791 
3792 bool AArch64FastISel::selectRet(const Instruction *I) {
3793   const ReturnInst *Ret = cast<ReturnInst>(I);
3794   const Function &F = *I->getParent()->getParent();
3795 
3796   if (!FuncInfo.CanLowerReturn)
3797     return false;
3798 
3799   if (F.isVarArg())
3800     return false;
3801 
3802   if (TLI.supportSwiftError() &&
3803       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3804     return false;
3805 
3806   if (TLI.supportSplitCSR(FuncInfo.MF))
3807     return false;
3808 
3809   // Build a list of return value registers.
3810   SmallVector<unsigned, 4> RetRegs;
3811 
3812   if (Ret->getNumOperands() > 0) {
3813     CallingConv::ID CC = F.getCallingConv();
3814     SmallVector<ISD::OutputArg, 4> Outs;
3815     GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3816 
3817     // Analyze operands of the call, assigning locations to each operand.
3818     SmallVector<CCValAssign, 16> ValLocs;
3819     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3820     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3821                                                      : RetCC_AArch64_AAPCS;
3822     CCInfo.AnalyzeReturn(Outs, RetCC);
3823 
3824     // Only handle a single return value for now.
3825     if (ValLocs.size() != 1)
3826       return false;
3827 
3828     CCValAssign &VA = ValLocs[0];
3829     const Value *RV = Ret->getOperand(0);
3830 
3831     // Don't bother handling odd stuff for now.
3832     if ((VA.getLocInfo() != CCValAssign::Full) &&
3833         (VA.getLocInfo() != CCValAssign::BCvt))
3834       return false;
3835 
3836     // Only handle register returns for now.
3837     if (!VA.isRegLoc())
3838       return false;
3839 
3840     unsigned Reg = getRegForValue(RV);
3841     if (Reg == 0)
3842       return false;
3843 
3844     unsigned SrcReg = Reg + VA.getValNo();
3845     unsigned DestReg = VA.getLocReg();
3846     // Avoid a cross-class copy. This is very unlikely.
3847     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3848       return false;
3849 
3850     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3851     if (!RVEVT.isSimple())
3852       return false;
3853 
3854     // Vectors (of > 1 lane) in big endian need tricky handling.
3855     if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
3856         !Subtarget->isLittleEndian())
3857       return false;
3858 
3859     MVT RVVT = RVEVT.getSimpleVT();
3860     if (RVVT == MVT::f128)
3861       return false;
3862 
3863     MVT DestVT = VA.getValVT();
3864     // Special handling for extended integers.
3865     if (RVVT != DestVT) {
3866       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3867         return false;
3868 
3869       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3870         return false;
3871 
3872       bool IsZExt = Outs[0].Flags.isZExt();
3873       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3874       if (SrcReg == 0)
3875         return false;
3876     }
3877 
3878     // Make the copy.
3879     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3880             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3881 
3882     // Add register to return instruction.
3883     RetRegs.push_back(VA.getLocReg());
3884   }
3885 
3886   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3887                                     TII.get(AArch64::RET_ReallyLR));
3888   for (unsigned RetReg : RetRegs)
3889     MIB.addReg(RetReg, RegState::Implicit);
3890   return true;
3891 }
3892 
3893 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3894   Type *DestTy = I->getType();
3895   Value *Op = I->getOperand(0);
3896   Type *SrcTy = Op->getType();
3897 
3898   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3899   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3900   if (!SrcEVT.isSimple())
3901     return false;
3902   if (!DestEVT.isSimple())
3903     return false;
3904 
3905   MVT SrcVT = SrcEVT.getSimpleVT();
3906   MVT DestVT = DestEVT.getSimpleVT();
3907 
3908   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3909       SrcVT != MVT::i8)
3910     return false;
3911   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3912       DestVT != MVT::i1)
3913     return false;
3914 
3915   unsigned SrcReg = getRegForValue(Op);
3916   if (!SrcReg)
3917     return false;
3918   bool SrcIsKill = hasTrivialKill(Op);
3919 
3920   // If we're truncating from i64 to a smaller non-legal type then generate an
3921   // AND. Otherwise, we know the high bits are undefined and a truncate only
3922   // generates a COPY. We cannot reuse the source register as the result
3923   // register, because this can incorrectly transfer the kill flag onto the
3924   // source register.
3925   unsigned ResultReg;
3926   if (SrcVT == MVT::i64) {
3927     uint64_t Mask = 0;
3928     switch (DestVT.SimpleTy) {
3929     default:
3930       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3931       return false;
3932     case MVT::i1:
3933       Mask = 0x1;
3934       break;
3935     case MVT::i8:
3936       Mask = 0xff;
3937       break;
3938     case MVT::i16:
3939       Mask = 0xffff;
3940       break;
3941     }
3942     // Issue an extract_subreg to get the lower 32-bits.
3943     unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill,
3944                                                 AArch64::sub_32);
3945     // Create the AND instruction which performs the actual truncation.
3946     ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask);
3947     assert(ResultReg && "Unexpected AND instruction emission failure.");
3948   } else {
3949     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3950     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3951             TII.get(TargetOpcode::COPY), ResultReg)
3952         .addReg(SrcReg, getKillRegState(SrcIsKill));
3953   }
3954 
3955   updateValueMap(I, ResultReg);
3956   return true;
3957 }
3958 
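// Extend an i1 value: zero-extension is an AND with #1 (plus SUBREG_TO_REG for
// an i64 destination); sign-extension uses SBFM to replicate bit 0. The
// i1 -> i64 sign-extension case is not handled here.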
3959 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3960   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3961           DestVT == MVT::i64) &&
3962          "Unexpected value type.");
3963   // Handle i8 and i16 as i32.
3964   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3965     DestVT = MVT::i32;
3966 
3967   if (IsZExt) {
3968     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1);
3969     assert(ResultReg && "Unexpected AND instruction emission failure.");
3970     if (DestVT == MVT::i64) {
3971       // We're ZExt i1 to i64.  The ANDWri Wd, Ws, #1 implicitly clears the
3972       // upper 32 bits.  Emit a SUBREG_TO_REG to extend from Wd to Xd.
3973       unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3974       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3975               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3976           .addImm(0)
3977           .addReg(ResultReg)
3978           .addImm(AArch64::sub_32);
3979       ResultReg = Reg64;
3980     }
3981     return ResultReg;
3982   } else {
3983     if (DestVT == MVT::i64) {
3984       // FIXME: We're SExt i1 to i64.
3985       return 0;
3986     }
3987     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3988                             /*TODO:IsKill=*/false, 0, 0);
3989   }
3990 }
3991 
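// Multiplies are emitted as multiply-add against the zero register:
// MUL Wd, Wn, Wm is MADD Wd, Wn, Wm, WZR, and the SMULL/UMULL helpers below
// likewise use SMADDL/UMADDL with XZR.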
3992 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
3993                                       unsigned Op1, bool Op1IsKill) {
3994   unsigned Opc, ZReg;
3995   switch (RetVT.SimpleTy) {
3996   default: return 0;
3997   case MVT::i8:
3998   case MVT::i16:
3999   case MVT::i32:
4000     RetVT = MVT::i32;
4001     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4002   case MVT::i64:
4003     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4004   }
4005 
4006   const TargetRegisterClass *RC =
4007       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4008   return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
4009                           ZReg, /*IsKill=*/true);
4010 }
4011 
4012 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4013                                         unsigned Op1, bool Op1IsKill) {
4014   if (RetVT != MVT::i64)
4015     return 0;
4016 
4017   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4018                           Op0, Op0IsKill, Op1, Op1IsKill,
4019                           AArch64::XZR, /*IsKill=*/true);
4020 }
4021 
4022 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
4023                                         unsigned Op1, bool Op1IsKill) {
4024   if (RetVT != MVT::i64)
4025     return 0;
4026 
4027   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4028                           Op0, Op0IsKill, Op1, Op1IsKill,
4029                           AArch64::XZR, /*IsKill=*/true);
4030 }
4031 
4032 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4033                                      unsigned Op1Reg, bool Op1IsKill) {
4034   unsigned Opc = 0;
4035   bool NeedTrunc = false;
4036   uint64_t Mask = 0;
4037   switch (RetVT.SimpleTy) {
4038   default: return 0;
4039   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
4040   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4041   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4042   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4043   }
4044 
4045   const TargetRegisterClass *RC =
4046       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4047   if (NeedTrunc) {
4048     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4049     Op1IsKill = true;
4050   }
4051   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4052                                        Op1IsKill);
4053   if (NeedTrunc)
4054     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4055   return ResultReg;
4056 }
4057 
4058 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4059                                      bool Op0IsKill, uint64_t Shift,
4060                                      bool IsZExt) {
4061   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4062          "Unexpected source/return type pair.");
4063   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4064           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4065          "Unexpected source value type.");
4066   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4067           RetVT == MVT::i64) && "Unexpected return value type.");
4068 
4069   bool Is64Bit = (RetVT == MVT::i64);
4070   unsigned RegSize = Is64Bit ? 64 : 32;
4071   unsigned DstBits = RetVT.getSizeInBits();
4072   unsigned SrcBits = SrcVT.getSizeInBits();
4073   const TargetRegisterClass *RC =
4074       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4075 
4076   // Just emit a copy for "zero" shifts.
4077   if (Shift == 0) {
4078     if (RetVT == SrcVT) {
4079       unsigned ResultReg = createResultReg(RC);
4080       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4081               TII.get(TargetOpcode::COPY), ResultReg)
4082           .addReg(Op0, getKillRegState(Op0IsKill));
4083       return ResultReg;
4084     } else
4085       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4086   }
4087 
4088   // Don't deal with undefined shifts.
4089   if (Shift >= DstBits)
4090     return 0;
4091 
4092   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4093   // {S|U}BFM Wd, Wn, #r, #s
4094   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4095 
4096   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4097   // %2 = shl i16 %1, 4
4098   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4099   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4100   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4101   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4102 
4103   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4104   // %2 = shl i16 %1, 8
4105   // Wd<32+7-24,32-24> = Wn<7:0>
4106   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4107   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4108   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4109 
4110   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4111   // %2 = shl i16 %1, 12
4112   // Wd<32+3-20,32-20> = Wn<3:0>
4113   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4114   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4115   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4116 
4117   unsigned ImmR = RegSize - Shift;
4118   // Limit the width to the length of the source type.
4119   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4120   static const unsigned OpcTable[2][2] = {
4121     {AArch64::SBFMWri, AArch64::SBFMXri},
4122     {AArch64::UBFMWri, AArch64::UBFMXri}
4123   };
4124   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4125   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4126     unsigned TmpReg = MRI.createVirtualRegister(RC);
4127     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4128             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4129         .addImm(0)
4130         .addReg(Op0, getKillRegState(Op0IsKill))
4131         .addImm(AArch64::sub_32);
4132     Op0 = TmpReg;
4133     Op0IsKill = true;
4134   }
4135   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4136 }
4137 
4138 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4139                                      unsigned Op1Reg, bool Op1IsKill) {
4140   unsigned Opc = 0;
4141   bool NeedTrunc = false;
4142   uint64_t Mask = 0;
4143   switch (RetVT.SimpleTy) {
4144   default: return 0;
4145   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4146   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4147   case MVT::i32: Opc = AArch64::LSRVWr; break;
4148   case MVT::i64: Opc = AArch64::LSRVXr; break;
4149   }
4150 
4151   const TargetRegisterClass *RC =
4152       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4153   if (NeedTrunc) {
4154     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask);
4155     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4156     Op0IsKill = Op1IsKill = true;
4157   }
4158   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4159                                        Op1IsKill);
4160   if (NeedTrunc)
4161     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4162   return ResultReg;
4163 }
4164 
4165 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4166                                      bool Op0IsKill, uint64_t Shift,
4167                                      bool IsZExt) {
4168   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4169          "Unexpected source/return type pair.");
4170   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4171           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4172          "Unexpected source value type.");
4173   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4174           RetVT == MVT::i64) && "Unexpected return value type.");
4175 
4176   bool Is64Bit = (RetVT == MVT::i64);
4177   unsigned RegSize = Is64Bit ? 64 : 32;
4178   unsigned DstBits = RetVT.getSizeInBits();
4179   unsigned SrcBits = SrcVT.getSizeInBits();
4180   const TargetRegisterClass *RC =
4181       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4182 
4183   // Just emit a copy for "zero" shifts.
4184   if (Shift == 0) {
4185     if (RetVT == SrcVT) {
4186       unsigned ResultReg = createResultReg(RC);
4187       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4188               TII.get(TargetOpcode::COPY), ResultReg)
4189           .addReg(Op0, getKillRegState(Op0IsKill));
4190       return ResultReg;
4191     } else
4192       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4193   }
4194 
4195   // Don't deal with undefined shifts.
4196   if (Shift >= DstBits)
4197     return 0;
4198 
4199   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4200   // {S|U}BFM Wd, Wn, #r, #s
4201   // Wd<s-r:0> = Wn<s:r> when r <= s
4202 
4203   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4204   // %2 = lshr i16 %1, 4
4205   // Wd<7-4:0> = Wn<7:4>
4206   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4207   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4208   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4209 
4210   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4211   // %2 = lshr i16 %1, 8
4212   // Wd<7-7,0> = Wn<7:7>
4213   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4214   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4215   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4216 
4217   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4218   // %2 = lshr i16 %1, 12
4219   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4220   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4221   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4222   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4223 
4224   if (Shift >= SrcBits && IsZExt)
4225     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4226 
4227   // It is not possible to fold a sign-extend into the LShr instruction. In this
4228   // case, emit an explicit sign-extend first and use a zero-extending shift.
4229   if (!IsZExt) {
4230     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4231     if (!Op0)
4232       return 0;
4233     Op0IsKill = true;
4234     SrcVT = RetVT;
4235     SrcBits = SrcVT.getSizeInBits();
4236     IsZExt = true;
4237   }
4238 
4239   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4240   unsigned ImmS = SrcBits - 1;
4241   static const unsigned OpcTable[2][2] = {
4242     {AArch64::SBFMWri, AArch64::SBFMXri},
4243     {AArch64::UBFMWri, AArch64::UBFMXri}
4244   };
4245   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4246   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4247     unsigned TmpReg = MRI.createVirtualRegister(RC);
4248     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4249             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4250         .addImm(0)
4251         .addReg(Op0, getKillRegState(Op0IsKill))
4252         .addImm(AArch64::sub_32);
4253     Op0 = TmpReg;
4254     Op0IsKill = true;
4255   }
4256   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4257 }
4258 
4259 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
4260                                      unsigned Op1Reg, bool Op1IsKill) {
4261   unsigned Opc = 0;
4262   bool NeedTrunc = false;
4263   uint64_t Mask = 0;
4264   switch (RetVT.SimpleTy) {
4265   default: return 0;
4266   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4267   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4268   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4269   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4270   }
4271 
4272   const TargetRegisterClass *RC =
4273       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4274   if (NeedTrunc) {
4275     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4276     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
4277     Op0IsKill = Op1IsKill = true;
4278   }
4279   unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
4280                                        Op1IsKill);
4281   if (NeedTrunc)
4282     ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
4283   return ResultReg;
4284 }
4285 
4286 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4287                                      bool Op0IsKill, uint64_t Shift,
4288                                      bool IsZExt) {
4289   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4290          "Unexpected source/return type pair.");
4291   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4292           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4293          "Unexpected source value type.");
4294   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4295           RetVT == MVT::i64) && "Unexpected return value type.");
4296 
4297   bool Is64Bit = (RetVT == MVT::i64);
4298   unsigned RegSize = Is64Bit ? 64 : 32;
4299   unsigned DstBits = RetVT.getSizeInBits();
4300   unsigned SrcBits = SrcVT.getSizeInBits();
4301   const TargetRegisterClass *RC =
4302       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4303 
4304   // Just emit a copy for "zero" shifts.
4305   if (Shift == 0) {
4306     if (RetVT == SrcVT) {
4307       unsigned ResultReg = createResultReg(RC);
4308       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4309               TII.get(TargetOpcode::COPY), ResultReg)
4310           .addReg(Op0, getKillRegState(Op0IsKill));
4311       return ResultReg;
4312     } else
4313       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4314   }
4315 
4316   // Don't deal with undefined shifts.
4317   if (Shift >= DstBits)
4318     return 0;
4319 
4320   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4321   // {S|U}BFM Wd, Wn, #r, #s
4322   // Wd<s-r:0> = Wn<s:r> when r <= s
4323 
4324   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4325   // %2 = ashr i16 %1, 4
4326   // Wd<7-4:0> = Wn<7:4>
4327   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4328   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4329   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4330 
4331   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4332   // %2 = ashr i16 %1, 8
4333   // Wd<7-7,0> = Wn<7:7>
4334   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4335   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4336   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4337 
4338   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4339   // %2 = ashr i16 %1, 12
4340   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4341   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4342   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4343   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4344 
4345   if (Shift >= SrcBits && IsZExt)
4346     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4347 
4348   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4349   unsigned ImmS = SrcBits - 1;
4350   static const unsigned OpcTable[2][2] = {
4351     {AArch64::SBFMWri, AArch64::SBFMXri},
4352     {AArch64::UBFMWri, AArch64::UBFMXri}
4353   };
4354   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4355   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4356     unsigned TmpReg = MRI.createVirtualRegister(RC);
4357     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4358             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4359         .addImm(0)
4360         .addReg(Op0, getKillRegState(Op0IsKill))
4361         .addImm(AArch64::sub_32);
4362     Op0 = TmpReg;
4363     Op0IsKill = true;
4364   }
4365   return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS);
4366 }
4367 
4368 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4369                                      bool IsZExt) {
4370   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4371 
4372   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4373   // DestVT are odd things, so test to make sure that they are both types we can
4374   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4375   // bail out to SelectionDAG.
4376   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4377        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4378       ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4379        (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4380     return 0;
4381 
4382   unsigned Opc;
4383   unsigned Imm = 0;
4384 
4385   switch (SrcVT.SimpleTy) {
4386   default:
4387     return 0;
4388   case MVT::i1:
4389     return emiti1Ext(SrcReg, DestVT, IsZExt);
4390   case MVT::i8:
4391     if (DestVT == MVT::i64)
4392       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4393     else
4394       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4395     Imm = 7;
4396     break;
4397   case MVT::i16:
4398     if (DestVT == MVT::i64)
4399       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4400     else
4401       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4402     Imm = 15;
4403     break;
4404   case MVT::i32:
4405     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4406     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4407     Imm = 31;
4408     break;
4409   }
4410 
4411   // Handle i8 and i16 as i32.
4412   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4413     DestVT = MVT::i32;
4414   else if (DestVT == MVT::i64) {
4415     unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4416     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4417             TII.get(AArch64::SUBREG_TO_REG), Src64)
4418         .addImm(0)
4419         .addReg(SrcReg)
4420         .addImm(AArch64::sub_32);
4421     SrcReg = Src64;
4422   }
4423 
4424   const TargetRegisterClass *RC =
4425       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4426   return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm);
4427 }
4428 
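/// Check whether the given machine instruction is a load that implicitly
/// zero-extends the loaded value into its destination register.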
4429 static bool isZExtLoad(const MachineInstr *LI) {
4430   switch (LI->getOpcode()) {
4431   default:
4432     return false;
4433   case AArch64::LDURBBi:
4434   case AArch64::LDURHHi:
4435   case AArch64::LDURWi:
4436   case AArch64::LDRBBui:
4437   case AArch64::LDRHHui:
4438   case AArch64::LDRWui:
4439   case AArch64::LDRBBroX:
4440   case AArch64::LDRHHroX:
4441   case AArch64::LDRWroX:
4442   case AArch64::LDRBBroW:
4443   case AArch64::LDRHHroW:
4444   case AArch64::LDRWroW:
4445     return true;
4446   }
4447 }
4448 
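/// Check whether the given machine instruction is a load that sign-extends the
/// loaded value into its destination register.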
4449 static bool isSExtLoad(const MachineInstr *LI) {
4450   switch (LI->getOpcode()) {
4451   default:
4452     return false;
4453   case AArch64::LDURSBWi:
4454   case AArch64::LDURSHWi:
4455   case AArch64::LDURSBXi:
4456   case AArch64::LDURSHXi:
4457   case AArch64::LDURSWi:
4458   case AArch64::LDRSBWui:
4459   case AArch64::LDRSHWui:
4460   case AArch64::LDRSBXui:
4461   case AArch64::LDRSHXui:
4462   case AArch64::LDRSWui:
4463   case AArch64::LDRSBWroX:
4464   case AArch64::LDRSHWroX:
4465   case AArch64::LDRSBXroX:
4466   case AArch64::LDRSHXroX:
4467   case AArch64::LDRSWroX:
4468   case AArch64::LDRSBWroW:
4469   case AArch64::LDRSHWroW:
4470   case AArch64::LDRSBXroW:
4471   case AArch64::LDRSHXroW:
4472   case AArch64::LDRSWroW:
4473     return true;
4474   }
4475 }
4476 
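/// Try to fold a sign-/zero-extend into an already emitted load. If the load
/// already performs the required extension, reuse its result register instead
/// of emitting a separate extend instruction.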
4477 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4478                                          MVT SrcVT) {
4479   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4480   if (!LI || !LI->hasOneUse())
4481     return false;
4482 
4483   // Check if the load instruction has already been selected.
4484   unsigned Reg = lookUpRegForValue(LI);
4485   if (!Reg)
4486     return false;
4487 
4488   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4489   if (!MI)
4490     return false;
4491 
4492   // Check if the correct load instruction has been emitted - SelectionDAG might
4493   // have emitted a zero-extending load, but we need a sign-extending load.
4494   bool IsZExt = isa<ZExtInst>(I);
4495   const auto *LoadMI = MI;
4496   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4497       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4498     unsigned LoadReg = MI->getOperand(1).getReg();
4499     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4500     assert(LoadMI && "Expected valid instruction");
4501   }
4502   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4503     return false;
4504 
4505   // Nothing to be done.
4506   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4507     updateValueMap(I, Reg);
4508     return true;
4509   }
4510 
4511   if (IsZExt) {
4512     unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
4513     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4514             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4515         .addImm(0)
4516         .addReg(Reg, getKillRegState(true))
4517         .addImm(AArch64::sub_32);
4518     Reg = Reg64;
4519   } else {
4520     assert((MI->getOpcode() == TargetOpcode::COPY &&
4521             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4522            "Expected copy instruction");
4523     Reg = MI->getOperand(1).getReg();
4524     MachineBasicBlock::iterator I(MI);
4525     removeDeadCode(I, std::next(I));
4526   }
4527   updateValueMap(I, Reg);
4528   return true;
4529 }
4530 
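/// Select a zero- or sign-extend instruction, reusing an extension that is
/// already provided by a preceding load or by a sign-/zero-extended function
/// argument where possible.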
4531 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4532   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4533          "Unexpected integer extend instruction.");
4534   MVT RetVT;
4535   MVT SrcVT;
4536   if (!isTypeSupported(I->getType(), RetVT))
4537     return false;
4538 
4539   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4540     return false;
4541 
4542   // Try to optimize already sign-/zero-extended values from load instructions.
4543   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4544     return true;
4545 
4546   unsigned SrcReg = getRegForValue(I->getOperand(0));
4547   if (!SrcReg)
4548     return false;
4549   bool SrcIsKill = hasTrivialKill(I->getOperand(0));
4550 
4551   // Try to optimize already sign-/zero-extended values from function arguments.
4552   bool IsZExt = isa<ZExtInst>(I);
4553   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4554     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4555       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4556         unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
4557         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4558                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4559             .addImm(0)
4560             .addReg(SrcReg, getKillRegState(SrcIsKill))
4561             .addImm(AArch64::sub_32);
4562         SrcReg = ResultReg;
4563       }
4564       // Conservatively clear all kill flags from all uses, because we are
4565       // replacing a sign-/zero-extend instruction at IR level with a nop at MI
4566       // level. The result of the instruction at IR level might have been
4567       // trivially dead, which is now no longer true.
4568       unsigned UseReg = lookUpRegForValue(I);
4569       if (UseReg)
4570         MRI.clearKillFlags(UseReg);
4571 
4572       updateValueMap(I, SrcReg);
4573       return true;
4574     }
4575   }
4576 
4577   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4578   if (!ResultReg)
4579     return false;
4580 
4581   updateValueMap(I, ResultReg);
4582   return true;
4583 }
4584 
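/// Select an integer remainder (SREM/UREM). AArch64 has no remainder
/// instruction, so the result is computed as Src0 - (Src0 / Src1) * Src1 using
/// a divide followed by an MSUB.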
4585 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4586   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4587   if (!DestEVT.isSimple())
4588     return false;
4589 
4590   MVT DestVT = DestEVT.getSimpleVT();
4591   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4592     return false;
4593 
4594   unsigned DivOpc;
4595   bool Is64bit = (DestVT == MVT::i64);
4596   switch (ISDOpcode) {
4597   default:
4598     return false;
4599   case ISD::SREM:
4600     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4601     break;
4602   case ISD::UREM:
4603     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4604     break;
4605   }
4606   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4607   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4608   if (!Src0Reg)
4609     return false;
4610   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4611 
4612   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4613   if (!Src1Reg)
4614     return false;
4615   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4616 
4617   const TargetRegisterClass *RC =
4618       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4619   unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false,
4620                                      Src1Reg, /*IsKill=*/false);
4621   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4622   // The remainder is computed as numerator - (quotient * denominator) using the
4623   // MSUB instruction.
4624   unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true,
4625                                         Src1Reg, Src1IsKill, Src0Reg,
4626                                         Src0IsKill);
4627   updateValueMap(I, ResultReg);
4628   return true;
4629 }
4630 
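/// Select an integer multiply. A multiply by a power of two is turned into a
/// left shift, folding a free zero-/sign-extend of the other operand into the
/// shift where possible; otherwise a plain MUL is emitted.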
4631 bool AArch64FastISel::selectMul(const Instruction *I) {
4632   MVT VT;
4633   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4634     return false;
4635 
4636   if (VT.isVector())
4637     return selectBinaryOp(I, ISD::MUL);
4638 
4639   const Value *Src0 = I->getOperand(0);
4640   const Value *Src1 = I->getOperand(1);
4641   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4642     if (C->getValue().isPowerOf2())
4643       std::swap(Src0, Src1);
4644 
4645   // Try to simplify to a shift instruction.
4646   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4647     if (C->getValue().isPowerOf2()) {
4648       uint64_t ShiftVal = C->getValue().logBase2();
4649       MVT SrcVT = VT;
4650       bool IsZExt = true;
4651       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4652         if (!isIntExtFree(ZExt)) {
4653           MVT VT;
4654           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4655             SrcVT = VT;
4656             IsZExt = true;
4657             Src0 = ZExt->getOperand(0);
4658           }
4659         }
4660       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4661         if (!isIntExtFree(SExt)) {
4662           MVT VT;
4663           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4664             SrcVT = VT;
4665             IsZExt = false;
4666             Src0 = SExt->getOperand(0);
4667           }
4668         }
4669       }
4670 
4671       unsigned Src0Reg = getRegForValue(Src0);
4672       if (!Src0Reg)
4673         return false;
4674       bool Src0IsKill = hasTrivialKill(Src0);
4675 
4676       unsigned ResultReg =
4677           emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt);
4678 
4679       if (ResultReg) {
4680         updateValueMap(I, ResultReg);
4681         return true;
4682       }
4683     }
4684 
4685   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4686   if (!Src0Reg)
4687     return false;
4688   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4689 
4690   unsigned Src1Reg = getRegForValue(I->getOperand(1));
4691   if (!Src1Reg)
4692     return false;
4693   bool Src1IsKill = hasTrivialKill(I->getOperand(1));
4694 
4695   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill);
4696 
4697   if (!ResultReg)
4698     return false;
4699 
4700   updateValueMap(I, ResultReg);
4701   return true;
4702 }
4703 
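/// Select a shift instruction (Shl/LShr/AShr). Constant shift amounts are
/// emitted as immediate-form shifts, which also allows folding a preceding
/// zero-/sign-extend into the bitfield-move encoding; variable shift amounts
/// use the register-form shift instructions.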
4704 bool AArch64FastISel::selectShift(const Instruction *I) {
4705   MVT RetVT;
4706   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4707     return false;
4708 
4709   if (RetVT.isVector())
4710     return selectOperator(I, I->getOpcode());
4711 
4712   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4713     unsigned ResultReg = 0;
4714     uint64_t ShiftVal = C->getZExtValue();
4715     MVT SrcVT = RetVT;
4716     bool IsZExt = I->getOpcode() != Instruction::AShr;
4717     const Value *Op0 = I->getOperand(0);
4718     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4719       if (!isIntExtFree(ZExt)) {
4720         MVT TmpVT;
4721         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4722           SrcVT = TmpVT;
4723           IsZExt = true;
4724           Op0 = ZExt->getOperand(0);
4725         }
4726       }
4727     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4728       if (!isIntExtFree(SExt)) {
4729         MVT TmpVT;
4730         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4731           SrcVT = TmpVT;
4732           IsZExt = false;
4733           Op0 = SExt->getOperand(0);
4734         }
4735       }
4736     }
4737 
4738     unsigned Op0Reg = getRegForValue(Op0);
4739     if (!Op0Reg)
4740       return false;
4741     bool Op0IsKill = hasTrivialKill(Op0);
4742 
4743     switch (I->getOpcode()) {
4744     default: llvm_unreachable("Unexpected instruction.");
4745     case Instruction::Shl:
4746       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4747       break;
4748     case Instruction::AShr:
4749       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4750       break;
4751     case Instruction::LShr:
4752       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt);
4753       break;
4754     }
4755     if (!ResultReg)
4756       return false;
4757 
4758     updateValueMap(I, ResultReg);
4759     return true;
4760   }
4761 
4762   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4763   if (!Op0Reg)
4764     return false;
4765   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4766 
4767   unsigned Op1Reg = getRegForValue(I->getOperand(1));
4768   if (!Op1Reg)
4769     return false;
4770   bool Op1IsKill = hasTrivialKill(I->getOperand(1));
4771 
4772   unsigned ResultReg = 0;
4773   switch (I->getOpcode()) {
4774   default: llvm_unreachable("Unexpected instruction.");
4775   case Instruction::Shl:
4776     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4777     break;
4778   case Instruction::AShr:
4779     ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4780     break;
4781   case Instruction::LShr:
4782     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill);
4783     break;
4784   }
4785 
4786   if (!ResultReg)
4787     return false;
4788 
4789   updateValueMap(I, ResultReg);
4790   return true;
4791 }
4792 
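/// Select a bitcast between a 32-/64-bit integer and a same-sized
/// floating-point value, which maps onto a single FMOV between register banks.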
4793 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4794   MVT RetVT, SrcVT;
4795 
4796   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4797     return false;
4798   if (!isTypeLegal(I->getType(), RetVT))
4799     return false;
4800 
4801   unsigned Opc;
4802   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4803     Opc = AArch64::FMOVWSr;
4804   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4805     Opc = AArch64::FMOVXDr;
4806   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4807     Opc = AArch64::FMOVSWr;
4808   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4809     Opc = AArch64::FMOVDXr;
4810   else
4811     return false;
4812 
4813   const TargetRegisterClass *RC = nullptr;
4814   switch (RetVT.SimpleTy) {
4815   default: llvm_unreachable("Unexpected value type.");
4816   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4817   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4818   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4819   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4820   }
4821   unsigned Op0Reg = getRegForValue(I->getOperand(0));
4822   if (!Op0Reg)
4823     return false;
4824   bool Op0IsKill = hasTrivialKill(I->getOperand(0));
4825   unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill);
4826 
4827   if (!ResultReg)
4828     return false;
4829 
4830   updateValueMap(I, ResultReg);
4831   return true;
4832 }
4833 
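/// Select a floating-point remainder. AArch64 has no FP remainder instruction,
/// so FRem is lowered to a call to the corresponding runtime library function
/// (fmodf/fmod).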
4834 bool AArch64FastISel::selectFRem(const Instruction *I) {
4835   MVT RetVT;
4836   if (!isTypeLegal(I->getType(), RetVT))
4837     return false;
4838 
4839   RTLIB::Libcall LC;
4840   switch (RetVT.SimpleTy) {
4841   default:
4842     return false;
4843   case MVT::f32:
4844     LC = RTLIB::REM_F32;
4845     break;
4846   case MVT::f64:
4847     LC = RTLIB::REM_F64;
4848     break;
4849   }
4850 
4851   ArgListTy Args;
4852   Args.reserve(I->getNumOperands());
4853 
4854   // Populate the argument list.
4855   for (auto &Arg : I->operands()) {
4856     ArgListEntry Entry;
4857     Entry.Val = Arg;
4858     Entry.Ty = Arg->getType();
4859     Args.push_back(Entry);
4860   }
4861 
4862   CallLoweringInfo CLI;
4863   MCContext &Ctx = MF->getContext();
4864   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4865                 TLI.getLibcallName(LC), std::move(Args));
4866   if (!lowerCallTo(CLI))
4867     return false;
4868   updateValueMap(I, CLI.ResultReg);
4869   return true;
4870 }
4871 
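/// Select a signed division. Divisions by a power of two (or its negation) are
/// expanded into an add/select/shift sequence; all other cases fall back to the
/// generic SDIV lowering.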
4872 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4873   MVT VT;
4874   if (!isTypeLegal(I->getType(), VT))
4875     return false;
4876 
4877   if (!isa<ConstantInt>(I->getOperand(1)))
4878     return selectBinaryOp(I, ISD::SDIV);
4879 
4880   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4881   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4882       !(C.isPowerOf2() || (-C).isPowerOf2()))
4883     return selectBinaryOp(I, ISD::SDIV);
4884 
4885   unsigned Lg2 = C.countTrailingZeros();
4886   unsigned Src0Reg = getRegForValue(I->getOperand(0));
4887   if (!Src0Reg)
4888     return false;
4889   bool Src0IsKill = hasTrivialKill(I->getOperand(0));
4890 
4891   if (cast<BinaryOperator>(I)->isExact()) {
4892     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2);
4893     if (!ResultReg)
4894       return false;
4895     updateValueMap(I, ResultReg);
4896     return true;
4897   }
4898 
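  // For a non-exact division, bias a negative dividend toward zero before
  // shifting:
  //   Tmp = Src0 + (Pow2 - 1)
  //   Res = (Src0 < 0 ? Tmp : Src0) >> Lg2, negated if the divisor is negative.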
4899   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4900   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne);
4901   if (!AddReg)
4902     return false;
4903 
4904   // (Src0 < 0) ? Pow2 - 1 : 0;
4905   if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
4906     return false;
4907 
4908   unsigned SelectOpc;
4909   const TargetRegisterClass *RC;
4910   if (VT == MVT::i64) {
4911     SelectOpc = AArch64::CSELXr;
4912     RC = &AArch64::GPR64RegClass;
4913   } else {
4914     SelectOpc = AArch64::CSELWr;
4915     RC = &AArch64::GPR32RegClass;
4916   }
4917   unsigned SelectReg =
4918       fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
4919                        Src0IsKill, AArch64CC::LT);
4920   if (!SelectReg)
4921     return false;
4922 
4923   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4924   // negate the result.
4925   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4926   unsigned ResultReg;
4927   if (C.isNegative())
4928     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
4929                               SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2);
4930   else
4931     ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2);
4932 
4933   if (!ResultReg)
4934     return false;
4935 
4936   updateValueMap(I, ResultReg);
4937   return true;
4938 }
4939 
4940 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4941 /// have to duplicate it for AArch64, because otherwise we would fail during the
4942 /// sign-extend emission.
4943 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4944   unsigned IdxN = getRegForValue(Idx);
4945   if (IdxN == 0)
4946     // Unhandled operand. Halt "fast" selection and bail.
4947     return std::pair<unsigned, bool>(0, false);
4948 
4949   bool IdxNIsKill = hasTrivialKill(Idx);
4950 
4951   // If the index is smaller or larger than intptr_t, truncate or extend it.
4952   MVT PtrVT = TLI.getPointerTy(DL);
4953   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4954   if (IdxVT.bitsLT(PtrVT)) {
4955     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4956     IdxNIsKill = true;
4957   } else if (IdxVT.bitsGT(PtrVT))
4958     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4959   return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
4960 }
4961 
4962 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4963 /// duplicate it for AArch64, because otherwise we would bail out even for
4964 /// simple cases. This is because the standard fastEmit functions don't cover
4965 /// MUL at all and ADD is lowered very inefficiently.
4966 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4967   unsigned N = getRegForValue(I->getOperand(0));
4968   if (!N)
4969     return false;
4970   bool NIsKill = hasTrivialKill(I->getOperand(0));
4971 
4972   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4973   // into a single N = N + TotalOffset.
4974   uint64_t TotalOffs = 0;
4975   MVT VT = TLI.getPointerTy(DL);
4976   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4977        GTI != E; ++GTI) {
4978     const Value *Idx = GTI.getOperand();
4979     if (auto *StTy = GTI.getStructTypeOrNull()) {
4980       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4981       // N = N + Offset
4982       if (Field)
4983         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4984     } else {
4985       Type *Ty = GTI.getIndexedType();
4986 
4987       // If this is a constant subscript, handle it quickly.
4988       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4989         if (CI->isZero())
4990           continue;
4991         // N = N + Offset
4992         TotalOffs +=
4993             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4994         continue;
4995       }
4996       if (TotalOffs) {
4997         N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
4998         if (!N)
4999           return false;
5000         NIsKill = true;
5001         TotalOffs = 0;
5002       }
5003 
5004       // N = N + Idx * ElementSize;
5005       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5006       std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
5007       unsigned IdxN = Pair.first;
5008       bool IdxNIsKill = Pair.second;
5009       if (!IdxN)
5010         return false;
5011 
5012       if (ElementSize != 1) {
5013         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5014         if (!C)
5015           return false;
5016         IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
5017         if (!IdxN)
5018           return false;
5019         IdxNIsKill = true;
5020       }
5021       N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
5022       if (!N)
5023         return false;
5024     }
5025   }
5026   if (TotalOffs) {
5027     N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
5028     if (!N)
5029       return false;
5030   }
5031   updateValueMap(I, N);
5032   return true;
5033 }
5034 
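/// Select a cmpxchg instruction at -O0. This emits the CMP_SWAP pseudo, then a
/// compare of the loaded value against the expected value and a CSINC that
/// materializes the i1 success result.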
5035 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5036   assert(TM.getOptLevel() == CodeGenOpt::None &&
5037          "cmpxchg survived AtomicExpand at optlevel > -O0");
5038 
5039   auto *RetPairTy = cast<StructType>(I->getType());
5040   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5041   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5042          "cmpxchg has a non-i1 status result");
5043 
5044   MVT VT;
5045   if (!isTypeLegal(RetTy, VT))
5046     return false;
5047 
5048   const TargetRegisterClass *ResRC;
5049   unsigned Opc, CmpOpc;
5050   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5051   // extractvalue selection doesn't support that.
5052   if (VT == MVT::i32) {
5053     Opc = AArch64::CMP_SWAP_32;
5054     CmpOpc = AArch64::SUBSWrs;
5055     ResRC = &AArch64::GPR32RegClass;
5056   } else if (VT == MVT::i64) {
5057     Opc = AArch64::CMP_SWAP_64;
5058     CmpOpc = AArch64::SUBSXrs;
5059     ResRC = &AArch64::GPR64RegClass;
5060   } else {
5061     return false;
5062   }
5063 
5064   const MCInstrDesc &II = TII.get(Opc);
5065 
5066   const unsigned AddrReg = constrainOperandRegClass(
5067       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5068   const unsigned DesiredReg = constrainOperandRegClass(
5069       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5070   const unsigned NewReg = constrainOperandRegClass(
5071       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5072 
5073   const unsigned ResultReg1 = createResultReg(ResRC);
5074   const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5075   const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5076 
5077   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5078   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5079       .addDef(ResultReg1)
5080       .addDef(ScratchReg)
5081       .addUse(AddrReg)
5082       .addUse(DesiredReg)
5083       .addUse(NewReg);
5084 
5085   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5086       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5087       .addUse(ResultReg1)
5088       .addUse(DesiredReg)
5089       .addImm(0);
5090 
5091   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5092       .addDef(ResultReg2)
5093       .addUse(AArch64::WZR)
5094       .addUse(AArch64::WZR)
5095       .addImm(AArch64CC::NE);
5096 
5097   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5098   updateValueMap(I, ResultReg1, 2);
5099   return true;
5100 }
5101 
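/// Dispatch an IR instruction to the matching AArch64-specific selection
/// routine, falling back to the target-independent selector when no
/// target-specific handling applies.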
5102 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5103   switch (I->getOpcode()) {
5104   default:
5105     break;
5106   case Instruction::Add:
5107   case Instruction::Sub:
5108     return selectAddSub(I);
5109   case Instruction::Mul:
5110     return selectMul(I);
5111   case Instruction::SDiv:
5112     return selectSDiv(I);
5113   case Instruction::SRem:
5114     if (!selectBinaryOp(I, ISD::SREM))
5115       return selectRem(I, ISD::SREM);
5116     return true;
5117   case Instruction::URem:
5118     if (!selectBinaryOp(I, ISD::UREM))
5119       return selectRem(I, ISD::UREM);
5120     return true;
5121   case Instruction::Shl:
5122   case Instruction::LShr:
5123   case Instruction::AShr:
5124     return selectShift(I);
5125   case Instruction::And:
5126   case Instruction::Or:
5127   case Instruction::Xor:
5128     return selectLogicalOp(I);
5129   case Instruction::Br:
5130     return selectBranch(I);
5131   case Instruction::IndirectBr:
5132     return selectIndirectBr(I);
5133   case Instruction::BitCast:
5134     if (!FastISel::selectBitCast(I))
5135       return selectBitCast(I);
5136     return true;
5137   case Instruction::FPToSI:
5138     if (!selectCast(I, ISD::FP_TO_SINT))
5139       return selectFPToInt(I, /*Signed=*/true);
5140     return true;
5141   case Instruction::FPToUI:
5142     return selectFPToInt(I, /*Signed=*/false);
5143   case Instruction::ZExt:
5144   case Instruction::SExt:
5145     return selectIntExt(I);
5146   case Instruction::Trunc:
5147     if (!selectCast(I, ISD::TRUNCATE))
5148       return selectTrunc(I);
5149     return true;
5150   case Instruction::FPExt:
5151     return selectFPExt(I);
5152   case Instruction::FPTrunc:
5153     return selectFPTrunc(I);
5154   case Instruction::SIToFP:
5155     if (!selectCast(I, ISD::SINT_TO_FP))
5156       return selectIntToFP(I, /*Signed=*/true);
5157     return true;
5158   case Instruction::UIToFP:
5159     return selectIntToFP(I, /*Signed=*/false);
5160   case Instruction::Load:
5161     return selectLoad(I);
5162   case Instruction::Store:
5163     return selectStore(I);
5164   case Instruction::FCmp:
5165   case Instruction::ICmp:
5166     return selectCmp(I);
5167   case Instruction::Select:
5168     return selectSelect(I);
5169   case Instruction::Ret:
5170     return selectRet(I);
5171   case Instruction::FRem:
5172     return selectFRem(I);
5173   case Instruction::GetElementPtr:
5174     return selectGetElementPtr(I);
5175   case Instruction::AtomicCmpXchg:
5176     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5177   }
5178 
5179   // fall-back to target-independent instruction selection.
5180   return selectOperator(I, I->getOpcode());
5181 }
5182 
5183 namespace llvm {
5184 
5185 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5186                                   const TargetLibraryInfo *LibInfo) {
5187   return new AArch64FastISel(FuncInfo, LibInfo);
5188 }
5189 
5190 } // end namespace llvm
5191