1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
17 #include "AArch64MachineFunctionInfo.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/Analysis/BranchProbabilityInfo.h"
27 #include "llvm/CodeGen/CallingConvLower.h"
28 #include "llvm/CodeGen/FastISel.h"
29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
30 #include "llvm/CodeGen/ISDOpcodes.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineConstantPool.h"
33 #include "llvm/CodeGen/MachineFrameInfo.h"
34 #include "llvm/CodeGen/MachineInstr.h"
35 #include "llvm/CodeGen/MachineInstrBuilder.h"
36 #include "llvm/CodeGen/MachineMemOperand.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/RuntimeLibcalls.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/IR/Argument.h"
41 #include "llvm/IR/Attributes.h"
42 #include "llvm/IR/BasicBlock.h"
43 #include "llvm/IR/CallingConv.h"
44 #include "llvm/IR/Constant.h"
45 #include "llvm/IR/Constants.h"
46 #include "llvm/IR/DataLayout.h"
47 #include "llvm/IR/DerivedTypes.h"
48 #include "llvm/IR/Function.h"
49 #include "llvm/IR/GetElementPtrTypeIterator.h"
50 #include "llvm/IR/GlobalValue.h"
51 #include "llvm/IR/InstrTypes.h"
52 #include "llvm/IR/Instruction.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/Intrinsics.h"
56 #include "llvm/IR/Operator.h"
57 #include "llvm/IR/Type.h"
58 #include "llvm/IR/User.h"
59 #include "llvm/IR/Value.h"
60 #include "llvm/MC/MCInstrDesc.h"
61 #include "llvm/MC/MCRegisterInfo.h"
62 #include "llvm/MC/MCSymbol.h"
63 #include "llvm/Support/AtomicOrdering.h"
64 #include "llvm/Support/Casting.h"
65 #include "llvm/Support/CodeGen.h"
66 #include "llvm/Support/Compiler.h"
67 #include "llvm/Support/ErrorHandling.h"
68 #include "llvm/Support/MachineValueType.h"
69 #include "llvm/Support/MathExtras.h"
70 #include <algorithm>
71 #include <cassert>
72 #include <cstdint>
73 #include <iterator>
74 #include <utility>
75 
76 using namespace llvm;
77 
78 namespace {
79 
80 class AArch64FastISel final : public FastISel {
81   class Address {
82   public:
83     using BaseKind = enum {
84       RegBase,
85       FrameIndexBase
86     };
87 
88   private:
89     BaseKind Kind = RegBase;
90     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
91     union {
92       unsigned Reg;
93       int FI;
94     } Base;
95     unsigned OffsetReg = 0;
96     unsigned Shift = 0;
97     int64_t Offset = 0;
98     const GlobalValue *GV = nullptr;
99 
100   public:
101     Address() { Base.Reg = 0; }
102 
103     void setKind(BaseKind K) { Kind = K; }
104     BaseKind getKind() const { return Kind; }
105     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
106     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
107     bool isRegBase() const { return Kind == RegBase; }
108     bool isFIBase() const { return Kind == FrameIndexBase; }
109 
110     void setReg(unsigned Reg) {
111       assert(isRegBase() && "Invalid base register access!");
112       Base.Reg = Reg;
113     }
114 
115     unsigned getReg() const {
116       assert(isRegBase() && "Invalid base register access!");
117       return Base.Reg;
118     }
119 
120     void setOffsetReg(unsigned Reg) {
121       OffsetReg = Reg;
122     }
123 
124     unsigned getOffsetReg() const {
125       return OffsetReg;
126     }
127 
128     void setFI(unsigned FI) {
129       assert(isFIBase() && "Invalid base frame index access!");
130       Base.FI = FI;
131     }
132 
133     unsigned getFI() const {
134       assert(isFIBase() && "Invalid base frame index access!");
135       return Base.FI;
136     }
137 
138     void setOffset(int64_t O) { Offset = O; }
139     int64_t getOffset() { return Offset; }
140     void setShift(unsigned S) { Shift = S; }
141     unsigned getShift() { return Shift; }
142 
143     void setGlobalValue(const GlobalValue *G) { GV = G; }
144     const GlobalValue *getGlobalValue() { return GV; }
145   };
146 
147   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
148   /// make the right decision when generating code for different targets.
149   const AArch64Subtarget *Subtarget;
150   LLVMContext *Context;
151 
152   bool fastLowerArguments() override;
153   bool fastLowerCall(CallLoweringInfo &CLI) override;
154   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
155 
156 private:
157   // Selection routines.
158   bool selectAddSub(const Instruction *I);
159   bool selectLogicalOp(const Instruction *I);
160   bool selectLoad(const Instruction *I);
161   bool selectStore(const Instruction *I);
162   bool selectBranch(const Instruction *I);
163   bool selectIndirectBr(const Instruction *I);
164   bool selectCmp(const Instruction *I);
165   bool selectSelect(const Instruction *I);
166   bool selectFPExt(const Instruction *I);
167   bool selectFPTrunc(const Instruction *I);
168   bool selectFPToInt(const Instruction *I, bool Signed);
169   bool selectIntToFP(const Instruction *I, bool Signed);
170   bool selectRem(const Instruction *I, unsigned ISDOpcode);
171   bool selectRet(const Instruction *I);
172   bool selectTrunc(const Instruction *I);
173   bool selectIntExt(const Instruction *I);
174   bool selectMul(const Instruction *I);
175   bool selectShift(const Instruction *I);
176   bool selectBitCast(const Instruction *I);
177   bool selectFRem(const Instruction *I);
178   bool selectSDiv(const Instruction *I);
179   bool selectGetElementPtr(const Instruction *I);
180   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
181 
182   // Utility helper routines.
183   bool isTypeLegal(Type *Ty, MVT &VT);
184   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
185   bool isValueAvailable(const Value *V) const;
186   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
187   bool computeCallAddress(const Value *V, Address &Addr);
188   bool simplifyAddress(Address &Addr, MVT VT);
189   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
190                             MachineMemOperand::Flags Flags,
191                             unsigned ScaleFactor, MachineMemOperand *MMO);
192   bool isMemCpySmall(uint64_t Len, unsigned Alignment);
193   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
194                           unsigned Alignment);
195   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
196                          const Value *Cond);
197   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
198   bool optimizeSelect(const SelectInst *SI);
199   unsigned getRegForGEPIndex(const Value *Idx);
200 
201   // Emit helper routines.
202   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
203                       const Value *RHS, bool SetFlags = false,
204                       bool WantResult = true,  bool IsZExt = false);
205   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
206                          unsigned RHSReg, bool SetFlags = false,
207                          bool WantResult = true);
208   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
209                          uint64_t Imm, bool SetFlags = false,
210                          bool WantResult = true);
211   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
212                          unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
213                          uint64_t ShiftImm, bool SetFlags = false,
214                          bool WantResult = true);
215   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
216                          unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
217                          uint64_t ShiftImm, bool SetFlags = false,
218                          bool WantResult = true);
219 
220   // Emit functions.
221   bool emitCompareAndBranch(const BranchInst *BI);
222   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
223   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
224   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
225   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
226   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
227                     MachineMemOperand *MMO = nullptr);
228   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
229                  MachineMemOperand *MMO = nullptr);
230   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
231                         MachineMemOperand *MMO = nullptr);
232   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
233   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
234   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
235                    bool SetFlags = false, bool WantResult = true,
236                    bool IsZExt = false);
237   unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
238   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
239                    bool SetFlags = false, bool WantResult = true,
240                    bool IsZExt = false);
241   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
242                        bool WantResult = true);
243   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
244                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
245                        bool WantResult = true);
246   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
247                          const Value *RHS);
248   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
249                             uint64_t Imm);
250   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251                             unsigned RHSReg, uint64_t ShiftImm);
252   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
253   unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
254   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
255   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
257   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
258                       bool IsZExt = true);
259   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
260   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
261                       bool IsZExt = true);
262   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
263   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
264                       bool IsZExt = false);
265 
266   unsigned materializeInt(const ConstantInt *CI, MVT VT);
267   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
268   unsigned materializeGV(const GlobalValue *GV);
269 
270   // Call handling routines.
271 private:
272   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
273   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
274                        unsigned &NumBytes);
275   bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
276 
277 public:
278   // Backend specific FastISel code.
279   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
280   unsigned fastMaterializeConstant(const Constant *C) override;
281   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
282 
283   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
284                            const TargetLibraryInfo *LibInfo)
285       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
286     Subtarget =
287         &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget());
288     Context = &FuncInfo.Fn->getContext();
289   }
290 
291   bool fastSelectInstruction(const Instruction *I) override;
292 
293 #include "AArch64GenFastISel.inc"
294 };
295 
296 } // end anonymous namespace
297 
298 /// Check if the sign-/zero-extend will be a noop.
299 static bool isIntExtFree(const Instruction *I) {
300   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
301          "Unexpected integer extend instruction.");
302   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
303          "Unexpected value type.");
304   bool IsZExt = isa<ZExtInst>(I);
305 
306   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
307     if (LI->hasOneUse())
308       return true;
309 
310   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
311     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
312       return true;
313 
314   return false;
315 }
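// For illustration, a zext of a single-use i8 load is free because the load
// itself can be selected as a zero-extending LDRB; similarly, an i32 argument
// carrying the zeroext/signext attribute has already been widened by the
// caller per the calling convention, so no extra instruction is needed.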
316 
317 /// Determine the implicit scale factor that is applied by a memory
318 /// operation for a given value type.
319 static unsigned getImplicitScaleFactor(MVT VT) {
320   switch (VT.SimpleTy) {
321   default:
322     return 0;    // invalid
323   case MVT::i1:  // fall-through
324   case MVT::i8:
325     return 1;
326   case MVT::i16:
327     return 2;
328   case MVT::i32: // fall-through
329   case MVT::f32:
330     return 4;
331   case MVT::i64: // fall-through
332   case MVT::f64:
333     return 8;
334   }
335 }
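// The scale factor mirrors the scaled unsigned 12-bit immediate addressing
// mode of LDR/STR. For example, an i32 access uses a scale of 4, so a byte
// offset of 20 encodes as imm12 = 5 in "ldr w0, [x1, #20]" (registers here
// are illustrative). A result of 0 means the type is not handled.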
336 
337 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
338   if (CC == CallingConv::WebKit_JS)
339     return CC_AArch64_WebKit_JS;
340   if (CC == CallingConv::GHC)
341     return CC_AArch64_GHC;
342   if (CC == CallingConv::CFGuard_Check)
343     return CC_AArch64_Win64_CFGuard_Check;
344   return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
345 }
346 
347 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
348   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
349          "Alloca should always return a pointer.");
350 
351   // Don't handle dynamic allocas.
352   if (!FuncInfo.StaticAllocaMap.count(AI))
353     return 0;
354 
355   DenseMap<const AllocaInst *, int>::iterator SI =
356       FuncInfo.StaticAllocaMap.find(AI);
357 
358   if (SI != FuncInfo.StaticAllocaMap.end()) {
359     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
360     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
361             ResultReg)
362         .addFrameIndex(SI->second)
363         .addImm(0)
364         .addImm(0);
365     return ResultReg;
366   }
367 
368   return 0;
369 }
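// For a static alloca this emits an ADDXri against the frame index; frame
// lowering later rewrites it to something like "add x0, sp, #16" (offset and
// register are illustrative).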
370 
371 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
372   if (VT > MVT::i64)
373     return 0;
374 
375   if (!CI->isZero())
376     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
377 
378   // Create a copy from the zero register to materialize a "0" value.
379   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
380                                                    : &AArch64::GPR32RegClass;
381   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
382   Register ResultReg = createResultReg(RC);
383   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
384           ResultReg).addReg(ZeroReg, getKillRegState(true));
385   return ResultReg;
386 }
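// For illustration, i32 0 becomes a COPY from WZR ("mov w0, wzr"), while a
// non-zero constant such as i32 42 goes through the tablegen-generated
// fastEmit_i path, which typically selects a MOVi32imm pseudo that later
// expands to MOVZ (or a MOVZ/MOVK sequence for wider constants).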
387 
388 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
389   // Positive zero (+0.0) has to be materialized with a fmov from the zero
390   // register, because the immediate version of fmov cannot encode zero.
391   if (CFP->isNullValue())
392     return fastMaterializeFloatZero(CFP);
393 
394   if (VT != MVT::f32 && VT != MVT::f64)
395     return 0;
396 
397   const APFloat Val = CFP->getValueAPF();
398   bool Is64Bit = (VT == MVT::f64);
399   // Check whether we can use an FMOV instruction to materialize the
400   // constant; otherwise we have to materialize it via the constant pool.
401   int Imm =
402       Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
403   if (Imm != -1) {
404     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
405     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
406   }
407 
408   // For the large code model materialize the FP constant in code.
409   if (TM.getCodeModel() == CodeModel::Large) {
410     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
411     const TargetRegisterClass *RC = Is64Bit ?
412         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
413 
414     Register TmpReg = createResultReg(RC);
415     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
416         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
417 
418     Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
419     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
420             TII.get(TargetOpcode::COPY), ResultReg)
421         .addReg(TmpReg, getKillRegState(true));
422 
423     return ResultReg;
424   }
425 
426   // Materialize via constant pool.  MachineConstantPool wants an explicit
427   // alignment.
428   Align Alignment = DL.getPrefTypeAlign(CFP->getType());
429 
430   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
431   Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
432   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
433           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
434 
435   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
436   Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
437   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
438       .addReg(ADRPReg)
439       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
440   return ResultReg;
441 }
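// Putting the three strategies together (illustrative examples):
//   +0.0  -> handled by fastMaterializeFloatZero, "fmov d0, xzr"
//   1.0   -> FMOV immediate, "fmov d0, #1.0" (encodable in the 8-bit FP imm)
//   0.1   -> not FMOV-encodable, so "adrp x8, .LCPI0_0" +
//            "ldr d0, [x8, :lo12:.LCPI0_0]" from the constant pool, or a
//            MOVi64imm + COPY under the large code model.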
442 
443 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
444   // We can't handle thread-local variables quickly yet.
445   if (GV->isThreadLocal())
446     return 0;
447 
448   // MachO still uses GOT for large code-model accesses, but ELF requires
449   // movz/movk sequences, which FastISel doesn't handle yet.
450   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
451     return 0;
452 
453   unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
454 
455   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
456   if (!DestEVT.isSimple())
457     return 0;
458 
459   Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
460   unsigned ResultReg;
461 
462   if (OpFlags & AArch64II::MO_GOT) {
463     // ADRP + LDRX
464     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
465             ADRPReg)
466         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
467 
468     unsigned LdrOpc;
469     if (Subtarget->isTargetILP32()) {
470       ResultReg = createResultReg(&AArch64::GPR32RegClass);
471       LdrOpc = AArch64::LDRWui;
472     } else {
473       ResultReg = createResultReg(&AArch64::GPR64RegClass);
474       LdrOpc = AArch64::LDRXui;
475     }
476     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
477             ResultReg)
478       .addReg(ADRPReg)
479       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
480                         AArch64II::MO_NC | OpFlags);
481     if (!Subtarget->isTargetILP32())
482       return ResultReg;
483 
484     // LDRWui produces a 32-bit register, but pointers held in registers are
485     // 64 bits wide, so we must extend the result on ILP32.
486     Register Result64 = createResultReg(&AArch64::GPR64RegClass);
487     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
488             TII.get(TargetOpcode::SUBREG_TO_REG))
489         .addDef(Result64)
490         .addImm(0)
491         .addReg(ResultReg, RegState::Kill)
492         .addImm(AArch64::sub_32);
493     return Result64;
494   } else {
495     // ADRP + ADDX
496     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
497             ADRPReg)
498         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
499 
500     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
501     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
502             ResultReg)
503         .addReg(ADRPReg)
504         .addGlobalAddress(GV, 0,
505                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
506         .addImm(0);
507   }
508   return ResultReg;
509 }
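// Roughly, a GOT access becomes "adrp x8, :got:g" + "ldr x8, [x8, :got_lo12:g]",
// while small-code-model direct addressing becomes "adrp x8, g" +
// "add x8, x8, :lo12:g" (register and symbol names are illustrative). On ILP32
// the GOT load is 32-bit and is widened to 64 bits with SUBREG_TO_REG.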
510 
511 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
512   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
513 
514   // Only handle simple types.
515   if (!CEVT.isSimple())
516     return 0;
517   MVT VT = CEVT.getSimpleVT();
518   // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
519   // 'null' pointers need to have a somewhat special treatment.
520   if (isa<ConstantPointerNull>(C)) {
521     assert(VT == MVT::i64 && "Expected 64-bit pointers");
522     return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
523   }
524 
525   if (const auto *CI = dyn_cast<ConstantInt>(C))
526     return materializeInt(CI, VT);
527   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
528     return materializeFP(CFP, VT);
529   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
530     return materializeGV(GV);
531 
532   return 0;
533 }
534 
535 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
536   assert(CFP->isNullValue() &&
537          "Floating-point constant is not a positive zero.");
538   MVT VT;
539   if (!isTypeLegal(CFP->getType(), VT))
540     return 0;
541 
542   if (VT != MVT::f32 && VT != MVT::f64)
543     return 0;
544 
545   bool Is64Bit = (VT == MVT::f64);
546   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
547   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
548   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
549 }
550 
551 /// Check if the multiply is by a power-of-2 constant.
552 static bool isMulPowOf2(const Value *I) {
553   if (const auto *MI = dyn_cast<MulOperator>(I)) {
554     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
555       if (C->getValue().isPowerOf2())
556         return true;
557     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
558       if (C->getValue().isPowerOf2())
559         return true;
560   }
561   return false;
562 }
563 
564 // Computes the address to get to an object.
565 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
566 {
567   const User *U = nullptr;
568   unsigned Opcode = Instruction::UserOp1;
569   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
570     // Don't walk into other basic blocks unless the object is an alloca from
571     // another block, otherwise it may not have a virtual register assigned.
572     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
573         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
574       Opcode = I->getOpcode();
575       U = I;
576     }
577   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
578     Opcode = C->getOpcode();
579     U = C;
580   }
581 
582   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
583     if (Ty->getAddressSpace() > 255)
584       // Fast instruction selection doesn't support the special
585       // address spaces.
586       return false;
587 
588   switch (Opcode) {
589   default:
590     break;
591   case Instruction::BitCast:
592     // Look through bitcasts.
593     return computeAddress(U->getOperand(0), Addr, Ty);
594 
595   case Instruction::IntToPtr:
596     // Look past no-op inttoptrs.
597     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
598         TLI.getPointerTy(DL))
599       return computeAddress(U->getOperand(0), Addr, Ty);
600     break;
601 
602   case Instruction::PtrToInt:
603     // Look past no-op ptrtoints.
604     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
605       return computeAddress(U->getOperand(0), Addr, Ty);
606     break;
607 
608   case Instruction::GetElementPtr: {
609     Address SavedAddr = Addr;
610     uint64_t TmpOffset = Addr.getOffset();
611 
612     // Iterate through the GEP folding the constants into offsets where
613     // we can.
614     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
615          GTI != E; ++GTI) {
616       const Value *Op = GTI.getOperand();
617       if (StructType *STy = GTI.getStructTypeOrNull()) {
618         const StructLayout *SL = DL.getStructLayout(STy);
619         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
620         TmpOffset += SL->getElementOffset(Idx);
621       } else {
622         uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
623         while (true) {
624           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
625             // Constant-offset addressing.
626             TmpOffset += CI->getSExtValue() * S;
627             break;
628           }
629           if (canFoldAddIntoGEP(U, Op)) {
630             // A compatible add with a constant operand. Fold the constant.
631             ConstantInt *CI =
632                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
633             TmpOffset += CI->getSExtValue() * S;
634             // Iterate on the other operand.
635             Op = cast<AddOperator>(Op)->getOperand(0);
636             continue;
637           }
638           // Unsupported
639           goto unsupported_gep;
640         }
641       }
642     }
643 
644     // Try to grab the base operand now.
645     Addr.setOffset(TmpOffset);
646     if (computeAddress(U->getOperand(0), Addr, Ty))
647       return true;
648 
649     // We failed, restore everything and try the other options.
650     Addr = SavedAddr;
651 
652   unsupported_gep:
653     break;
654   }
655   case Instruction::Alloca: {
656     const AllocaInst *AI = cast<AllocaInst>(Obj);
657     DenseMap<const AllocaInst *, int>::iterator SI =
658         FuncInfo.StaticAllocaMap.find(AI);
659     if (SI != FuncInfo.StaticAllocaMap.end()) {
660       Addr.setKind(Address::FrameIndexBase);
661       Addr.setFI(SI->second);
662       return true;
663     }
664     break;
665   }
666   case Instruction::Add: {
667     // Adds of constants are common and easy enough.
668     const Value *LHS = U->getOperand(0);
669     const Value *RHS = U->getOperand(1);
670 
671     if (isa<ConstantInt>(LHS))
672       std::swap(LHS, RHS);
673 
674     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
675       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
676       return computeAddress(LHS, Addr, Ty);
677     }
678 
679     Address Backup = Addr;
680     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
681       return true;
682     Addr = Backup;
683 
684     break;
685   }
686   case Instruction::Sub: {
687     // Subs of constants are common and easy enough.
688     const Value *LHS = U->getOperand(0);
689     const Value *RHS = U->getOperand(1);
690 
691     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
692       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
693       return computeAddress(LHS, Addr, Ty);
694     }
695     break;
696   }
697   case Instruction::Shl: {
698     if (Addr.getOffsetReg())
699       break;
700 
701     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
702     if (!CI)
703       break;
704 
705     unsigned Val = CI->getZExtValue();
706     if (Val < 1 || Val > 3)
707       break;
708 
709     uint64_t NumBytes = 0;
710     if (Ty && Ty->isSized()) {
711       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
712       NumBytes = NumBits / 8;
713       if (!isPowerOf2_64(NumBits))
714         NumBytes = 0;
715     }
716 
717     if (NumBytes != (1ULL << Val))
718       break;
719 
720     Addr.setShift(Val);
721     Addr.setExtendType(AArch64_AM::LSL);
722 
723     const Value *Src = U->getOperand(0);
724     if (const auto *I = dyn_cast<Instruction>(Src)) {
725       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
726         // Fold the zext or sext when it won't become a noop.
727         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
728           if (!isIntExtFree(ZE) &&
729               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
730             Addr.setExtendType(AArch64_AM::UXTW);
731             Src = ZE->getOperand(0);
732           }
733         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
734           if (!isIntExtFree(SE) &&
735               SE->getOperand(0)->getType()->isIntegerTy(32)) {
736             Addr.setExtendType(AArch64_AM::SXTW);
737             Src = SE->getOperand(0);
738           }
739         }
740       }
741     }
742 
743     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
744       if (AI->getOpcode() == Instruction::And) {
745         const Value *LHS = AI->getOperand(0);
746         const Value *RHS = AI->getOperand(1);
747 
748         if (const auto *C = dyn_cast<ConstantInt>(LHS))
749           if (C->getValue() == 0xffffffff)
750             std::swap(LHS, RHS);
751 
752         if (const auto *C = dyn_cast<ConstantInt>(RHS))
753           if (C->getValue() == 0xffffffff) {
754             Addr.setExtendType(AArch64_AM::UXTW);
755             Register Reg = getRegForValue(LHS);
756             if (!Reg)
757               return false;
758             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
759             Addr.setOffsetReg(Reg);
760             return true;
761           }
762       }
763 
764     Register Reg = getRegForValue(Src);
765     if (!Reg)
766       return false;
767     Addr.setOffsetReg(Reg);
768     return true;
769   }
770   case Instruction::Mul: {
771     if (Addr.getOffsetReg())
772       break;
773 
774     if (!isMulPowOf2(U))
775       break;
776 
777     const Value *LHS = U->getOperand(0);
778     const Value *RHS = U->getOperand(1);
779 
780     // Canonicalize power-of-2 value to the RHS.
781     if (const auto *C = dyn_cast<ConstantInt>(LHS))
782       if (C->getValue().isPowerOf2())
783         std::swap(LHS, RHS);
784 
785     assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
786     const auto *C = cast<ConstantInt>(RHS);
787     unsigned Val = C->getValue().logBase2();
788     if (Val < 1 || Val > 3)
789       break;
790 
791     uint64_t NumBytes = 0;
792     if (Ty && Ty->isSized()) {
793       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
794       NumBytes = NumBits / 8;
795       if (!isPowerOf2_64(NumBits))
796         NumBytes = 0;
797     }
798 
799     if (NumBytes != (1ULL << Val))
800       break;
801 
802     Addr.setShift(Val);
803     Addr.setExtendType(AArch64_AM::LSL);
804 
805     const Value *Src = LHS;
806     if (const auto *I = dyn_cast<Instruction>(Src)) {
807       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
808         // Fold the zext or sext when it won't become a noop.
809         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
810           if (!isIntExtFree(ZE) &&
811               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
812             Addr.setExtendType(AArch64_AM::UXTW);
813             Src = ZE->getOperand(0);
814           }
815         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
816           if (!isIntExtFree(SE) &&
817               SE->getOperand(0)->getType()->isIntegerTy(32)) {
818             Addr.setExtendType(AArch64_AM::SXTW);
819             Src = SE->getOperand(0);
820           }
821         }
822       }
823     }
824 
825     Register Reg = getRegForValue(Src);
826     if (!Reg)
827       return false;
828     Addr.setOffsetReg(Reg);
829     return true;
830   }
831   case Instruction::And: {
832     if (Addr.getOffsetReg())
833       break;
834 
835     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
836       break;
837 
838     const Value *LHS = U->getOperand(0);
839     const Value *RHS = U->getOperand(1);
840 
841     if (const auto *C = dyn_cast<ConstantInt>(LHS))
842       if (C->getValue() == 0xffffffff)
843         std::swap(LHS, RHS);
844 
845     if (const auto *C = dyn_cast<ConstantInt>(RHS))
846       if (C->getValue() == 0xffffffff) {
847         Addr.setShift(0);
848         Addr.setExtendType(AArch64_AM::LSL);
849         Addr.setExtendType(AArch64_AM::UXTW);
850 
851         Register Reg = getRegForValue(LHS);
852         if (!Reg)
853           return false;
854         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
855         Addr.setOffsetReg(Reg);
856         return true;
857       }
858     break;
859   }
860   case Instruction::SExt:
861   case Instruction::ZExt: {
862     if (!Addr.getReg() || Addr.getOffsetReg())
863       break;
864 
865     const Value *Src = nullptr;
866     // Fold the zext or sext when it won't become a noop.
867     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
868       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
869         Addr.setExtendType(AArch64_AM::UXTW);
870         Src = ZE->getOperand(0);
871       }
872     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
873       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
874         Addr.setExtendType(AArch64_AM::SXTW);
875         Src = SE->getOperand(0);
876       }
877     }
878 
879     if (!Src)
880       break;
881 
882     Addr.setShift(0);
883     Register Reg = getRegForValue(Src);
884     if (!Reg)
885       return false;
886     Addr.setOffsetReg(Reg);
887     return true;
888   }
889   } // end switch
890 
891   if (Addr.isRegBase() && !Addr.getReg()) {
892     Register Reg = getRegForValue(Obj);
893     if (!Reg)
894       return false;
895     Addr.setReg(Reg);
896     return true;
897   }
898 
899   if (!Addr.getOffsetReg()) {
900     Register Reg = getRegForValue(Obj);
901     if (!Reg)
902       return false;
903     Addr.setOffsetReg(Reg);
904     return true;
905   }
906 
907   return false;
908 }
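// Two illustrative foldings this routine enables: a GEP with constant indices
// collapses into an immediate offset, e.g. "ldr w0, [x1, #8]"; and a shift or
// multiply of a 32-bit index that matches the access size becomes a scaled
// register offset, e.g. "%idx64 = zext i32 %i to i64" feeding a 4-byte access
// can be selected as "ldr w0, [x1, w2, uxtw #2]".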
909 
910 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
911   const User *U = nullptr;
912   unsigned Opcode = Instruction::UserOp1;
913   bool InMBB = true;
914 
915   if (const auto *I = dyn_cast<Instruction>(V)) {
916     Opcode = I->getOpcode();
917     U = I;
918     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
919   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
920     Opcode = C->getOpcode();
921     U = C;
922   }
923 
924   switch (Opcode) {
925   default: break;
926   case Instruction::BitCast:
927     // Look past the bitcast if its operand is in the same BB.
928     if (InMBB)
929       return computeCallAddress(U->getOperand(0), Addr);
930     break;
931   case Instruction::IntToPtr:
932     // Look past a no-op inttoptr if its operand is in the same BB.
933     if (InMBB &&
934         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
935             TLI.getPointerTy(DL))
936       return computeCallAddress(U->getOperand(0), Addr);
937     break;
938   case Instruction::PtrToInt:
939     // Look past a no-op ptrtoint if its operand is in the same BB.
940     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
941       return computeCallAddress(U->getOperand(0), Addr);
942     break;
943   }
944 
945   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
946     Addr.setGlobalValue(GV);
947     return true;
948   }
949 
950   // If all else fails, try to materialize the value in a register.
951   if (!Addr.getGlobalValue()) {
952     Addr.setReg(getRegForValue(V));
953     return Addr.getReg() != 0;
954   }
955 
956   return false;
957 }
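// For illustration, a direct call such as "call void @foo()" records the
// GlobalValue so the call lowering can emit a BL, whereas a call through a
// function pointer materializes the callee into a register for a BLR.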
958 
959 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
960   EVT evt = TLI.getValueType(DL, Ty, true);
961 
962   if (Subtarget->isTargetILP32() && Ty->isPointerTy())
963     return false;
964 
965   // Only handle simple types.
966   if (evt == MVT::Other || !evt.isSimple())
967     return false;
968   VT = evt.getSimpleVT();
969 
970   // This is a legal type, but it's not something we handle in fast-isel.
971   if (VT == MVT::f128)
972     return false;
973 
974   // Handle all other legal types, i.e. a register that will directly hold this
975   // value.
976   return TLI.isTypeLegal(VT);
977 }
978 
979 /// Determine if the value type is supported by FastISel.
980 ///
981 /// FastISel for AArch64 can handle more value types than are legal. This adds
982 /// simple value types such as i1, i8, and i16.
983 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
984   if (Ty->isVectorTy() && !IsVectorAllowed)
985     return false;
986 
987   if (isTypeLegal(Ty, VT))
988     return true;
989 
990   // If this is a type that can be sign- or zero-extended to a basic
991   // operation, go ahead and accept it now.
992   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
993     return true;
994 
995   return false;
996 }
997 
998 bool AArch64FastISel::isValueAvailable(const Value *V) const {
999   if (!isa<Instruction>(V))
1000     return true;
1001 
1002   const auto *I = cast<Instruction>(V);
1003   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1004 }
1005 
1006 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1007   if (Subtarget->isTargetILP32())
1008     return false;
1009 
1010   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1011   if (!ScaleFactor)
1012     return false;
1013 
1014   bool ImmediateOffsetNeedsLowering = false;
1015   bool RegisterOffsetNeedsLowering = false;
1016   int64_t Offset = Addr.getOffset();
1017   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1018     ImmediateOffsetNeedsLowering = true;
1019   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1020            !isUInt<12>(Offset / ScaleFactor))
1021     ImmediateOffsetNeedsLowering = true;
1022 
1023   // Cannot encode an offset register and an immediate offset in the same
1024   // instruction. Fold the immediate offset into the load/store instruction and
1025   // emit an additional add to take care of the offset register.
1026   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1027     RegisterOffsetNeedsLowering = true;
1028 
1029   // Cannot encode zero register as base.
1030   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1031     RegisterOffsetNeedsLowering = true;
1032 
1033   // If the base is a frame index and the offset needs to be simplified, then
1034   // put the alloca address into a register, set the base kind back to
1035   // register, and continue. This should almost never happen.
1036   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1037   {
1038     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1039     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
1040             ResultReg)
1041       .addFrameIndex(Addr.getFI())
1042       .addImm(0)
1043       .addImm(0);
1044     Addr.setKind(Address::RegBase);
1045     Addr.setReg(ResultReg);
1046   }
1047 
1048   if (RegisterOffsetNeedsLowering) {
1049     unsigned ResultReg = 0;
1050     if (Addr.getReg()) {
1051       if (Addr.getExtendType() == AArch64_AM::SXTW ||
1052           Addr.getExtendType() == AArch64_AM::UXTW   )
1053         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1054                                   Addr.getOffsetReg(), Addr.getExtendType(),
1055                                   Addr.getShift());
1056       else
1057         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1058                                   Addr.getOffsetReg(), AArch64_AM::LSL,
1059                                   Addr.getShift());
1060     } else {
1061       if (Addr.getExtendType() == AArch64_AM::UXTW)
1062         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1063                                Addr.getShift(), /*IsZExt=*/true);
1064       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1065         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1066                                Addr.getShift(), /*IsZExt=*/false);
1067       else
1068         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1069                                Addr.getShift());
1070     }
1071     if (!ResultReg)
1072       return false;
1073 
1074     Addr.setReg(ResultReg);
1075     Addr.setOffsetReg(0);
1076     Addr.setShift(0);
1077     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1078   }
1079 
1080   // Since the offset is too large for the load/store instruction, get the
1081   // reg+offset into a register.
1082   if (ImmediateOffsetNeedsLowering) {
1083     unsigned ResultReg;
1084     if (Addr.getReg())
1085       // Try to fold the immediate into the add instruction.
1086       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1087     else
1088       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1089 
1090     if (!ResultReg)
1091       return false;
1092     Addr.setReg(ResultReg);
1093     Addr.setOffset(0);
1094   }
1095   return true;
1096 }
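// For illustration, an i32 access at offset 65540 cannot be encoded (the
// scaled unsigned 12-bit field tops out at 4095 * 4 and the signed unscaled
// field at +255), so the offset is folded into an ADD and the load then uses
// offset 0. Likewise, a frame-index base combined with an offset register is
// first turned into a plain register base via ADDXri.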
1097 
1098 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1099                                            const MachineInstrBuilder &MIB,
1100                                            MachineMemOperand::Flags Flags,
1101                                            unsigned ScaleFactor,
1102                                            MachineMemOperand *MMO) {
1103   int64_t Offset = Addr.getOffset() / ScaleFactor;
1104   // Frame base works a bit differently. Handle it separately.
1105   if (Addr.isFIBase()) {
1106     int FI = Addr.getFI();
1107     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1108     // and alignment should be based on the VT.
1109     MMO = FuncInfo.MF->getMachineMemOperand(
1110         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1111         MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1112     // Now add the rest of the operands.
1113     MIB.addFrameIndex(FI).addImm(Offset);
1114   } else {
1115     assert(Addr.isRegBase() && "Unexpected address kind.");
1116     const MCInstrDesc &II = MIB->getDesc();
1117     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1118     Addr.setReg(
1119       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1120     Addr.setOffsetReg(
1121       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1122     if (Addr.getOffsetReg()) {
1123       assert(Addr.getOffset() == 0 && "Unexpected offset");
1124       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1125                       Addr.getExtendType() == AArch64_AM::SXTX;
1126       MIB.addReg(Addr.getReg());
1127       MIB.addReg(Addr.getOffsetReg());
1128       MIB.addImm(IsSigned);
1129       MIB.addImm(Addr.getShift() != 0);
1130     } else
1131       MIB.addReg(Addr.getReg()).addImm(Offset);
1132   }
1133 
1134   if (MMO)
1135     MIB.addMemOperand(MMO);
1136 }
1137 
1138 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1139                                      const Value *RHS, bool SetFlags,
1140                                      bool WantResult,  bool IsZExt) {
1141   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1142   bool NeedExtend = false;
1143   switch (RetVT.SimpleTy) {
1144   default:
1145     return 0;
1146   case MVT::i1:
1147     NeedExtend = true;
1148     break;
1149   case MVT::i8:
1150     NeedExtend = true;
1151     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1152     break;
1153   case MVT::i16:
1154     NeedExtend = true;
1155     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1156     break;
1157   case MVT::i32:  // fall-through
1158   case MVT::i64:
1159     break;
1160   }
1161   MVT SrcVT = RetVT;
1162   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1163 
1164   // Canonicalize immediates to the RHS first.
1165   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1166     std::swap(LHS, RHS);
1167 
1168   // Canonicalize mul by power of 2 to the RHS.
1169   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1170     if (isMulPowOf2(LHS))
1171       std::swap(LHS, RHS);
1172 
1173   // Canonicalize shift immediate to the RHS.
1174   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1175     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1176       if (isa<ConstantInt>(SI->getOperand(1)))
1177         if (SI->getOpcode() == Instruction::Shl  ||
1178             SI->getOpcode() == Instruction::LShr ||
1179             SI->getOpcode() == Instruction::AShr   )
1180           std::swap(LHS, RHS);
1181 
1182   Register LHSReg = getRegForValue(LHS);
1183   if (!LHSReg)
1184     return 0;
1185 
1186   if (NeedExtend)
1187     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1188 
1189   unsigned ResultReg = 0;
1190   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1191     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1192     if (C->isNegative())
1193       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1194                                 WantResult);
1195     else
1196       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1197                                 WantResult);
1198   } else if (const auto *C = dyn_cast<Constant>(RHS))
1199     if (C->isNullValue())
1200       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1201 
1202   if (ResultReg)
1203     return ResultReg;
1204 
1205   // Only extend the RHS within the instruction if there is a valid extend type.
1206   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1207       isValueAvailable(RHS)) {
1208     if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
1209       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
1210         if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
1211           Register RHSReg = getRegForValue(SI->getOperand(0));
1212           if (!RHSReg)
1213             return 0;
1214           return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
1215                                C->getZExtValue(), SetFlags, WantResult);
1216         }
1217     Register RHSReg = getRegForValue(RHS);
1218     if (!RHSReg)
1219       return 0;
1220     return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1221                          SetFlags, WantResult);
1222   }
1223 
1224   // Check if the mul can be folded into the instruction.
1225   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1226     if (isMulPowOf2(RHS)) {
1227       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1228       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1229 
1230       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1231         if (C->getValue().isPowerOf2())
1232           std::swap(MulLHS, MulRHS);
1233 
1234       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1235       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1236       Register RHSReg = getRegForValue(MulLHS);
1237       if (!RHSReg)
1238         return 0;
1239       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1240                                 ShiftVal, SetFlags, WantResult);
1241       if (ResultReg)
1242         return ResultReg;
1243     }
1244   }
1245 
1246   // Check if the shift can be folded into the instruction.
1247   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1248     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1249       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1250         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1251         switch (SI->getOpcode()) {
1252         default: break;
1253         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1254         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1255         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1256         }
1257         uint64_t ShiftVal = C->getZExtValue();
1258         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1259           Register RHSReg = getRegForValue(SI->getOperand(0));
1260           if (!RHSReg)
1261             return 0;
1262           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1263                                     ShiftVal, SetFlags, WantResult);
1264           if (ResultReg)
1265             return ResultReg;
1266         }
1267       }
1268     }
1269   }
1270 
1271   Register RHSReg = getRegForValue(RHS);
1272   if (!RHSReg)
1273     return 0;
1274 
1275   if (NeedExtend)
1276     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1277 
1278   return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1279 }
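// Example of the operand folding above (illustrative): for
//   %s = shl i32 %b, 2
//   %r = add i32 %a, %s
// with %s having a single use, the shift folds into the shifted-register form
// and a single "add w0, w1, w2, lsl #2" is emitted; a multiply by 8 on the RHS
// folds the same way as "lsl #3".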
1280 
1281 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1282                                         unsigned RHSReg, bool SetFlags,
1283                                         bool WantResult) {
1284   assert(LHSReg && RHSReg && "Invalid register number.");
1285 
1286   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1287       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1288     return 0;
1289 
1290   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1291     return 0;
1292 
1293   static const unsigned OpcTable[2][2][2] = {
1294     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1295       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1296     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1297       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1298   };
1299   bool Is64Bit = RetVT == MVT::i64;
1300   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1301   const TargetRegisterClass *RC =
1302       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1303   unsigned ResultReg;
1304   if (WantResult)
1305     ResultReg = createResultReg(RC);
1306   else
1307     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1308 
1309   const MCInstrDesc &II = TII.get(Opc);
1310   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1311   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1312   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1313       .addReg(LHSReg)
1314       .addReg(RHSReg);
1315   return ResultReg;
1316 }
1317 
1318 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1319                                         uint64_t Imm, bool SetFlags,
1320                                         bool WantResult) {
1321   assert(LHSReg && "Invalid register number.");
1322 
1323   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1324     return 0;
1325 
1326   unsigned ShiftImm;
1327   if (isUInt<12>(Imm))
1328     ShiftImm = 0;
1329   else if ((Imm & 0xfff000) == Imm) {
1330     ShiftImm = 12;
1331     Imm >>= 12;
1332   } else
1333     return 0;
1334 
1335   static const unsigned OpcTable[2][2][2] = {
1336     { { AArch64::SUBWri,  AArch64::SUBXri  },
1337       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1338     { { AArch64::SUBSWri, AArch64::SUBSXri },
1339       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1340   };
1341   bool Is64Bit = RetVT == MVT::i64;
1342   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1343   const TargetRegisterClass *RC;
1344   if (SetFlags)
1345     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1346   else
1347     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1348   unsigned ResultReg;
1349   if (WantResult)
1350     ResultReg = createResultReg(RC);
1351   else
1352     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1353 
1354   const MCInstrDesc &II = TII.get(Opc);
1355   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1356   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1357       .addReg(LHSReg)
1358       .addImm(Imm)
1359       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1360   return ResultReg;
1361 }
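// Immediate encoding examples (illustrative): Imm = 42 fits the unsigned
// 12-bit field ("add x0, x1, #42"); Imm = 0x5000 only has bits in [12,23] and
// is emitted as "add x0, x1, #5, lsl #12"; Imm = 0x1001 fits neither form, so
// this helper returns 0 and the caller falls back to a register operand.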
1362 
1363 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1364                                         unsigned RHSReg,
1365                                         AArch64_AM::ShiftExtendType ShiftType,
1366                                         uint64_t ShiftImm, bool SetFlags,
1367                                         bool WantResult) {
1368   assert(LHSReg && RHSReg && "Invalid register number.");
1369   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1370          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1371 
1372   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1373     return 0;
1374 
1375   // Don't deal with undefined shifts.
1376   if (ShiftImm >= RetVT.getSizeInBits())
1377     return 0;
1378 
1379   static const unsigned OpcTable[2][2][2] = {
1380     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1381       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1382     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1383       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1384   };
1385   bool Is64Bit = RetVT == MVT::i64;
1386   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1387   const TargetRegisterClass *RC =
1388       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1389   unsigned ResultReg;
1390   if (WantResult)
1391     ResultReg = createResultReg(RC);
1392   else
1393     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1394 
1395   const MCInstrDesc &II = TII.get(Opc);
1396   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1397   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1398   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1399       .addReg(LHSReg)
1400       .addReg(RHSReg)
1401       .addImm(getShifterImm(ShiftType, ShiftImm));
1402   return ResultReg;
1403 }
1404 
1405 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1406                                         unsigned RHSReg,
1407                                         AArch64_AM::ShiftExtendType ExtType,
1408                                         uint64_t ShiftImm, bool SetFlags,
1409                                         bool WantResult) {
1410   assert(LHSReg && RHSReg && "Invalid register number.");
1411   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1412          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1413 
1414   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1415     return 0;
1416 
1417   if (ShiftImm >= 4)
1418     return 0;
1419 
1420   static const unsigned OpcTable[2][2][2] = {
1421     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1422       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1423     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1424       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1425   };
1426   bool Is64Bit = RetVT == MVT::i64;
1427   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1428   const TargetRegisterClass *RC = nullptr;
1429   if (SetFlags)
1430     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1431   else
1432     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1433   unsigned ResultReg;
1434   if (WantResult)
1435     ResultReg = createResultReg(RC);
1436   else
1437     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1438 
1439   const MCInstrDesc &II = TII.get(Opc);
1440   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1441   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1442   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
1443       .addReg(LHSReg)
1444       .addReg(RHSReg)
1445       .addImm(getArithExtendImm(ExtType, ShiftImm));
1446   return ResultReg;
1447 }
1448 
1449 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1450   Type *Ty = LHS->getType();
1451   EVT EVTy = TLI.getValueType(DL, Ty, true);
1452   if (!EVTy.isSimple())
1453     return false;
1454   MVT VT = EVTy.getSimpleVT();
1455 
1456   switch (VT.SimpleTy) {
1457   default:
1458     return false;
1459   case MVT::i1:
1460   case MVT::i8:
1461   case MVT::i16:
1462   case MVT::i32:
1463   case MVT::i64:
1464     return emitICmp(VT, LHS, RHS, IsZExt);
1465   case MVT::f32:
1466   case MVT::f64:
1467     return emitFCmp(VT, LHS, RHS);
1468   }
1469 }
1470 
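// An integer compare is emitted as a subtraction that only sets the flags;
// "cmp w0, w1" is an alias for "subs wzr, w0, w1", so emitSub with
// SetFlags=true and WantResult=false does the job.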
1471 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1472                                bool IsZExt) {
1473   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1474                  IsZExt) != 0;
1475 }
1476 
1477 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1478   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1479                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1480 }
1481 
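// Emit a floating-point compare. FCMP has an immediate form that compares
// against +0.0 (e.g. "fcmp d0, #0.0"), which is why only a non-negative zero
// constant on the RHS is folded below.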
1482 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1483   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1484     return false;
1485 
1486   // Check to see if the 2nd operand is a constant that we can encode directly
1487   // in the compare.
1488   bool UseImm = false;
1489   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1490     if (CFP->isZero() && !CFP->isNegative())
1491       UseImm = true;
1492 
1493   Register LHSReg = getRegForValue(LHS);
1494   if (!LHSReg)
1495     return false;
1496 
1497   if (UseImm) {
1498     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1499     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1500         .addReg(LHSReg);
1501     return true;
1502   }
1503 
1504   Register RHSReg = getRegForValue(RHS);
1505   if (!RHSReg)
1506     return false;
1507 
1508   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1509   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
1510       .addReg(LHSReg)
1511       .addReg(RHSReg);
1512   return true;
1513 }
1514 
1515 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1516                                   bool SetFlags, bool WantResult, bool IsZExt) {
1517   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1518                     IsZExt);
1519 }
1520 
1521 /// This method is a wrapper to simplify add emission.
1522 ///
1523 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1524 /// that fails, then try to materialize the immediate into a register and use
1525 /// emitAddSub_rr instead.
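///
/// A negative immediate is emitted as the corresponding subtraction, e.g.
/// emitAdd_ri_(MVT::i64, Reg, -16) produces roughly "sub x0, x1, #16".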
1526 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1527   unsigned ResultReg;
1528   if (Imm < 0)
1529     ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1530   else
1531     ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1532 
1533   if (ResultReg)
1534     return ResultReg;
1535 
1536   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1537   if (!CReg)
1538     return 0;
1539 
1540   ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1541   return ResultReg;
1542 }
1543 
1544 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1545                                   bool SetFlags, bool WantResult, bool IsZExt) {
1546   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1547                     IsZExt);
1548 }
1549 
1550 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1551                                       unsigned RHSReg, bool WantResult) {
1552   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1553                        /*SetFlags=*/true, WantResult);
1554 }
1555 
1556 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1557                                       unsigned RHSReg,
1558                                       AArch64_AM::ShiftExtendType ShiftType,
1559                                       uint64_t ShiftImm, bool WantResult) {
1560   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1561                        ShiftImm, /*SetFlags=*/true, WantResult);
1562 }
1563 
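// Emit AND/ORR/EOR for the given IR values. A shift-left (or an equivalent
// multiply by a power of two) on one operand can be folded into the
// shifted-register form of the instruction, e.g. "a & (b << 4)" becomes
// roughly "and w0, w1, w2, lsl #4"; i8/i16 results are masked back down
// afterwards.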
1564 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1565                                         const Value *LHS, const Value *RHS) {
1566   // Canonicalize immediates to the RHS first.
1567   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1568     std::swap(LHS, RHS);
1569 
1570   // Canonicalize mul by power-of-2 to the RHS.
1571   if (LHS->hasOneUse() && isValueAvailable(LHS))
1572     if (isMulPowOf2(LHS))
1573       std::swap(LHS, RHS);
1574 
1575   // Canonicalize shift immediate to the RHS.
1576   if (LHS->hasOneUse() && isValueAvailable(LHS))
1577     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1578       if (isa<ConstantInt>(SI->getOperand(1)))
1579         std::swap(LHS, RHS);
1580 
1581   Register LHSReg = getRegForValue(LHS);
1582   if (!LHSReg)
1583     return 0;
1584 
1585   unsigned ResultReg = 0;
1586   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1587     uint64_t Imm = C->getZExtValue();
1588     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1589   }
1590   if (ResultReg)
1591     return ResultReg;
1592 
1593   // Check if the mul can be folded into the instruction.
1594   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1595     if (isMulPowOf2(RHS)) {
1596       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1597       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1598 
1599       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1600         if (C->getValue().isPowerOf2())
1601           std::swap(MulLHS, MulRHS);
1602 
1603       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1604       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1605 
1606       Register RHSReg = getRegForValue(MulLHS);
1607       if (!RHSReg)
1608         return 0;
1609       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1610       if (ResultReg)
1611         return ResultReg;
1612     }
1613   }
1614 
1615   // Check if the shift can be folded into the instruction.
1616   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1617     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1618       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1619         uint64_t ShiftVal = C->getZExtValue();
1620         Register RHSReg = getRegForValue(SI->getOperand(0));
1621         if (!RHSReg)
1622           return 0;
1623         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1624         if (ResultReg)
1625           return ResultReg;
1626       }
1627   }
1628 
1629   Register RHSReg = getRegForValue(RHS);
1630   if (!RHSReg)
1631     return 0;
1632 
1633   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1634   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1635   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1636     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1637     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1638   }
1639   return ResultReg;
1640 }
1641 
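// Emit AND/ORR/EOR with an immediate operand. AArch64 logical instructions can
// only encode "bitmask" immediates (a rotated run of ones replicated across
// the register), so e.g. 0xff0 is encodable while 0x123456 is not;
// isLogicalImmediate rejects anything else and we return 0 so the caller can
// fall back to the register-register form.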
1642 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1643                                            unsigned LHSReg, uint64_t Imm) {
1644   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1645                 "ISD nodes are not consecutive!");
1646   static const unsigned OpcTable[3][2] = {
1647     { AArch64::ANDWri, AArch64::ANDXri },
1648     { AArch64::ORRWri, AArch64::ORRXri },
1649     { AArch64::EORWri, AArch64::EORXri }
1650   };
1651   const TargetRegisterClass *RC;
1652   unsigned Opc;
1653   unsigned RegSize;
1654   switch (RetVT.SimpleTy) {
1655   default:
1656     return 0;
1657   case MVT::i1:
1658   case MVT::i8:
1659   case MVT::i16:
1660   case MVT::i32: {
1661     unsigned Idx = ISDOpc - ISD::AND;
1662     Opc = OpcTable[Idx][0];
1663     RC = &AArch64::GPR32spRegClass;
1664     RegSize = 32;
1665     break;
1666   }
1667   case MVT::i64:
1668     Opc = OpcTable[ISDOpc - ISD::AND][1];
1669     RC = &AArch64::GPR64spRegClass;
1670     RegSize = 64;
1671     break;
1672   }
1673 
1674   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1675     return 0;
1676 
1677   Register ResultReg =
1678       fastEmitInst_ri(Opc, RC, LHSReg,
1679                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1680   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1681     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1682     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1683   }
1684   return ResultReg;
1685 }
1686 
1687 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1688                                            unsigned LHSReg, unsigned RHSReg,
1689                                            uint64_t ShiftImm) {
1690   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1691                 "ISD nodes are not consecutive!");
1692   static const unsigned OpcTable[3][2] = {
1693     { AArch64::ANDWrs, AArch64::ANDXrs },
1694     { AArch64::ORRWrs, AArch64::ORRXrs },
1695     { AArch64::EORWrs, AArch64::EORXrs }
1696   };
1697 
1698   // Don't deal with undefined shifts.
1699   if (ShiftImm >= RetVT.getSizeInBits())
1700     return 0;
1701 
1702   const TargetRegisterClass *RC;
1703   unsigned Opc;
1704   switch (RetVT.SimpleTy) {
1705   default:
1706     return 0;
1707   case MVT::i1:
1708   case MVT::i8:
1709   case MVT::i16:
1710   case MVT::i32:
1711     Opc = OpcTable[ISDOpc - ISD::AND][0];
1712     RC = &AArch64::GPR32RegClass;
1713     break;
1714   case MVT::i64:
1715     Opc = OpcTable[ISDOpc - ISD::AND][1];
1716     RC = &AArch64::GPR64RegClass;
1717     break;
1718   }
1719   Register ResultReg =
1720       fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1721                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1722   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1723     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1724     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1725   }
1726   return ResultReg;
1727 }
1728 
1729 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1730                                      uint64_t Imm) {
1731   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1732 }
1733 
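// Emit a load of VT, possibly zero-/sign-extended to RetVT. Three addressing
// forms are chosen from below: unscaled LDUR* with a signed 9-bit offset (for
// negative or unaligned offsets), LDR* with a scaled unsigned 12-bit offset,
// and the register-offset LDR*ro* forms (optionally with a UXTW/SXTW extend),
// e.g. roughly "ldur w0, [x1, #-8]", "ldr w0, [x1, #16]" or
// "ldr w0, [x1, x2, lsl #2]".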
1734 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1735                                    bool WantZExt, MachineMemOperand *MMO) {
1736   if (!TLI.allowsMisalignedMemoryAccesses(VT))
1737     return 0;
1738 
1739   // Simplify this down to something we can handle.
1740   if (!simplifyAddress(Addr, VT))
1741     return 0;
1742 
1743   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1744   if (!ScaleFactor)
1745     llvm_unreachable("Unexpected value type.");
1746 
1747   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1748   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
1749   bool UseScaled = true;
1750   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1751     UseScaled = false;
1752     ScaleFactor = 1;
1753   }
1754 
1755   static const unsigned GPOpcTable[2][8][4] = {
1756     // Sign-extend.
1757     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1758         AArch64::LDURXi  },
1759       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1760         AArch64::LDURXi  },
1761       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1762         AArch64::LDRXui  },
1763       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1764         AArch64::LDRXui  },
1765       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1766         AArch64::LDRXroX },
1767       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1768         AArch64::LDRXroX },
1769       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1770         AArch64::LDRXroW },
1771       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1772         AArch64::LDRXroW }
1773     },
1774     // Zero-extend.
1775     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1776         AArch64::LDURXi  },
1777       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1778         AArch64::LDURXi  },
1779       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1780         AArch64::LDRXui  },
1781       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1782         AArch64::LDRXui  },
1783       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1784         AArch64::LDRXroX },
1785       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1786         AArch64::LDRXroX },
1787       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1788         AArch64::LDRXroW },
1789       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1790         AArch64::LDRXroW }
1791     }
1792   };
1793 
1794   static const unsigned FPOpcTable[4][2] = {
1795     { AArch64::LDURSi,  AArch64::LDURDi  },
1796     { AArch64::LDRSui,  AArch64::LDRDui  },
1797     { AArch64::LDRSroX, AArch64::LDRDroX },
1798     { AArch64::LDRSroW, AArch64::LDRDroW }
1799   };
1800 
1801   unsigned Opc;
1802   const TargetRegisterClass *RC;
1803   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1804                       Addr.getOffsetReg();
1805   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1806   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1807       Addr.getExtendType() == AArch64_AM::SXTW)
1808     Idx++;
1809 
1810   bool IsRet64Bit = RetVT == MVT::i64;
1811   switch (VT.SimpleTy) {
1812   default:
1813     llvm_unreachable("Unexpected value type.");
1814   case MVT::i1: // Intentional fall-through.
1815   case MVT::i8:
1816     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1817     RC = (IsRet64Bit && !WantZExt) ?
1818              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1819     break;
1820   case MVT::i16:
1821     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1822     RC = (IsRet64Bit && !WantZExt) ?
1823              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1824     break;
1825   case MVT::i32:
1826     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1827     RC = (IsRet64Bit && !WantZExt) ?
1828              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1829     break;
1830   case MVT::i64:
1831     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1832     RC = &AArch64::GPR64RegClass;
1833     break;
1834   case MVT::f32:
1835     Opc = FPOpcTable[Idx][0];
1836     RC = &AArch64::FPR32RegClass;
1837     break;
1838   case MVT::f64:
1839     Opc = FPOpcTable[Idx][1];
1840     RC = &AArch64::FPR64RegClass;
1841     break;
1842   }
1843 
1844   // Create the base instruction, then add the operands.
1845   Register ResultReg = createResultReg(RC);
1846   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1847                                     TII.get(Opc), ResultReg);
1848   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1849 
1850   // Loading an i1 requires special handling.
1851   if (VT == MVT::i1) {
1852     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1853     assert(ANDReg && "Unexpected AND instruction emission failure.");
1854     ResultReg = ANDReg;
1855   }
1856 
1857   // For zero-extending loads to 64 bits we emit a 32-bit load and then
1858   // convert the 32-bit register to a 64-bit register via SUBREG_TO_REG.
1859   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1860     Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1861     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
1862             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1863         .addImm(0)
1864         .addReg(ResultReg, getKillRegState(true))
1865         .addImm(AArch64::sub_32);
1866     ResultReg = Reg64;
1867   }
1868   return ResultReg;
1869 }
1870 
1871 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1872   MVT VT;
1873   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1874     return false;
1875 
1876   if (VT.isVector())
1877     return selectOperator(I, I->getOpcode());
1878 
1879   unsigned ResultReg;
1880   switch (I->getOpcode()) {
1881   default:
1882     llvm_unreachable("Unexpected instruction.");
1883   case Instruction::Add:
1884     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1885     break;
1886   case Instruction::Sub:
1887     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1888     break;
1889   }
1890   if (!ResultReg)
1891     return false;
1892 
1893   updateValueMap(I, ResultReg);
1894   return true;
1895 }
1896 
1897 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1898   MVT VT;
1899   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1900     return false;
1901 
1902   if (VT.isVector())
1903     return selectOperator(I, I->getOpcode());
1904 
1905   unsigned ResultReg;
1906   switch (I->getOpcode()) {
1907   default:
1908     llvm_unreachable("Unexpected instruction.");
1909   case Instruction::And:
1910     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1911     break;
1912   case Instruction::Or:
1913     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1914     break;
1915   case Instruction::Xor:
1916     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1917     break;
1918   }
1919   if (!ResultReg)
1920     return false;
1921 
1922   updateValueMap(I, ResultReg);
1923   return true;
1924 }
1925 
1926 bool AArch64FastISel::selectLoad(const Instruction *I) {
1927   MVT VT;
1928   // Verify we have a legal type before going any further.  Currently, we handle
1929   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1930   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1931   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1932       cast<LoadInst>(I)->isAtomic())
1933     return false;
1934 
1935   const Value *SV = I->getOperand(0);
1936   if (TLI.supportSwiftError()) {
1937     // Swifterror values can come from either a function parameter with
1938     // swifterror attribute or an alloca with swifterror attribute.
1939     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1940       if (Arg->hasSwiftErrorAttr())
1941         return false;
1942     }
1943 
1944     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1945       if (Alloca->isSwiftError())
1946         return false;
1947     }
1948   }
1949 
1950   // See if we can handle this address.
1951   Address Addr;
1952   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1953     return false;
1954 
1955   // Fold the following sign-/zero-extend into the load instruction.
1956   bool WantZExt = true;
1957   MVT RetVT = VT;
1958   const Value *IntExtVal = nullptr;
1959   if (I->hasOneUse()) {
1960     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1961       if (isTypeSupported(ZE->getType(), RetVT))
1962         IntExtVal = ZE;
1963       else
1964         RetVT = VT;
1965     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1966       if (isTypeSupported(SE->getType(), RetVT))
1967         IntExtVal = SE;
1968       else
1969         RetVT = VT;
1970       WantZExt = false;
1971     }
1972   }
1973 
1974   unsigned ResultReg =
1975       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1976   if (!ResultReg)
1977     return false;
1978 
1979   // There are a few different cases we have to handle, because the load or the
1980   // sign-/zero-extend might not be selected by FastISel if we fall-back to
1981   // SelectionDAG. There is also an ordering issue when both instructions are in
1982   // different basic blocks.
1983   // 1.) The load instruction is selected by FastISel, but the integer extend
1984   //     not. This usually happens when the integer extend is in a different
1985   //     basic block and SelectionDAG took over for that basic block.
1986   // 2.) The load instruction is selected before the integer extend. This only
1987   //     happens when the integer extend is in a different basic block.
1988   // 3.) The load instruction is selected by SelectionDAG and the integer extend
1989   //     by FastISel. This happens if there are instructions between the load
1990   //     and the integer extend that couldn't be selected by FastISel.
1991   if (IntExtVal) {
1992     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
1993     // could select it. Emit a copy to subreg if necessary. FastISel will remove
1994     // it when it selects the integer extend.
1995     Register Reg = lookUpRegForValue(IntExtVal);
1996     auto *MI = MRI.getUniqueVRegDef(Reg);
1997     if (!MI) {
1998       if (RetVT == MVT::i64 && VT <= MVT::i32) {
1999         if (WantZExt) {
2000           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2001           MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2002           ResultReg = std::prev(I)->getOperand(0).getReg();
2003           removeDeadCode(I, std::next(I));
2004         } else
2005           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2006                                                  AArch64::sub_32);
2007       }
2008       updateValueMap(I, ResultReg);
2009       return true;
2010     }
2011 
2012     // The integer extend has already been emitted - delete all the instructions
2013     // that have been emitted by the integer extend lowering code and use the
2014     // result from the load instruction directly.
2015     while (MI) {
2016       Reg = 0;
2017       for (auto &Opnd : MI->uses()) {
2018         if (Opnd.isReg()) {
2019           Reg = Opnd.getReg();
2020           break;
2021         }
2022       }
2023       MachineBasicBlock::iterator I(MI);
2024       removeDeadCode(I, std::next(I));
2025       MI = nullptr;
2026       if (Reg)
2027         MI = MRI.getUniqueVRegDef(Reg);
2028     }
2029     updateValueMap(IntExtVal, ResultReg);
2030     return true;
2031   }
2032 
2033   updateValueMap(I, ResultReg);
2034   return true;
2035 }
2036 
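// Emit a store-release (STLR*), which only supports base-register addressing,
// e.g. roughly "stlr w0, [x1]". This is how release and seq_cst atomic stores
// are handled on this FastISel path.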
2037 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2038                                        unsigned AddrReg,
2039                                        MachineMemOperand *MMO) {
2040   unsigned Opc;
2041   switch (VT.SimpleTy) {
2042   default: return false;
2043   case MVT::i8:  Opc = AArch64::STLRB; break;
2044   case MVT::i16: Opc = AArch64::STLRH; break;
2045   case MVT::i32: Opc = AArch64::STLRW; break;
2046   case MVT::i64: Opc = AArch64::STLRX; break;
2047   }
2048 
2049   const MCInstrDesc &II = TII.get(Opc);
2050   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2051   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2052   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2053       .addReg(SrcReg)
2054       .addReg(AddrReg)
2055       .addMemOperand(MMO);
2056   return true;
2057 }
2058 
2059 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2060                                 MachineMemOperand *MMO) {
2061   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2062     return false;
2063 
2064   // Simplify this down to something we can handle.
2065   if (!simplifyAddress(Addr, VT))
2066     return false;
2067 
2068   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2069   if (!ScaleFactor)
2070     llvm_unreachable("Unexpected value type.");
2071 
2072   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2073   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2074   bool UseScaled = true;
2075   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2076     UseScaled = false;
2077     ScaleFactor = 1;
2078   }
2079 
2080   static const unsigned OpcTable[4][6] = {
2081     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2082       AArch64::STURSi,   AArch64::STURDi },
2083     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2084       AArch64::STRSui,   AArch64::STRDui },
2085     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2086       AArch64::STRSroX,  AArch64::STRDroX },
2087     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2088       AArch64::STRSroW,  AArch64::STRDroW }
2089   };
2090 
2091   unsigned Opc;
2092   bool VTIsi1 = false;
2093   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2094                       Addr.getOffsetReg();
2095   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2096   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2097       Addr.getExtendType() == AArch64_AM::SXTW)
2098     Idx++;
2099 
2100   switch (VT.SimpleTy) {
2101   default: llvm_unreachable("Unexpected value type.");
2102   case MVT::i1:  VTIsi1 = true; LLVM_FALLTHROUGH;
2103   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2104   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2105   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2106   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2107   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2108   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2109   }
2110 
2111   // Storing an i1 requires special handling.
2112   if (VTIsi1 && SrcReg != AArch64::WZR) {
2113     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2114     assert(ANDReg && "Unexpected AND instruction emission failure.");
2115     SrcReg = ANDReg;
2116   }
2117   // Create the base instruction, then add the operands.
2118   const MCInstrDesc &II = TII.get(Opc);
2119   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2120   MachineInstrBuilder MIB =
2121       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg);
2122   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2123 
2124   return true;
2125 }
2126 
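// Select a store instruction. Constant zero values (integer zero or +0.0) are
// stored directly from WZR/XZR, e.g. roughly "str wzr, [x0]", so no constant
// needs to be materialized first.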
2127 bool AArch64FastISel::selectStore(const Instruction *I) {
2128   MVT VT;
2129   const Value *Op0 = I->getOperand(0);
2130   // Verify we have a legal type before going any further.  Currently, we handle
2131   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2132   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2133   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2134     return false;
2135 
2136   const Value *PtrV = I->getOperand(1);
2137   if (TLI.supportSwiftError()) {
2138     // Swifterror values can come from either a function parameter with
2139     // swifterror attribute or an alloca with swifterror attribute.
2140     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2141       if (Arg->hasSwiftErrorAttr())
2142         return false;
2143     }
2144 
2145     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2146       if (Alloca->isSwiftError())
2147         return false;
2148     }
2149   }
2150 
2151   // Get the value to be stored into a register. Use the zero register directly
2152   // when possible to avoid an unnecessary copy and a wasted register.
2153   unsigned SrcReg = 0;
2154   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2155     if (CI->isZero())
2156       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2157   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2158     if (CF->isZero() && !CF->isNegative()) {
2159       VT = MVT::getIntegerVT(VT.getSizeInBits());
2160       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2161     }
2162   }
2163 
2164   if (!SrcReg)
2165     SrcReg = getRegForValue(Op0);
2166 
2167   if (!SrcReg)
2168     return false;
2169 
2170   auto *SI = cast<StoreInst>(I);
2171 
2172   // Try to emit a STLR for seq_cst/release.
2173   if (SI->isAtomic()) {
2174     AtomicOrdering Ord = SI->getOrdering();
2175     // The non-atomic instructions are sufficient for relaxed stores.
2176     if (isReleaseOrStronger(Ord)) {
2177       // The STLR addressing mode only supports a base reg; pass that directly.
2178       Register AddrReg = getRegForValue(PtrV);
2179       return emitStoreRelease(VT, SrcReg, AddrReg,
2180                               createMachineMemOperandFor(I));
2181     }
2182   }
2183 
2184   // See if we can handle this address.
2185   Address Addr;
2186   if (!computeAddress(PtrV, Addr, Op0->getType()))
2187     return false;
2188 
2189   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2190     return false;
2191   return true;
2192 }
2193 
2194 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2195   switch (Pred) {
2196   case CmpInst::FCMP_ONE:
2197   case CmpInst::FCMP_UEQ:
2198   default:
2199     // AL is our "unsupported" sentinel; FCMP_ONE and FCMP_UEQ need a second check.
2200     return AArch64CC::AL;
2201   case CmpInst::ICMP_EQ:
2202   case CmpInst::FCMP_OEQ:
2203     return AArch64CC::EQ;
2204   case CmpInst::ICMP_SGT:
2205   case CmpInst::FCMP_OGT:
2206     return AArch64CC::GT;
2207   case CmpInst::ICMP_SGE:
2208   case CmpInst::FCMP_OGE:
2209     return AArch64CC::GE;
2210   case CmpInst::ICMP_UGT:
2211   case CmpInst::FCMP_UGT:
2212     return AArch64CC::HI;
2213   case CmpInst::FCMP_OLT:
2214     return AArch64CC::MI;
2215   case CmpInst::ICMP_ULE:
2216   case CmpInst::FCMP_OLE:
2217     return AArch64CC::LS;
2218   case CmpInst::FCMP_ORD:
2219     return AArch64CC::VC;
2220   case CmpInst::FCMP_UNO:
2221     return AArch64CC::VS;
2222   case CmpInst::FCMP_UGE:
2223     return AArch64CC::PL;
2224   case CmpInst::ICMP_SLT:
2225   case CmpInst::FCMP_ULT:
2226     return AArch64CC::LT;
2227   case CmpInst::ICMP_SLE:
2228   case CmpInst::FCMP_ULE:
2229     return AArch64CC::LE;
2230   case CmpInst::FCMP_UNE:
2231   case CmpInst::ICMP_NE:
2232     return AArch64CC::NE;
2233   case CmpInst::ICMP_UGE:
2234     return AArch64CC::HS;
2235   case CmpInst::ICMP_ULT:
2236     return AArch64CC::LO;
2237   }
2238 }
2239 
2240 /// Try to emit a combined compare-and-branch instruction.
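///
/// For example, "(x & 8) != 0" can be lowered to "tbnz w0, #3, <bb>", a signed
/// "x < 0" test on an i64 to "tbnz x0, #63, <bb>", and a plain "x == 0" to
/// "cbz w0, <bb>", avoiding a separate SUBS/B.cc pair.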
2241 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2242   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2243   // will not be produced, as they are conditional branch instructions that do
2244   // not set flags.
2245   if (FuncInfo.MF->getFunction().hasFnAttribute(
2246           Attribute::SpeculativeLoadHardening))
2247     return false;
2248 
2249   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2250   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2251   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2252 
2253   const Value *LHS = CI->getOperand(0);
2254   const Value *RHS = CI->getOperand(1);
2255 
2256   MVT VT;
2257   if (!isTypeSupported(LHS->getType(), VT))
2258     return false;
2259 
2260   unsigned BW = VT.getSizeInBits();
2261   if (BW > 64)
2262     return false;
2263 
2264   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2265   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2266 
2267   // Try to take advantage of fallthrough opportunities.
2268   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2269     std::swap(TBB, FBB);
2270     Predicate = CmpInst::getInversePredicate(Predicate);
2271   }
2272 
2273   int TestBit = -1;
2274   bool IsCmpNE;
2275   switch (Predicate) {
2276   default:
2277     return false;
2278   case CmpInst::ICMP_EQ:
2279   case CmpInst::ICMP_NE:
2280     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2281       std::swap(LHS, RHS);
2282 
2283     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2284       return false;
2285 
2286     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2287       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2288         const Value *AndLHS = AI->getOperand(0);
2289         const Value *AndRHS = AI->getOperand(1);
2290 
2291         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2292           if (C->getValue().isPowerOf2())
2293             std::swap(AndLHS, AndRHS);
2294 
2295         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2296           if (C->getValue().isPowerOf2()) {
2297             TestBit = C->getValue().logBase2();
2298             LHS = AndLHS;
2299           }
2300       }
2301 
2302     if (VT == MVT::i1)
2303       TestBit = 0;
2304 
2305     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2306     break;
2307   case CmpInst::ICMP_SLT:
2308   case CmpInst::ICMP_SGE:
2309     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2310       return false;
2311 
2312     TestBit = BW - 1;
2313     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2314     break;
2315   case CmpInst::ICMP_SGT:
2316   case CmpInst::ICMP_SLE:
2317     if (!isa<ConstantInt>(RHS))
2318       return false;
2319 
2320     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2321       return false;
2322 
2323     TestBit = BW - 1;
2324     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2325     break;
2326   } // end switch
2327 
2328   static const unsigned OpcTable[2][2][2] = {
2329     { {AArch64::CBZW,  AArch64::CBZX },
2330       {AArch64::CBNZW, AArch64::CBNZX} },
2331     { {AArch64::TBZW,  AArch64::TBZX },
2332       {AArch64::TBNZW, AArch64::TBNZX} }
2333   };
2334 
2335   bool IsBitTest = TestBit != -1;
2336   bool Is64Bit = BW == 64;
2337   if (TestBit < 32 && TestBit >= 0)
2338     Is64Bit = false;
2339 
2340   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2341   const MCInstrDesc &II = TII.get(Opc);
2342 
2343   Register SrcReg = getRegForValue(LHS);
2344   if (!SrcReg)
2345     return false;
2346 
2347   if (BW == 64 && !Is64Bit)
2348     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2349 
2350   if ((BW < 32) && !IsBitTest)
2351     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2352 
2353   // Emit the combined compare and branch instruction.
2354   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2355   MachineInstrBuilder MIB =
2356       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
2357           .addReg(SrcReg);
2358   if (IsBitTest)
2359     MIB.addImm(TestBit);
2360   MIB.addMBB(TBB);
2361 
2362   finishCondBranch(BI->getParent(), TBB, FBB);
2363   return true;
2364 }
2365 
2366 bool AArch64FastISel::selectBranch(const Instruction *I) {
2367   const BranchInst *BI = cast<BranchInst>(I);
2368   if (BI->isUnconditional()) {
2369     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2370     fastEmitBranch(MSucc, BI->getDebugLoc());
2371     return true;
2372   }
2373 
2374   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2375   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2376 
2377   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2378     if (CI->hasOneUse() && isValueAvailable(CI)) {
2379       // Try to optimize or fold the cmp.
2380       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2381       switch (Predicate) {
2382       default:
2383         break;
2384       case CmpInst::FCMP_FALSE:
2385         fastEmitBranch(FBB, DbgLoc);
2386         return true;
2387       case CmpInst::FCMP_TRUE:
2388         fastEmitBranch(TBB, DbgLoc);
2389         return true;
2390       }
2391 
2392       // Try to emit a combined compare-and-branch first.
2393       if (emitCompareAndBranch(BI))
2394         return true;
2395 
2396       // Try to take advantage of fallthrough opportunities.
2397       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2398         std::swap(TBB, FBB);
2399         Predicate = CmpInst::getInversePredicate(Predicate);
2400       }
2401 
2402       // Emit the cmp.
2403       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2404         return false;
2405 
2406       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2407       // instruction.
2408       AArch64CC::CondCode CC = getCompareCC(Predicate);
2409       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2410       switch (Predicate) {
2411       default:
2412         break;
2413       case CmpInst::FCMP_UEQ:
2414         ExtraCC = AArch64CC::EQ;
2415         CC = AArch64CC::VS;
2416         break;
2417       case CmpInst::FCMP_ONE:
2418         ExtraCC = AArch64CC::MI;
2419         CC = AArch64CC::GT;
2420         break;
2421       }
2422       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2423 
2424       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2425       if (ExtraCC != AArch64CC::AL) {
2426         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2427             .addImm(ExtraCC)
2428             .addMBB(TBB);
2429       }
2430 
2431       // Emit the branch.
2432       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2433           .addImm(CC)
2434           .addMBB(TBB);
2435 
2436       finishCondBranch(BI->getParent(), TBB, FBB);
2437       return true;
2438     }
2439   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2440     uint64_t Imm = CI->getZExtValue();
2441     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2442     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B))
2443         .addMBB(Target);
2444 
2445     // Obtain the branch probability and add the target to the successor list.
2446     if (FuncInfo.BPI) {
2447       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2448           BI->getParent(), Target->getBasicBlock());
2449       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2450     } else
2451       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2452     return true;
2453   } else {
2454     AArch64CC::CondCode CC = AArch64CC::NE;
2455     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2456       // Fake-request the condition; otherwise the intrinsic might be completely
2457       // optimized away.
2458       Register CondReg = getRegForValue(BI->getCondition());
2459       if (!CondReg)
2460         return false;
2461 
2462       // Emit the branch.
2463       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
2464         .addImm(CC)
2465         .addMBB(TBB);
2466 
2467       finishCondBranch(BI->getParent(), TBB, FBB);
2468       return true;
2469     }
2470   }
2471 
2472   Register CondReg = getRegForValue(BI->getCondition());
2473   if (CondReg == 0)
2474     return false;
2475 
2476   // i1 conditions come in as i32 values; test the lowest bit with tb(n)z.
2477   unsigned Opcode = AArch64::TBNZW;
2478   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2479     std::swap(TBB, FBB);
2480     Opcode = AArch64::TBZW;
2481   }
2482 
2483   const MCInstrDesc &II = TII.get(Opcode);
2484   Register ConstrainedCondReg
2485     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2486   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
2487       .addReg(ConstrainedCondReg)
2488       .addImm(0)
2489       .addMBB(TBB);
2490 
2491   finishCondBranch(BI->getParent(), TBB, FBB);
2492   return true;
2493 }
2494 
2495 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2496   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2497   Register AddrReg = getRegForValue(BI->getOperand(0));
2498   if (AddrReg == 0)
2499     return false;
2500 
2501   // Emit the indirect branch.
2502   const MCInstrDesc &II = TII.get(AArch64::BR);
2503   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2504   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg);
2505 
2506   // Make sure the CFG is up-to-date.
2507   for (auto *Succ : BI->successors())
2508     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2509 
2510   return true;
2511 }
2512 
2513 bool AArch64FastISel::selectCmp(const Instruction *I) {
2514   const CmpInst *CI = cast<CmpInst>(I);
2515 
2516   // Vectors of i1 are weird: bail out.
2517   if (CI->getType()->isVectorTy())
2518     return false;
2519 
2520   // Try to optimize or fold the cmp.
2521   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2522   unsigned ResultReg = 0;
2523   switch (Predicate) {
2524   default:
2525     break;
2526   case CmpInst::FCMP_FALSE:
2527     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2528     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2529             TII.get(TargetOpcode::COPY), ResultReg)
2530         .addReg(AArch64::WZR, getKillRegState(true));
2531     break;
2532   case CmpInst::FCMP_TRUE:
2533     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2534     break;
2535   }
2536 
2537   if (ResultReg) {
2538     updateValueMap(I, ResultReg);
2539     return true;
2540   }
2541 
2542   // Emit the cmp.
2543   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2544     return false;
2545 
2546   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2547 
2548   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. The
2549   // condition codes are inverted because CSINC produces 1 when its condition is false.
2550   static unsigned CondCodeTable[2][2] = {
2551     { AArch64CC::NE, AArch64CC::VC },
2552     { AArch64CC::PL, AArch64CC::LE }
2553   };
2554   unsigned *CondCodes = nullptr;
2555   switch (Predicate) {
2556   default:
2557     break;
2558   case CmpInst::FCMP_UEQ:
2559     CondCodes = &CondCodeTable[0][0];
2560     break;
2561   case CmpInst::FCMP_ONE:
2562     CondCodes = &CondCodeTable[1][0];
2563     break;
2564   }
2565 
2566   if (CondCodes) {
2567     Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2568     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2569             TmpReg1)
2570         .addReg(AArch64::WZR, getKillRegState(true))
2571         .addReg(AArch64::WZR, getKillRegState(true))
2572         .addImm(CondCodes[0]);
2573     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2574             ResultReg)
2575         .addReg(TmpReg1, getKillRegState(true))
2576         .addReg(AArch64::WZR, getKillRegState(true))
2577         .addImm(CondCodes[1]);
2578 
2579     updateValueMap(I, ResultReg);
2580     return true;
2581   }
2582 
2583   // Now set a register based on the comparison.
2584   AArch64CC::CondCode CC = getCompareCC(Predicate);
2585   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2586   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2587   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr),
2588           ResultReg)
2589       .addReg(AArch64::WZR, getKillRegState(true))
2590       .addReg(AArch64::WZR, getKillRegState(true))
2591       .addImm(invertedCC);
2592 
2593   updateValueMap(I, ResultReg);
2594   return true;
2595 }
2596 
2597 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2598 /// value.
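///
/// For example, "select i1 %c, i1 true, i1 %f" becomes an ORR of %c and %f,
/// and "select i1 %c, i1 %t, i1 false" becomes an AND; the extra EOR with 1
/// below is used when the condition needs to be inverted first.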
2599 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2600   if (!SI->getType()->isIntegerTy(1))
2601     return false;
2602 
2603   const Value *Src1Val, *Src2Val;
2604   unsigned Opc = 0;
2605   bool NeedExtraOp = false;
2606   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2607     if (CI->isOne()) {
2608       Src1Val = SI->getCondition();
2609       Src2Val = SI->getFalseValue();
2610       Opc = AArch64::ORRWrr;
2611     } else {
2612       assert(CI->isZero());
2613       Src1Val = SI->getFalseValue();
2614       Src2Val = SI->getCondition();
2615       Opc = AArch64::BICWrr;
2616     }
2617   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2618     if (CI->isOne()) {
2619       Src1Val = SI->getCondition();
2620       Src2Val = SI->getTrueValue();
2621       Opc = AArch64::ORRWrr;
2622       NeedExtraOp = true;
2623     } else {
2624       assert(CI->isZero());
2625       Src1Val = SI->getCondition();
2626       Src2Val = SI->getTrueValue();
2627       Opc = AArch64::ANDWrr;
2628     }
2629   }
2630 
2631   if (!Opc)
2632     return false;
2633 
2634   Register Src1Reg = getRegForValue(Src1Val);
2635   if (!Src1Reg)
2636     return false;
2637 
2638   Register Src2Reg = getRegForValue(Src2Val);
2639   if (!Src2Reg)
2640     return false;
2641 
2642   if (NeedExtraOp)
2643     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2644 
2645   Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2646                                        Src2Reg);
2647   updateValueMap(SI, ResultReg);
2648   return true;
2649 }
2650 
2651 bool AArch64FastISel::selectSelect(const Instruction *I) {
2652   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2653   MVT VT;
2654   if (!isTypeSupported(I->getType(), VT))
2655     return false;
2656 
2657   unsigned Opc;
2658   const TargetRegisterClass *RC;
2659   switch (VT.SimpleTy) {
2660   default:
2661     return false;
2662   case MVT::i1:
2663   case MVT::i8:
2664   case MVT::i16:
2665   case MVT::i32:
2666     Opc = AArch64::CSELWr;
2667     RC = &AArch64::GPR32RegClass;
2668     break;
2669   case MVT::i64:
2670     Opc = AArch64::CSELXr;
2671     RC = &AArch64::GPR64RegClass;
2672     break;
2673   case MVT::f32:
2674     Opc = AArch64::FCSELSrrr;
2675     RC = &AArch64::FPR32RegClass;
2676     break;
2677   case MVT::f64:
2678     Opc = AArch64::FCSELDrrr;
2679     RC = &AArch64::FPR64RegClass;
2680     break;
2681   }
2682 
2683   const SelectInst *SI = cast<SelectInst>(I);
2684   const Value *Cond = SI->getCondition();
2685   AArch64CC::CondCode CC = AArch64CC::NE;
2686   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2687 
2688   if (optimizeSelect(SI))
2689     return true;
2690 
2691   // Try to pick up the flags, so we don't have to emit another compare.
2692   if (foldXALUIntrinsic(CC, I, Cond)) {
2693     // Fake-request the condition to force emission of the XALU intrinsic.
2694     Register CondReg = getRegForValue(Cond);
2695     if (!CondReg)
2696       return false;
2697   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2698              isValueAvailable(Cond)) {
2699     const auto *Cmp = cast<CmpInst>(Cond);
2700     // Try to optimize or fold the cmp.
2701     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2702     const Value *FoldSelect = nullptr;
2703     switch (Predicate) {
2704     default:
2705       break;
2706     case CmpInst::FCMP_FALSE:
2707       FoldSelect = SI->getFalseValue();
2708       break;
2709     case CmpInst::FCMP_TRUE:
2710       FoldSelect = SI->getTrueValue();
2711       break;
2712     }
2713 
2714     if (FoldSelect) {
2715       Register SrcReg = getRegForValue(FoldSelect);
2716       if (!SrcReg)
2717         return false;
2718 
2719       updateValueMap(I, SrcReg);
2720       return true;
2721     }
2722 
2723     // Emit the cmp.
2724     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2725       return false;
2726 
2727     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2728     CC = getCompareCC(Predicate);
2729     switch (Predicate) {
2730     default:
2731       break;
2732     case CmpInst::FCMP_UEQ:
2733       ExtraCC = AArch64CC::EQ;
2734       CC = AArch64CC::VS;
2735       break;
2736     case CmpInst::FCMP_ONE:
2737       ExtraCC = AArch64CC::MI;
2738       CC = AArch64CC::GT;
2739       break;
2740     }
2741     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2742   } else {
2743     Register CondReg = getRegForValue(Cond);
2744     if (!CondReg)
2745       return false;
2746 
2747     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2748     CondReg = constrainOperandRegClass(II, CondReg, 1);
2749 
2750     // Emit a TST instruction (ANDS wzr, reg, #imm).
2751     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
2752             AArch64::WZR)
2753         .addReg(CondReg)
2754         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2755   }
2756 
2757   Register Src1Reg = getRegForValue(SI->getTrueValue());
2758   Register Src2Reg = getRegForValue(SI->getFalseValue());
2759 
2760   if (!Src1Reg || !Src2Reg)
2761     return false;
2762 
2763   if (ExtraCC != AArch64CC::AL)
2764     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2765 
2766   Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2767   updateValueMap(I, ResultReg);
2768   return true;
2769 }
2770 
2771 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2772   Value *V = I->getOperand(0);
2773   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2774     return false;
2775 
2776   Register Op = getRegForValue(V);
2777   if (Op == 0)
2778     return false;
2779 
2780   Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2781   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr),
2782           ResultReg).addReg(Op);
2783   updateValueMap(I, ResultReg);
2784   return true;
2785 }
2786 
2787 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2788   Value *V = I->getOperand(0);
2789   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2790     return false;
2791 
2792   Register Op = getRegForValue(V);
2793   if (Op == 0)
2794     return false;
2795 
2796   Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2797   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr),
2798           ResultReg).addReg(Op);
2799   updateValueMap(I, ResultReg);
2800   return true;
2801 }
2802 
2803 // FPToUI and FPToSI
2804 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2805   MVT DestVT;
2806   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2807     return false;
2808 
2809   Register SrcReg = getRegForValue(I->getOperand(0));
2810   if (SrcReg == 0)
2811     return false;
2812 
2813   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2814   if (SrcVT == MVT::f128 || SrcVT == MVT::f16)
2815     return false;
2816 
2817   unsigned Opc;
2818   if (SrcVT == MVT::f64) {
2819     if (Signed)
2820       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2821     else
2822       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2823   } else {
2824     if (Signed)
2825       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2826     else
2827       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2828   }
2829   Register ResultReg = createResultReg(
2830       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2831   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
2832       .addReg(SrcReg);
2833   updateValueMap(I, ResultReg);
2834   return true;
2835 }
2836 
2837 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2838   MVT DestVT;
2839   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2840     return false;
2841   // Let regular ISEL handle FP16
2842   if (DestVT == MVT::f16)
2843     return false;
2844 
2845   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2846          "Unexpected value type.");
2847 
2848   Register SrcReg = getRegForValue(I->getOperand(0));
2849   if (!SrcReg)
2850     return false;
2851 
2852   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2853 
2854   // Handle sign-extension.
2855   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2856     SrcReg =
2857         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2858     if (!SrcReg)
2859       return false;
2860   }
2861 
2862   unsigned Opc;
2863   if (SrcVT == MVT::i64) {
2864     if (Signed)
2865       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2866     else
2867       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2868   } else {
2869     if (Signed)
2870       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2871     else
2872       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2873   }
2874 
2875   Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2876   updateValueMap(I, ResultReg);
2877   return true;
2878 }
2879 
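// Fast-path lowering of incoming arguments for the plain C and Swift calling
// conventions: up to eight GPR and eight FPR/SIMD arguments are mapped
// directly onto the W/X and H/S/D/Q argument registers below. Anything more
// involved (varargs, byval, sret, swifterror, aggregates, ...) returns false
// so that SelectionDAG handles the arguments instead.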
2880 bool AArch64FastISel::fastLowerArguments() {
2881   if (!FuncInfo.CanLowerReturn)
2882     return false;
2883 
2884   const Function *F = FuncInfo.Fn;
2885   if (F->isVarArg())
2886     return false;
2887 
2888   CallingConv::ID CC = F->getCallingConv();
2889   if (CC != CallingConv::C && CC != CallingConv::Swift)
2890     return false;
2891 
2892   if (Subtarget->hasCustomCallingConv())
2893     return false;
2894 
2895   // Only handle simple cases of up to 8 GPR and FPR each.
2896   unsigned GPRCnt = 0;
2897   unsigned FPRCnt = 0;
2898   for (auto const &Arg : F->args()) {
2899     if (Arg.hasAttribute(Attribute::ByVal) ||
2900         Arg.hasAttribute(Attribute::InReg) ||
2901         Arg.hasAttribute(Attribute::StructRet) ||
2902         Arg.hasAttribute(Attribute::SwiftSelf) ||
2903         Arg.hasAttribute(Attribute::SwiftAsync) ||
2904         Arg.hasAttribute(Attribute::SwiftError) ||
2905         Arg.hasAttribute(Attribute::Nest))
2906       return false;
2907 
2908     Type *ArgTy = Arg.getType();
2909     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2910       return false;
2911 
2912     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2913     if (!ArgVT.isSimple())
2914       return false;
2915 
2916     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2917     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2918       return false;
2919 
2920     if (VT.isVector() &&
2921         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2922       return false;
2923 
2924     if (VT >= MVT::i1 && VT <= MVT::i64)
2925       ++GPRCnt;
2926     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2927              VT.is128BitVector())
2928       ++FPRCnt;
2929     else
2930       return false;
2931 
2932     if (GPRCnt > 8 || FPRCnt > 8)
2933       return false;
2934   }
2935 
2936   static const MCPhysReg Registers[6][8] = {
2937     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2938       AArch64::W5, AArch64::W6, AArch64::W7 },
2939     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2940       AArch64::X5, AArch64::X6, AArch64::X7 },
2941     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2942       AArch64::H5, AArch64::H6, AArch64::H7 },
2943     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2944       AArch64::S5, AArch64::S6, AArch64::S7 },
2945     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2946       AArch64::D5, AArch64::D6, AArch64::D7 },
2947     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2948       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2949   };
2950 
2951   unsigned GPRIdx = 0;
2952   unsigned FPRIdx = 0;
2953   for (auto const &Arg : F->args()) {
2954     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2955     unsigned SrcReg;
2956     const TargetRegisterClass *RC;
2957     if (VT >= MVT::i1 && VT <= MVT::i32) {
2958       SrcReg = Registers[0][GPRIdx++];
2959       RC = &AArch64::GPR32RegClass;
2960       VT = MVT::i32;
2961     } else if (VT == MVT::i64) {
2962       SrcReg = Registers[1][GPRIdx++];
2963       RC = &AArch64::GPR64RegClass;
2964     } else if (VT == MVT::f16) {
2965       SrcReg = Registers[2][FPRIdx++];
2966       RC = &AArch64::FPR16RegClass;
2967     } else if (VT ==  MVT::f32) {
2968       SrcReg = Registers[3][FPRIdx++];
2969       RC = &AArch64::FPR32RegClass;
2970     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2971       SrcReg = Registers[4][FPRIdx++];
2972       RC = &AArch64::FPR64RegClass;
2973     } else if (VT.is128BitVector()) {
2974       SrcReg = Registers[5][FPRIdx++];
2975       RC = &AArch64::FPR128RegClass;
2976     } else
2977       llvm_unreachable("Unexpected value type.");
2978 
2979     Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
2980     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
2981     // Without this, EmitLiveInCopies may eliminate the livein if its only
2982     // use is a bitcast (which isn't turned into an instruction).
2983     Register ResultReg = createResultReg(RC);
2984     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
2985             TII.get(TargetOpcode::COPY), ResultReg)
2986         .addReg(DstReg, getKillRegState(true));
2987     updateValueMap(&Arg, ResultReg);
2988   }
2989   return true;
2990 }
2991 
2992 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
2993                                       SmallVectorImpl<MVT> &OutVTs,
2994                                       unsigned &NumBytes) {
2995   CallingConv::ID CC = CLI.CallConv;
2996   SmallVector<CCValAssign, 16> ArgLocs;
2997   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
2998   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
2999 
3000   // Get a count of how many bytes are to be pushed on the stack.
3001   NumBytes = CCInfo.getNextStackOffset();
3002 
3003   // Issue CALLSEQ_START
3004   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3005   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
3006     .addImm(NumBytes).addImm(0);
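  // The call-frame setup pseudo (ADJCALLSTACKDOWN) is later eliminated by frame
  // lowering; with a reserved call frame the outgoing-argument space is
  // typically allocated once in the prologue rather than around each call.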
3007 
3008   // Process the args.
3009   for (CCValAssign &VA : ArgLocs) {
3010     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3011     MVT ArgVT = OutVTs[VA.getValNo()];
3012 
3013     Register ArgReg = getRegForValue(ArgVal);
3014     if (!ArgReg)
3015       return false;
3016 
3017     // Handle arg promotion: SExt, ZExt, AExt.
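    // For example, an i8 value assigned to a 32-bit location is widened to i32
    // here (sign- or zero-extended as the location info dictates) before being
    // copied into its argument register or stored to the stack.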
3018     switch (VA.getLocInfo()) {
3019     case CCValAssign::Full:
3020       break;
3021     case CCValAssign::SExt: {
3022       MVT DestVT = VA.getLocVT();
3023       MVT SrcVT = ArgVT;
3024       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3025       if (!ArgReg)
3026         return false;
3027       break;
3028     }
3029     case CCValAssign::AExt:
3030     // Intentional fall-through.
3031     case CCValAssign::ZExt: {
3032       MVT DestVT = VA.getLocVT();
3033       MVT SrcVT = ArgVT;
3034       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3035       if (!ArgReg)
3036         return false;
3037       break;
3038     }
3039     default:
3040       llvm_unreachable("Unknown arg promotion!");
3041     }
3042 
3043     // Now copy/store arg to correct locations.
3044     if (VA.isRegLoc() && !VA.needsCustom()) {
3045       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3046               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3047       CLI.OutRegs.push_back(VA.getLocReg());
3048     } else if (VA.needsCustom()) {
3049       // FIXME: Handle custom args.
3050       return false;
3051     } else {
3052       assert(VA.isMemLoc() && "Assuming store on stack.");
3053 
3054       // Don't emit stores for undef values.
3055       if (isa<UndefValue>(ArgVal))
3056         continue;
3057 
3058       // Need to store on the stack.
3059       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3060 
3061       unsigned BEAlign = 0;
3062       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3063         BEAlign = 8 - ArgSize;
3064 
3065       Address Addr;
3066       Addr.setKind(Address::RegBase);
3067       Addr.setReg(AArch64::SP);
3068       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3069 
3070       Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3071       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3072           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3073           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3074 
3075       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3076         return false;
3077     }
3078   }
3079   return true;
3080 }
3081 
3082 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
3083                                  unsigned NumBytes) {
3084   CallingConv::ID CC = CLI.CallConv;
3085 
3086   // Issue CALLSEQ_END
3087   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3088   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
3089     .addImm(NumBytes).addImm(0);
3090 
3091   // Now the return value.
3092   if (RetVT != MVT::isVoid) {
3093     SmallVector<CCValAssign, 16> RVLocs;
3094     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3095     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC));
3096 
3097     // Only handle a single return value.
3098     if (RVLocs.size() != 1)
3099       return false;
3100 
3101     // Copy the single result register out of its specified physreg.
3102     MVT CopyVT = RVLocs[0].getValVT();
3103 
3104     // TODO: Handle big-endian results
3105     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3106       return false;
3107 
3108     Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT));
3109     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3110             TII.get(TargetOpcode::COPY), ResultReg)
3111         .addReg(RVLocs[0].getLocReg());
3112     CLI.InRegs.push_back(RVLocs[0].getLocReg());
3113 
3114     CLI.ResultReg = ResultReg;
3115     CLI.NumResultRegs = 1;
3116   }
3117 
3118   return true;
3119 }
3120 
3121 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3122   CallingConv::ID CC  = CLI.CallConv;
3123   bool IsTailCall     = CLI.IsTailCall;
3124   bool IsVarArg       = CLI.IsVarArg;
3125   const Value *Callee = CLI.Callee;
3126   MCSymbol *Symbol = CLI.Symbol;
3127 
3128   if (!Callee && !Symbol)
3129     return false;
3130 
3131   // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3132   // a bti instruction following the call.
3133   if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3134       !Subtarget->noBTIAtReturnTwice() &&
3135       MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3136     return false;
3137 
3138   // Allow SelectionDAG isel to handle tail calls.
3139   if (IsTailCall)
3140     return false;
3141 
3142   // FIXME: we could and should support this, but for now correctness at -O0 is
3143   // more important.
3144   if (Subtarget->isTargetILP32())
3145     return false;
3146 
3147   CodeModel::Model CM = TM.getCodeModel();
3148   // Only support the small-addressing and large code models.
3149   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3150     return false;
3151 
3152   // FIXME: Add large code model support for ELF.
3153   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3154     return false;
3155 
3156   // Let SDISel handle vararg functions.
3157   if (IsVarArg)
3158     return false;
3159 
3160   // FIXME: Only handle *simple* calls for now.
3161   MVT RetVT;
3162   if (CLI.RetTy->isVoidTy())
3163     RetVT = MVT::isVoid;
3164   else if (!isTypeLegal(CLI.RetTy, RetVT))
3165     return false;
3166 
3167   for (auto Flag : CLI.OutFlags)
3168     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3169         Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3170       return false;
3171 
3172   // Set up the argument vectors.
3173   SmallVector<MVT, 16> OutVTs;
3174   OutVTs.reserve(CLI.OutVals.size());
3175 
3176   for (auto *Val : CLI.OutVals) {
3177     MVT VT;
3178     if (!isTypeLegal(Val->getType(), VT) &&
3179         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3180       return false;
3181 
3182     // We don't handle vector parameters yet.
3183     if (VT.isVector() || VT.getSizeInBits() > 64)
3184       return false;
3185 
3186     OutVTs.push_back(VT);
3187   }
3188 
3189   Address Addr;
3190   if (Callee && !computeCallAddress(Callee, Addr))
3191     return false;
3192 
3193   // The weak function target may be zero; in that case we must use indirect
3194   // addressing via a stub on Windows, as it may be out of range for a
3195   // PC-relative jump.
3196   if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3197       Addr.getGlobalValue()->hasExternalWeakLinkage())
3198     return false;
3199 
3200   // Handle the arguments now that we've gotten them.
3201   unsigned NumBytes;
3202   if (!processCallArgs(CLI, OutVTs, NumBytes))
3203     return false;
3204 
3205   const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3206   if (RegInfo->isAnyArgRegReserved(*MF))
3207     RegInfo->emitReservedArgRegCallError(*MF);
3208 
3209   // Issue the call.
3210   MachineInstrBuilder MIB;
3211   if (Subtarget->useSmallAddressing()) {
3212     const MCInstrDesc &II =
3213         TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3214     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II);
3215     if (Symbol)
3216       MIB.addSym(Symbol, 0);
3217     else if (Addr.getGlobalValue())
3218       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3219     else if (Addr.getReg()) {
3220       Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3221       MIB.addReg(Reg);
3222     } else
3223       return false;
3224   } else {
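    // Large-code-model path (currently MachO only): materialize the callee
    // address into a register and call it indirectly. For an external symbol
    // this looks roughly like
    //   adrp x8, _callee@GOTPAGE
    //   ldr  x8, [x8, _callee@GOTPAGEOFF]
    //   blr  x8
    // where the MO_GOT | MO_PAGE / MO_PAGEOFF flags below generate the
    // corresponding fixups; the symbol name is illustrative.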
3225     unsigned CallReg = 0;
3226     if (Symbol) {
3227       Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3228       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
3229               ADRPReg)
3230           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3231 
3232       CallReg = createResultReg(&AArch64::GPR64RegClass);
3233       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3234               TII.get(AArch64::LDRXui), CallReg)
3235           .addReg(ADRPReg)
3236           .addSym(Symbol,
3237                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3238     } else if (Addr.getGlobalValue())
3239       CallReg = materializeGV(Addr.getGlobalValue());
3240     else if (Addr.getReg())
3241       CallReg = Addr.getReg();
3242 
3243     if (!CallReg)
3244       return false;
3245 
3246     const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3247     CallReg = constrainOperandRegClass(II, CallReg, 0);
3248     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg);
3249   }
3250 
3251   // Add implicit physical register uses to the call.
3252   for (auto Reg : CLI.OutRegs)
3253     MIB.addReg(Reg, RegState::Implicit);
3254 
3255   // Add a register mask with the call-preserved registers.
3256   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3257   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3258 
3259   CLI.Call = MIB;
3260 
3261   // Finish off the call including any return values.
3262   return finishCall(CLI, RetVT, NumBytes);
3263 }
3264 
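// A rough sketch of the heuristic below: with a known alignment the copy is
// considered small if it can be done in at most four aligned accesses (e.g. up
// to 32 bytes at 8-byte alignment, or 8 bytes at 2-byte alignment); with an
// unknown alignment anything under 32 bytes qualifies.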
3265 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) {
3266   if (Alignment)
3267     return Len / Alignment <= 4;
3268   else
3269     return Len < 32;
3270 }
3271 
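// For illustration, a 12-byte copy with 4-byte alignment is expanded by the
// loop below into three i32 load/store pairs at offsets 0, 4 and 8; with
// 8-byte (or unknown) alignment the widest legal chunk is used first, so a
// 12-byte copy becomes one i64 pair followed by one i32 pair.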
3272 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3273                                          uint64_t Len, unsigned Alignment) {
3274   // Make sure we don't bloat code by inlining very large memcpy's.
3275   if (!isMemCpySmall(Len, Alignment))
3276     return false;
3277 
3278   int64_t UnscaledOffset = 0;
3279   Address OrigDest = Dest;
3280   Address OrigSrc = Src;
3281 
3282   while (Len) {
3283     MVT VT;
3284     if (!Alignment || Alignment >= 8) {
3285       if (Len >= 8)
3286         VT = MVT::i64;
3287       else if (Len >= 4)
3288         VT = MVT::i32;
3289       else if (Len >= 2)
3290         VT = MVT::i16;
3291       else {
3292         VT = MVT::i8;
3293       }
3294     } else {
3295       // Bound based on alignment.
3296       if (Len >= 4 && Alignment == 4)
3297         VT = MVT::i32;
3298       else if (Len >= 2 && Alignment == 2)
3299         VT = MVT::i16;
3300       else {
3301         VT = MVT::i8;
3302       }
3303     }
3304 
3305     unsigned ResultReg = emitLoad(VT, VT, Src);
3306     if (!ResultReg)
3307       return false;
3308 
3309     if (!emitStore(VT, ResultReg, Dest))
3310       return false;
3311 
3312     int64_t Size = VT.getSizeInBits() / 8;
3313     Len -= Size;
3314     UnscaledOffset += Size;
3315 
3316     // We need to recompute the unscaled offset for each iteration.
3317     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3318     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3319   }
3320 
3321   return true;
3322 }
3323 
3324 /// Check if it is possible to fold the condition from the XALU intrinsic
3325 /// into the user. The condition code will only be updated on success.
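///
/// For example, for IR along the lines of
///   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %ovf = extractvalue { i32, i1 } %res, 1
///   br i1 %ovf, label %overflow, label %cont
/// the conditional branch can test the flags produced by the ADDS that
/// implements the intrinsic (b.vs) instead of materializing %ovf first.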
3326 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3327                                         const Instruction *I,
3328                                         const Value *Cond) {
3329   if (!isa<ExtractValueInst>(Cond))
3330     return false;
3331 
3332   const auto *EV = cast<ExtractValueInst>(Cond);
3333   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3334     return false;
3335 
3336   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3337   MVT RetVT;
3338   const Function *Callee = II->getCalledFunction();
3339   Type *RetTy =
3340   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3341   if (!isTypeLegal(RetTy, RetVT))
3342     return false;
3343 
3344   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3345     return false;
3346 
3347   const Value *LHS = II->getArgOperand(0);
3348   const Value *RHS = II->getArgOperand(1);
3349 
3350   // Canonicalize immediate to the RHS.
3351   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3352     std::swap(LHS, RHS);
3353 
3354   // Simplify multiplies.
3355   Intrinsic::ID IID = II->getIntrinsicID();
3356   switch (IID) {
3357   default:
3358     break;
3359   case Intrinsic::smul_with_overflow:
3360     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3361       if (C->getValue() == 2)
3362         IID = Intrinsic::sadd_with_overflow;
3363     break;
3364   case Intrinsic::umul_with_overflow:
3365     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3366       if (C->getValue() == 2)
3367         IID = Intrinsic::uadd_with_overflow;
3368     break;
3369   }
3370 
3371   AArch64CC::CondCode TmpCC;
3372   switch (IID) {
3373   default:
3374     return false;
3375   case Intrinsic::sadd_with_overflow:
3376   case Intrinsic::ssub_with_overflow:
3377     TmpCC = AArch64CC::VS;
3378     break;
3379   case Intrinsic::uadd_with_overflow:
3380     TmpCC = AArch64CC::HS;
3381     break;
3382   case Intrinsic::usub_with_overflow:
3383     TmpCC = AArch64CC::LO;
3384     break;
3385   case Intrinsic::smul_with_overflow:
3386   case Intrinsic::umul_with_overflow:
3387     TmpCC = AArch64CC::NE;
3388     break;
3389   }
3390 
3391   // Check if both instructions are in the same basic block.
3392   if (!isValueAvailable(II))
3393     return false;
3394 
3395   // Make sure nothing is in the way.
3396   BasicBlock::const_iterator Start(I);
3397   BasicBlock::const_iterator End(II);
3398   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3399     // We only expect extractvalue instructions between the intrinsic and the
3400     // instruction to be selected.
3401     if (!isa<ExtractValueInst>(Itr))
3402       return false;
3403 
3404     // Check that the extractvalue operand comes from the intrinsic.
3405     const auto *EVI = cast<ExtractValueInst>(Itr);
3406     if (EVI->getAggregateOperand() != II)
3407       return false;
3408   }
3409 
3410   CC = TmpCC;
3411   return true;
3412 }
3413 
3414 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3415   // FIXME: Handle more intrinsics.
3416   switch (II->getIntrinsicID()) {
3417   default: return false;
3418   case Intrinsic::frameaddress: {
3419     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3420     MFI.setFrameAddressIsTaken(true);
3421 
3422     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3423     Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3424     Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3425     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3426             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3427     // Recursively load frame address
3428     // ldr x0, [fp]
3429     // ldr x0, [x0]
3430     // ldr x0, [x0]
3431     // ...
3432     unsigned DestReg;
3433     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3434     while (Depth--) {
3435       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3436                                 SrcReg, 0);
3437       assert(DestReg && "Unexpected LDR instruction emission failure.");
3438       SrcReg = DestReg;
3439     }
3440 
3441     updateValueMap(II, SrcReg);
3442     return true;
3443   }
3444   case Intrinsic::sponentry: {
3445     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3446 
3447     // SP = FP + Fixed Object + 16
3448     int FI = MFI.CreateFixedObject(4, 0, false);
3449     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3450     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3451             TII.get(AArch64::ADDXri), ResultReg)
3452             .addFrameIndex(FI)
3453             .addImm(0)
3454             .addImm(0);
3455 
3456     updateValueMap(II, ResultReg);
3457     return true;
3458   }
3459   case Intrinsic::memcpy:
3460   case Intrinsic::memmove: {
3461     const auto *MTI = cast<MemTransferInst>(II);
3462     // Don't handle volatile.
3463     if (MTI->isVolatile())
3464       return false;
3465 
3466     // Disable inlining for memmove before calls to computeAddress. Otherwise,
3467     // we would emit dead code because we don't currently handle memmoves.
3468     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3469     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3470       // Small memcpy's are common enough that we want to do them without a call
3471       // if possible.
3472       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3473       unsigned Alignment = MinAlign(MTI->getDestAlignment(),
3474                                     MTI->getSourceAlignment());
3475       if (isMemCpySmall(Len, Alignment)) {
3476         Address Dest, Src;
3477         if (!computeAddress(MTI->getRawDest(), Dest) ||
3478             !computeAddress(MTI->getRawSource(), Src))
3479           return false;
3480         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3481           return true;
3482       }
3483     }
3484 
3485     if (!MTI->getLength()->getType()->isIntegerTy(64))
3486       return false;
3487 
3488     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3489       // Fast instruction selection doesn't support the special
3490       // address spaces.
3491       return false;
3492 
3493     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3494     return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3495   }
3496   case Intrinsic::memset: {
3497     const MemSetInst *MSI = cast<MemSetInst>(II);
3498     // Don't handle volatile.
3499     if (MSI->isVolatile())
3500       return false;
3501 
3502     if (!MSI->getLength()->getType()->isIntegerTy(64))
3503       return false;
3504 
3505     if (MSI->getDestAddressSpace() > 255)
3506       // Fast instruction selection doesn't support the special
3507       // address spaces.
3508       return false;
3509 
3510     return lowerCallTo(II, "memset", II->arg_size() - 1);
3511   }
3512   case Intrinsic::sin:
3513   case Intrinsic::cos:
3514   case Intrinsic::pow: {
3515     MVT RetVT;
3516     if (!isTypeLegal(II->getType(), RetVT))
3517       return false;
3518 
3519     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3520       return false;
3521 
3522     static const RTLIB::Libcall LibCallTable[3][2] = {
3523       { RTLIB::SIN_F32, RTLIB::SIN_F64 },
3524       { RTLIB::COS_F32, RTLIB::COS_F64 },
3525       { RTLIB::POW_F32, RTLIB::POW_F64 }
3526     };
3527     RTLIB::Libcall LC;
3528     bool Is64Bit = RetVT == MVT::f64;
3529     switch (II->getIntrinsicID()) {
3530     default:
3531       llvm_unreachable("Unexpected intrinsic.");
3532     case Intrinsic::sin:
3533       LC = LibCallTable[0][Is64Bit];
3534       break;
3535     case Intrinsic::cos:
3536       LC = LibCallTable[1][Is64Bit];
3537       break;
3538     case Intrinsic::pow:
3539       LC = LibCallTable[2][Is64Bit];
3540       break;
3541     }
3542 
3543     ArgListTy Args;
3544     Args.reserve(II->arg_size());
3545 
3546     // Populate the argument list.
3547     for (auto &Arg : II->args()) {
3548       ArgListEntry Entry;
3549       Entry.Val = Arg;
3550       Entry.Ty = Arg->getType();
3551       Args.push_back(Entry);
3552     }
3553 
3554     CallLoweringInfo CLI;
3555     MCContext &Ctx = MF->getContext();
3556     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3557                   TLI.getLibcallName(LC), std::move(Args));
3558     if (!lowerCallTo(CLI))
3559       return false;
3560     updateValueMap(II, CLI.ResultReg);
3561     return true;
3562   }
3563   case Intrinsic::fabs: {
3564     MVT VT;
3565     if (!isTypeLegal(II->getType(), VT))
3566       return false;
3567 
3568     unsigned Opc;
3569     switch (VT.SimpleTy) {
3570     default:
3571       return false;
3572     case MVT::f32:
3573       Opc = AArch64::FABSSr;
3574       break;
3575     case MVT::f64:
3576       Opc = AArch64::FABSDr;
3577       break;
3578     }
3579     Register SrcReg = getRegForValue(II->getOperand(0));
3580     if (!SrcReg)
3581       return false;
3582     Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3583     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
3584       .addReg(SrcReg);
3585     updateValueMap(II, ResultReg);
3586     return true;
3587   }
3588   case Intrinsic::trap:
3589     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3590         .addImm(1);
3591     return true;
3592   case Intrinsic::debugtrap:
3593     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK))
3594         .addImm(0xF000);
3595     return true;
3596 
3597   case Intrinsic::sqrt: {
3598     Type *RetTy = II->getCalledFunction()->getReturnType();
3599 
3600     MVT VT;
3601     if (!isTypeLegal(RetTy, VT))
3602       return false;
3603 
3604     Register Op0Reg = getRegForValue(II->getOperand(0));
3605     if (!Op0Reg)
3606       return false;
3607 
3608     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3609     if (!ResultReg)
3610       return false;
3611 
3612     updateValueMap(II, ResultReg);
3613     return true;
3614   }
3615   case Intrinsic::sadd_with_overflow:
3616   case Intrinsic::uadd_with_overflow:
3617   case Intrinsic::ssub_with_overflow:
3618   case Intrinsic::usub_with_overflow:
3619   case Intrinsic::smul_with_overflow:
3620   case Intrinsic::umul_with_overflow: {
3621     // This implements the basic lowering of the xalu with overflow intrinsics.
3622     const Function *Callee = II->getCalledFunction();
3623     auto *Ty = cast<StructType>(Callee->getReturnType());
3624     Type *RetTy = Ty->getTypeAtIndex(0U);
3625 
3626     MVT VT;
3627     if (!isTypeLegal(RetTy, VT))
3628       return false;
3629 
3630     if (VT != MVT::i32 && VT != MVT::i64)
3631       return false;
3632 
3633     const Value *LHS = II->getArgOperand(0);
3634     const Value *RHS = II->getArgOperand(1);
3635     // Canonicalize immediate to the RHS.
3636     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3637       std::swap(LHS, RHS);
3638 
3639     // Simplify multiplies.
3640     Intrinsic::ID IID = II->getIntrinsicID();
3641     switch (IID) {
3642     default:
3643       break;
3644     case Intrinsic::smul_with_overflow:
3645       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3646         if (C->getValue() == 2) {
3647           IID = Intrinsic::sadd_with_overflow;
3648           RHS = LHS;
3649         }
3650       break;
3651     case Intrinsic::umul_with_overflow:
3652       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3653         if (C->getValue() == 2) {
3654           IID = Intrinsic::uadd_with_overflow;
3655           RHS = LHS;
3656         }
3657       break;
3658     }
3659 
3660     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3661     AArch64CC::CondCode CC = AArch64CC::Invalid;
3662     switch (IID) {
3663     default: llvm_unreachable("Unexpected intrinsic!");
3664     case Intrinsic::sadd_with_overflow:
3665       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3666       CC = AArch64CC::VS;
3667       break;
3668     case Intrinsic::uadd_with_overflow:
3669       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3670       CC = AArch64CC::HS;
3671       break;
3672     case Intrinsic::ssub_with_overflow:
3673       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3674       CC = AArch64CC::VS;
3675       break;
3676     case Intrinsic::usub_with_overflow:
3677       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3678       CC = AArch64CC::LO;
3679       break;
3680     case Intrinsic::smul_with_overflow: {
3681       CC = AArch64CC::NE;
3682       Register LHSReg = getRegForValue(LHS);
3683       if (!LHSReg)
3684         return false;
3685 
3686       Register RHSReg = getRegForValue(RHS);
3687       if (!RHSReg)
3688         return false;
3689 
3690       if (VT == MVT::i32) {
3691         MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3692         Register MulSubReg =
3693             fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3694         // cmp xreg, wreg, sxtw
3695         emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3696                       AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3697                       /*WantResult=*/false);
3698         MulReg = MulSubReg;
3699       } else {
3700         assert(VT == MVT::i64 && "Unexpected value type.");
3701         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3702         // reused in the next instruction.
3703         MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3704         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3705         emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3706                     /*WantResult=*/false);
3707       }
3708       break;
3709     }
3710     case Intrinsic::umul_with_overflow: {
3711       CC = AArch64CC::NE;
3712       Register LHSReg = getRegForValue(LHS);
3713       if (!LHSReg)
3714         return false;
3715 
3716       Register RHSReg = getRegForValue(RHS);
3717       if (!RHSReg)
3718         return false;
3719 
3720       if (VT == MVT::i32) {
3721         MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3722         // tst xreg, #0xffffffff00000000
3723         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3724                 TII.get(AArch64::ANDSXri), AArch64::XZR)
3725             .addReg(MulReg)
3726             .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3727         MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3728       } else {
3729         assert(VT == MVT::i64 && "Unexpected value type.");
3730         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3731         // reused in the next instruction.
3732         MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3733         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3734         emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3735       }
3736       break;
3737     }
3738     }
3739 
3740     if (MulReg) {
3741       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3742       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3743               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3744     }
3745 
3746     if (!ResultReg1)
3747       return false;
3748 
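    // Materialize the overflow bit with the canonical "cset" idiom, which is an
    // alias for "csinc wd, wzr, wzr, <inverted cond>".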
3749     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3750                                   AArch64::WZR, AArch64::WZR,
3751                                   getInvertedCondCode(CC));
3752     (void)ResultReg2;
3753     assert((ResultReg1 + 1) == ResultReg2 &&
3754            "Nonconsecutive result registers.");
3755     updateValueMap(II, ResultReg1, 2);
3756     return true;
3757   }
3758   }
3759   return false;
3760 }
3761 
3762 bool AArch64FastISel::selectRet(const Instruction *I) {
3763   const ReturnInst *Ret = cast<ReturnInst>(I);
3764   const Function &F = *I->getParent()->getParent();
3765 
3766   if (!FuncInfo.CanLowerReturn)
3767     return false;
3768 
3769   if (F.isVarArg())
3770     return false;
3771 
3772   if (TLI.supportSwiftError() &&
3773       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3774     return false;
3775 
3776   if (TLI.supportSplitCSR(FuncInfo.MF))
3777     return false;
3778 
3779   // Build a list of return value registers.
3780   SmallVector<unsigned, 4> RetRegs;
3781 
3782   if (Ret->getNumOperands() > 0) {
3783     CallingConv::ID CC = F.getCallingConv();
3784     SmallVector<ISD::OutputArg, 4> Outs;
3785     GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3786 
3787     // Analyze operands of the call, assigning locations to each operand.
3788     SmallVector<CCValAssign, 16> ValLocs;
3789     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3790     CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3791                                                      : RetCC_AArch64_AAPCS;
3792     CCInfo.AnalyzeReturn(Outs, RetCC);
3793 
3794     // Only handle a single return value for now.
3795     if (ValLocs.size() != 1)
3796       return false;
3797 
3798     CCValAssign &VA = ValLocs[0];
3799     const Value *RV = Ret->getOperand(0);
3800 
3801     // Don't bother handling odd stuff for now.
3802     if ((VA.getLocInfo() != CCValAssign::Full) &&
3803         (VA.getLocInfo() != CCValAssign::BCvt))
3804       return false;
3805 
3806     // Only handle register returns for now.
3807     if (!VA.isRegLoc())
3808       return false;
3809 
3810     Register Reg = getRegForValue(RV);
3811     if (Reg == 0)
3812       return false;
3813 
3814     unsigned SrcReg = Reg + VA.getValNo();
3815     Register DestReg = VA.getLocReg();
3816     // Avoid a cross-class copy. This is very unlikely.
3817     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3818       return false;
3819 
3820     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3821     if (!RVEVT.isSimple())
3822       return false;
3823 
3824     // Vectors (of > 1 lane) in big endian need tricky handling.
3825     if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3826         !Subtarget->isLittleEndian())
3827       return false;
3828 
3829     MVT RVVT = RVEVT.getSimpleVT();
3830     if (RVVT == MVT::f128)
3831       return false;
3832 
3833     MVT DestVT = VA.getValVT();
3834     // Special handling for extended integers.
3835     if (RVVT != DestVT) {
3836       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3837         return false;
3838 
3839       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3840         return false;
3841 
3842       bool IsZExt = Outs[0].Flags.isZExt();
3843       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3844       if (SrcReg == 0)
3845         return false;
3846     }
3847 
3848     // "Callee" (i.e. the value producer) zero-extends pointers at the function
3849     // boundary.
3850     if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3851       SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3852 
3853     // Make the copy.
3854     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3855             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3856 
3857     // Add register to return instruction.
3858     RetRegs.push_back(VA.getLocReg());
3859   }
3860 
3861   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3862                                     TII.get(AArch64::RET_ReallyLR));
3863   for (unsigned RetReg : RetRegs)
3864     MIB.addReg(RetReg, RegState::Implicit);
3865   return true;
3866 }
3867 
3868 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3869   Type *DestTy = I->getType();
3870   Value *Op = I->getOperand(0);
3871   Type *SrcTy = Op->getType();
3872 
3873   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3874   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3875   if (!SrcEVT.isSimple())
3876     return false;
3877   if (!DestEVT.isSimple())
3878     return false;
3879 
3880   MVT SrcVT = SrcEVT.getSimpleVT();
3881   MVT DestVT = DestEVT.getSimpleVT();
3882 
3883   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3884       SrcVT != MVT::i8)
3885     return false;
3886   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3887       DestVT != MVT::i1)
3888     return false;
3889 
3890   Register SrcReg = getRegForValue(Op);
3891   if (!SrcReg)
3892     return false;
3893 
3894   // If we're truncating from i64 to a smaller non-legal type then generate an
3895   // AND. Otherwise, we know the high bits are undefined and a truncate only
3896   // generates a COPY. We cannot mark the source register also as the result
3897   // register, because this can incorrectly transfer the kill flag onto the
3898   // source register.
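  // For example, truncating an i64 to i8 extracts the 32-bit sub-register and
  // then emits "and wd, wn, #0xff"; truncating an i32 to i8/i16/i1 just copies,
  // since users of the narrow value are expected to ignore the high bits.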
3899   unsigned ResultReg;
3900   if (SrcVT == MVT::i64) {
3901     uint64_t Mask = 0;
3902     switch (DestVT.SimpleTy) {
3903     default:
3904       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3905       return false;
3906     case MVT::i1:
3907       Mask = 0x1;
3908       break;
3909     case MVT::i8:
3910       Mask = 0xff;
3911       break;
3912     case MVT::i16:
3913       Mask = 0xffff;
3914       break;
3915     }
3916     // Issue an extract_subreg to get the lower 32-bits.
3917     Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3918                                                 AArch64::sub_32);
3919     // Create the AND instruction which performs the actual truncation.
3920     ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3921     assert(ResultReg && "Unexpected AND instruction emission failure.");
3922   } else {
3923     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3924     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3925             TII.get(TargetOpcode::COPY), ResultReg)
3926         .addReg(SrcReg);
3927   }
3928 
3929   updateValueMap(I, ResultReg);
3930   return true;
3931 }
3932 
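// A note on the i1 extension below: zero-extension is an AND with 1, and
// sign-extension uses SBFMWri with immr = imms = 0 (i.e. "sbfx wd, wn, #0, #1"),
// which replicates bit 0 so the result is 0 or all-ones.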
3933 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
3934   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
3935           DestVT == MVT::i64) &&
3936          "Unexpected value type.");
3937   // Handle i8 and i16 as i32.
3938   if (DestVT == MVT::i8 || DestVT == MVT::i16)
3939     DestVT = MVT::i32;
3940 
3941   if (IsZExt) {
3942     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
3943     assert(ResultReg && "Unexpected AND instruction emission failure.");
3944     if (DestVT == MVT::i64) {
3945       // We're zero-extending i1 to i64. The ANDWri Wd, Ws, #1 above implicitly
3946       // clears the upper 32 bits; emit a SUBREG_TO_REG to extend from Wd to Xd.
3947       Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3948       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
3949               TII.get(AArch64::SUBREG_TO_REG), Reg64)
3950           .addImm(0)
3951           .addReg(ResultReg)
3952           .addImm(AArch64::sub_32);
3953       ResultReg = Reg64;
3954     }
3955     return ResultReg;
3956   } else {
3957     if (DestVT == MVT::i64) {
3958       // FIXME: We're sign-extending i1 to i64; this isn't handled yet.
3959       return 0;
3960     }
3961     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
3962                             0, 0);
3963   }
3964 }
3965 
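// AArch64 has no plain three-operand MUL instruction; "mul" is an alias for
// MADD with the zero register as the addend, which is what the helper below
// emits (e.g. "mul w0, w1, w2" == "madd w0, w1, w2, wzr").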
3966 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3967   unsigned Opc, ZReg;
3968   switch (RetVT.SimpleTy) {
3969   default: return 0;
3970   case MVT::i8:
3971   case MVT::i16:
3972   case MVT::i32:
3973     RetVT = MVT::i32;
3974     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
3975   case MVT::i64:
3976     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
3977   }
3978 
3979   const TargetRegisterClass *RC =
3980       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
3981   return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
3982 }
3983 
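// Likewise, "smull"/"umull" are aliases for SMADDL/UMADDL with XZR as the
// addend, producing a full 64-bit product from two 32-bit operands.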
3984 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3985   if (RetVT != MVT::i64)
3986     return 0;
3987 
3988   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
3989                           Op0, Op1, AArch64::XZR);
3990 }
3991 
3992 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
3993   if (RetVT != MVT::i64)
3994     return 0;
3995 
3996   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
3997                           Op0, Op1, AArch64::XZR);
3998 }
3999 
4000 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4001                                      unsigned Op1Reg) {
4002   unsigned Opc = 0;
4003   bool NeedTrunc = false;
4004   uint64_t Mask = 0;
4005   switch (RetVT.SimpleTy) {
4006   default: return 0;
4007   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
4008   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4009   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4010   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4011   }
4012 
4013   const TargetRegisterClass *RC =
4014       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4015   if (NeedTrunc)
4016     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4017 
4018   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4019   if (NeedTrunc)
4020     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4021   return ResultReg;
4022 }
4023 
4024 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4025                                      uint64_t Shift, bool IsZExt) {
4026   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4027          "Unexpected source/return type pair.");
4028   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4029           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4030          "Unexpected source value type.");
4031   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4032           RetVT == MVT::i64) && "Unexpected return value type.");
4033 
4034   bool Is64Bit = (RetVT == MVT::i64);
4035   unsigned RegSize = Is64Bit ? 64 : 32;
4036   unsigned DstBits = RetVT.getSizeInBits();
4037   unsigned SrcBits = SrcVT.getSizeInBits();
4038   const TargetRegisterClass *RC =
4039       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4040 
4041   // Just emit a copy for "zero" shifts.
4042   if (Shift == 0) {
4043     if (RetVT == SrcVT) {
4044       Register ResultReg = createResultReg(RC);
4045       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4046               TII.get(TargetOpcode::COPY), ResultReg)
4047           .addReg(Op0);
4048       return ResultReg;
4049     } else
4050       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4051   }
4052 
4053   // Don't deal with undefined shifts.
4054   if (Shift >= DstBits)
4055     return 0;
4056 
4057   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4058   // {S|U}BFM Wd, Wn, #r, #s
4059   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4060 
4061   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4062   // %2 = shl i16 %1, 4
4063   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4064   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4065   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4066   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4067 
4068   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4069   // %2 = shl i16 %1, 8
4070   // Wd<32+7-24,32-24> = Wn<7:0>
4071   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4072   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4073   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4074 
4075   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4076   // %2 = shl i16 %1, 12
4077   // Wd<32+3-20,32-20> = Wn<3:0>
4078   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4079   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4080   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4081 
4082   unsigned ImmR = RegSize - Shift;
4083   // Limit the width to the length of the source type.
4084   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4085   static const unsigned OpcTable[2][2] = {
4086     {AArch64::SBFMWri, AArch64::SBFMXri},
4087     {AArch64::UBFMWri, AArch64::UBFMXri}
4088   };
4089   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4090   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4091     Register TmpReg = MRI.createVirtualRegister(RC);
4092     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4093             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4094         .addImm(0)
4095         .addReg(Op0)
4096         .addImm(AArch64::sub_32);
4097     Op0 = TmpReg;
4098   }
4099   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4100 }
4101 
4102 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4103                                      unsigned Op1Reg) {
4104   unsigned Opc = 0;
4105   bool NeedTrunc = false;
4106   uint64_t Mask = 0;
4107   switch (RetVT.SimpleTy) {
4108   default: return 0;
4109   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4110   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4111   case MVT::i32: Opc = AArch64::LSRVWr; break;
4112   case MVT::i64: Opc = AArch64::LSRVXr; break;
4113   }
4114 
4115   const TargetRegisterClass *RC =
4116       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4117   if (NeedTrunc) {
4118     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4119     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4120   }
4121   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4122   if (NeedTrunc)
4123     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4124   return ResultReg;
4125 }
4126 
4127 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4128                                      uint64_t Shift, bool IsZExt) {
4129   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4130          "Unexpected source/return type pair.");
4131   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4132           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4133          "Unexpected source value type.");
4134   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4135           RetVT == MVT::i64) && "Unexpected return value type.");
4136 
4137   bool Is64Bit = (RetVT == MVT::i64);
4138   unsigned RegSize = Is64Bit ? 64 : 32;
4139   unsigned DstBits = RetVT.getSizeInBits();
4140   unsigned SrcBits = SrcVT.getSizeInBits();
4141   const TargetRegisterClass *RC =
4142       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4143 
4144   // Just emit a copy for "zero" shifts.
4145   if (Shift == 0) {
4146     if (RetVT == SrcVT) {
4147       Register ResultReg = createResultReg(RC);
4148       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4149               TII.get(TargetOpcode::COPY), ResultReg)
4150       .addReg(Op0);
4151       return ResultReg;
4152     } else
4153       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4154   }
4155 
4156   // Don't deal with undefined shifts.
4157   if (Shift >= DstBits)
4158     return 0;
4159 
4160   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4161   // {S|U}BFM Wd, Wn, #r, #s
4162   // Wd<s-r:0> = Wn<s:r> when r <= s
4163 
4164   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4165   // %2 = lshr i16 %1, 4
4166   // Wd<7-4:0> = Wn<7:4>
4167   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4168   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4169   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4170 
4171   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4172   // %2 = lshr i16 %1, 8
4173   // Wd<7-7,0> = Wn<7:7>
4174   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4175   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4176   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4177 
4178   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4179   // %2 = lshr i16 %1, 12
4180   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4181   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4182   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4183   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4184 
4185   if (Shift >= SrcBits && IsZExt)
4186     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4187 
4188   // It is not possible to fold a sign-extend into the LShr instruction. In this
4189   // case emit a sign-extend.
4190   if (!IsZExt) {
4191     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4192     if (!Op0)
4193       return 0;
4194     SrcVT = RetVT;
4195     SrcBits = SrcVT.getSizeInBits();
4196     IsZExt = true;
4197   }
4198 
4199   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4200   unsigned ImmS = SrcBits - 1;
4201   static const unsigned OpcTable[2][2] = {
4202     {AArch64::SBFMWri, AArch64::SBFMXri},
4203     {AArch64::UBFMWri, AArch64::UBFMXri}
4204   };
4205   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4206   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4207     Register TmpReg = MRI.createVirtualRegister(RC);
4208     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4209             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4210         .addImm(0)
4211         .addReg(Op0)
4212         .addImm(AArch64::sub_32);
4213     Op0 = TmpReg;
4214   }
4215   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4216 }
4217 
4218 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4219                                      unsigned Op1Reg) {
4220   unsigned Opc = 0;
4221   bool NeedTrunc = false;
4222   uint64_t Mask = 0;
4223   switch (RetVT.SimpleTy) {
4224   default: return 0;
4225   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4226   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4227   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4228   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4229   }
4230 
4231   const TargetRegisterClass *RC =
4232       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4233   if (NeedTrunc) {
4234     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4235     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4236   }
4237   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4238   if (NeedTrunc)
4239     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4240   return ResultReg;
4241 }
4242 
4243 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4244                                      uint64_t Shift, bool IsZExt) {
4245   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4246          "Unexpected source/return type pair.");
4247   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4248           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4249          "Unexpected source value type.");
4250   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4251           RetVT == MVT::i64) && "Unexpected return value type.");
4252 
4253   bool Is64Bit = (RetVT == MVT::i64);
4254   unsigned RegSize = Is64Bit ? 64 : 32;
4255   unsigned DstBits = RetVT.getSizeInBits();
4256   unsigned SrcBits = SrcVT.getSizeInBits();
4257   const TargetRegisterClass *RC =
4258       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4259 
4260   // Just emit a copy for "zero" shifts.
4261   if (Shift == 0) {
4262     if (RetVT == SrcVT) {
4263       Register ResultReg = createResultReg(RC);
4264       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4265               TII.get(TargetOpcode::COPY), ResultReg)
4266       .addReg(Op0);
4267       return ResultReg;
4268     } else
4269       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4270   }
4271 
4272   // Don't deal with undefined shifts.
4273   if (Shift >= DstBits)
4274     return 0;
4275 
4276   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4277   // {S|U}BFM Wd, Wn, #r, #s
4278   // Wd<s-r:0> = Wn<s:r> when r <= s
4279 
4280   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4281   // %2 = ashr i16 %1, 4
4282   // Wd<7-4:0> = Wn<7:4>
4283   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4284   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4285   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4286 
4287   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4288   // %2 = ashr i16 %1, 8
4289   // Wd<7-7,0> = Wn<7:7>
4290   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4291   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4292   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4293 
4294   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4295   // %2 = ashr i16 %1, 12
4296   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4297   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4298   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4299   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4300 
4301   if (Shift >= SrcBits && IsZExt)
4302     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4303 
4304   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4305   unsigned ImmS = SrcBits - 1;
4306   static const unsigned OpcTable[2][2] = {
4307     {AArch64::SBFMWri, AArch64::SBFMXri},
4308     {AArch64::UBFMWri, AArch64::UBFMXri}
4309   };
4310   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4311   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4312     Register TmpReg = MRI.createVirtualRegister(RC);
4313     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4314             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4315         .addImm(0)
4316         .addReg(Op0)
4317         .addImm(AArch64::sub_32);
4318     Op0 = TmpReg;
4319   }
4320   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4321 }
4322 
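// The widening patterns below map onto the usual bitfield-move aliases, e.g.
// zext i8->i32 is "uxtb wd, wn" (UBFMWri #0, #7), sext i16->i64 is
// "sxth xd, wn" (SBFMXri #0, #15), and sext i32->i64 is "sxtw xd, wn"
// (SBFMXri #0, #31).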
4323 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4324                                      bool IsZExt) {
4325   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4326 
4327   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4328   // DestVT are odd things, so test to make sure that they are both types we can
4329   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4330   // bail out to SelectionDAG.
4331   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4332        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4333       ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4334        (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4335     return 0;
4336 
4337   unsigned Opc;
4338   unsigned Imm = 0;
4339 
4340   switch (SrcVT.SimpleTy) {
4341   default:
4342     return 0;
4343   case MVT::i1:
4344     return emiti1Ext(SrcReg, DestVT, IsZExt);
4345   case MVT::i8:
4346     if (DestVT == MVT::i64)
4347       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4348     else
4349       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4350     Imm = 7;
4351     break;
4352   case MVT::i16:
4353     if (DestVT == MVT::i64)
4354       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4355     else
4356       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4357     Imm = 15;
4358     break;
4359   case MVT::i32:
4360     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4361     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4362     Imm = 31;
4363     break;
4364   }
4365 
4366   // Handle i8 and i16 as i32.
4367   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4368     DestVT = MVT::i32;
4369   else if (DestVT == MVT::i64) {
4370     Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4371     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4372             TII.get(AArch64::SUBREG_TO_REG), Src64)
4373         .addImm(0)
4374         .addReg(SrcReg)
4375         .addImm(AArch64::sub_32);
4376     SrcReg = Src64;
4377   }
4378 
4379   const TargetRegisterClass *RC =
4380       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4381   return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4382 }
4383 
4384 static bool isZExtLoad(const MachineInstr *LI) {
4385   switch (LI->getOpcode()) {
4386   default:
4387     return false;
4388   case AArch64::LDURBBi:
4389   case AArch64::LDURHHi:
4390   case AArch64::LDURWi:
4391   case AArch64::LDRBBui:
4392   case AArch64::LDRHHui:
4393   case AArch64::LDRWui:
4394   case AArch64::LDRBBroX:
4395   case AArch64::LDRHHroX:
4396   case AArch64::LDRWroX:
4397   case AArch64::LDRBBroW:
4398   case AArch64::LDRHHroW:
4399   case AArch64::LDRWroW:
4400     return true;
4401   }
4402 }
4403 
4404 static bool isSExtLoad(const MachineInstr *LI) {
4405   switch (LI->getOpcode()) {
4406   default:
4407     return false;
4408   case AArch64::LDURSBWi:
4409   case AArch64::LDURSHWi:
4410   case AArch64::LDURSBXi:
4411   case AArch64::LDURSHXi:
4412   case AArch64::LDURSWi:
4413   case AArch64::LDRSBWui:
4414   case AArch64::LDRSHWui:
4415   case AArch64::LDRSBXui:
4416   case AArch64::LDRSHXui:
4417   case AArch64::LDRSWui:
4418   case AArch64::LDRSBWroX:
4419   case AArch64::LDRSHWroX:
4420   case AArch64::LDRSBXroX:
4421   case AArch64::LDRSHXroX:
4422   case AArch64::LDRSWroX:
4423   case AArch64::LDRSBWroW:
4424   case AArch64::LDRSHWroW:
4425   case AArch64::LDRSBXroW:
4426   case AArch64::LDRSHXroW:
4427   case AArch64::LDRSWroW:
4428     return true;
4429   }
4430 }
4431 
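// If the operand of the extend is a load that has already been selected as a
// zero-/sign-extending load, the extend itself is redundant. For a zext to
// i64 only a SUBREG_TO_REG is needed, because a 32-bit load already clears
// the upper 32 bits of the X register; for a sext, the 64-bit extending load
// is reused directly and the intervening copy is removed.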
4432 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4433                                          MVT SrcVT) {
4434   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4435   if (!LI || !LI->hasOneUse())
4436     return false;
4437 
4438   // Check if the load instruction has already been selected.
4439   Register Reg = lookUpRegForValue(LI);
4440   if (!Reg)
4441     return false;
4442 
4443   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4444   if (!MI)
4445     return false;
4446 
4447   // Check if the correct load instruction has been emitted - SelectionDAG might
4448   // have emitted a zero-extending load, but we need a sign-extending load.
4449   bool IsZExt = isa<ZExtInst>(I);
4450   const auto *LoadMI = MI;
4451   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4452       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4453     Register LoadReg = MI->getOperand(1).getReg();
4454     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4455     assert(LoadMI && "Expected valid instruction");
4456   }
4457   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4458     return false;
4459 
4460   // Nothing to be done.
4461   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4462     updateValueMap(I, Reg);
4463     return true;
4464   }
4465 
4466   if (IsZExt) {
4467     Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4468     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4469             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4470         .addImm(0)
4471         .addReg(Reg, getKillRegState(true))
4472         .addImm(AArch64::sub_32);
4473     Reg = Reg64;
4474   } else {
4475     assert((MI->getOpcode() == TargetOpcode::COPY &&
4476             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4477            "Expected copy instruction");
4478     Reg = MI->getOperand(1).getReg();
4479     MachineBasicBlock::iterator I(MI);
4480     removeDeadCode(I, std::next(I));
4481   }
4482   updateValueMap(I, Reg);
4483   return true;
4484 }
4485 
4486 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4487   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4488          "Unexpected integer extend instruction.");
4489   MVT RetVT;
4490   MVT SrcVT;
4491   if (!isTypeSupported(I->getType(), RetVT))
4492     return false;
4493 
4494   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4495     return false;
4496 
4497   // Try to optimize already sign-/zero-extended values from load instructions.
4498   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4499     return true;
4500 
4501   Register SrcReg = getRegForValue(I->getOperand(0));
4502   if (!SrcReg)
4503     return false;
4504 
4505   // Try to optimize already sign-/zero-extended values from function arguments.
4506   bool IsZExt = isa<ZExtInst>(I);
4507   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4508     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4509       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4510         Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4511         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
4512                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4513             .addImm(0)
4514             .addReg(SrcReg)
4515             .addImm(AArch64::sub_32);
4516         SrcReg = ResultReg;
4517       }
4518 
4519       updateValueMap(I, SrcReg);
4520       return true;
4521     }
4522   }
4523 
4524   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4525   if (!ResultReg)
4526     return false;
4527 
4528   updateValueMap(I, ResultReg);
4529   return true;
4530 }
4531 
4532 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4533   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4534   if (!DestEVT.isSimple())
4535     return false;
4536 
4537   MVT DestVT = DestEVT.getSimpleVT();
4538   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4539     return false;
4540 
4541   unsigned DivOpc;
4542   bool Is64bit = (DestVT == MVT::i64);
4543   switch (ISDOpcode) {
4544   default:
4545     return false;
4546   case ISD::SREM:
4547     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4548     break;
4549   case ISD::UREM:
4550     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4551     break;
4552   }
4553   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4554   Register Src0Reg = getRegForValue(I->getOperand(0));
4555   if (!Src0Reg)
4556     return false;
4557 
4558   Register Src1Reg = getRegForValue(I->getOperand(1));
4559   if (!Src1Reg)
4560     return false;
4561 
4562   const TargetRegisterClass *RC =
4563       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4564   Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4565   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4566   // The remainder is computed as numerator - (quotient * denominator) using the
4567   // MSUB instruction.
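  // e.g. for 'srem i32 %a, %b' this emits (registers illustrative):
  //   sdiv w8, w0, w1
  //   msub w0, w8, w1, w0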
4568   Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4569   updateValueMap(I, ResultReg);
4570   return true;
4571 }
4572 
4573 bool AArch64FastISel::selectMul(const Instruction *I) {
4574   MVT VT;
4575   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4576     return false;
4577 
4578   if (VT.isVector())
4579     return selectBinaryOp(I, ISD::MUL);
4580 
4581   const Value *Src0 = I->getOperand(0);
4582   const Value *Src1 = I->getOperand(1);
4583   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4584     if (C->getValue().isPowerOf2())
4585       std::swap(Src0, Src1);
4586 
4587   // Try to simplify to a shift instruction.
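  // e.g. 'mul i32 %x, 8' becomes 'lsl w0, w0, #3' (registers illustrative).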
4588   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4589     if (C->getValue().isPowerOf2()) {
4590       uint64_t ShiftVal = C->getValue().logBase2();
4591       MVT SrcVT = VT;
4592       bool IsZExt = true;
4593       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4594         if (!isIntExtFree(ZExt)) {
4595           MVT VT;
4596           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4597             SrcVT = VT;
4598             IsZExt = true;
4599             Src0 = ZExt->getOperand(0);
4600           }
4601         }
4602       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4603         if (!isIntExtFree(SExt)) {
4604           MVT VT;
4605           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4606             SrcVT = VT;
4607             IsZExt = false;
4608             Src0 = SExt->getOperand(0);
4609           }
4610         }
4611       }
4612 
4613       Register Src0Reg = getRegForValue(Src0);
4614       if (!Src0Reg)
4615         return false;
4616 
4617       unsigned ResultReg =
4618           emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4619 
4620       if (ResultReg) {
4621         updateValueMap(I, ResultReg);
4622         return true;
4623       }
4624     }
4625 
4626   Register Src0Reg = getRegForValue(I->getOperand(0));
4627   if (!Src0Reg)
4628     return false;
4629 
4630   Register Src1Reg = getRegForValue(I->getOperand(1));
4631   if (!Src1Reg)
4632     return false;
4633 
4634   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4635 
4636   if (!ResultReg)
4637     return false;
4638 
4639   updateValueMap(I, ResultReg);
4640   return true;
4641 }
4642 
4643 bool AArch64FastISel::selectShift(const Instruction *I) {
4644   MVT RetVT;
4645   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4646     return false;
4647 
4648   if (RetVT.isVector())
4649     return selectOperator(I, I->getOpcode());
4650 
4651   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4652     unsigned ResultReg = 0;
4653     uint64_t ShiftVal = C->getZExtValue();
4654     MVT SrcVT = RetVT;
4655     bool IsZExt = I->getOpcode() != Instruction::AShr;
4656     const Value *Op0 = I->getOperand(0);
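    // If the shifted value comes from a zero-/sign-extend, try to fold the
    // extension into the immediate shift; the emitLSL_ri/emitASR_ri/emitLSR_ri
    // helpers can then combine both into a single bitfield-move instruction.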
4657     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4658       if (!isIntExtFree(ZExt)) {
4659         MVT TmpVT;
4660         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4661           SrcVT = TmpVT;
4662           IsZExt = true;
4663           Op0 = ZExt->getOperand(0);
4664         }
4665       }
4666     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4667       if (!isIntExtFree(SExt)) {
4668         MVT TmpVT;
4669         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4670           SrcVT = TmpVT;
4671           IsZExt = false;
4672           Op0 = SExt->getOperand(0);
4673         }
4674       }
4675     }
4676 
4677     Register Op0Reg = getRegForValue(Op0);
4678     if (!Op0Reg)
4679       return false;
4680 
4681     switch (I->getOpcode()) {
4682     default: llvm_unreachable("Unexpected instruction.");
4683     case Instruction::Shl:
4684       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4685       break;
4686     case Instruction::AShr:
4687       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4688       break;
4689     case Instruction::LShr:
4690       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4691       break;
4692     }
4693     if (!ResultReg)
4694       return false;
4695 
4696     updateValueMap(I, ResultReg);
4697     return true;
4698   }
4699 
4700   Register Op0Reg = getRegForValue(I->getOperand(0));
4701   if (!Op0Reg)
4702     return false;
4703 
4704   Register Op1Reg = getRegForValue(I->getOperand(1));
4705   if (!Op1Reg)
4706     return false;
4707 
4708   unsigned ResultReg = 0;
4709   switch (I->getOpcode()) {
4710   default: llvm_unreachable("Unexpected instruction.");
4711   case Instruction::Shl:
4712     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4713     break;
4714   case Instruction::AShr:
4715     ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4716     break;
4717   case Instruction::LShr:
4718     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4719     break;
4720   }
4721 
4722   if (!ResultReg)
4723     return false;
4724 
4725   updateValueMap(I, ResultReg);
4726   return true;
4727 }
4728 
4729 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4730   MVT RetVT, SrcVT;
4731 
4732   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4733     return false;
4734   if (!isTypeLegal(I->getType(), RetVT))
4735     return false;
4736 
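  // A same-width scalar bitcast between a GPR and an FPR is just a single
  // FMOV between the two register files.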
4737   unsigned Opc;
4738   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4739     Opc = AArch64::FMOVWSr;
4740   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4741     Opc = AArch64::FMOVXDr;
4742   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4743     Opc = AArch64::FMOVSWr;
4744   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4745     Opc = AArch64::FMOVDXr;
4746   else
4747     return false;
4748 
4749   const TargetRegisterClass *RC = nullptr;
4750   switch (RetVT.SimpleTy) {
4751   default: llvm_unreachable("Unexpected value type.");
4752   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4753   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4754   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4755   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4756   }
4757   Register Op0Reg = getRegForValue(I->getOperand(0));
4758   if (!Op0Reg)
4759     return false;
4760 
4761   Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4762   if (!ResultReg)
4763     return false;
4764 
4765   updateValueMap(I, ResultReg);
4766   return true;
4767 }
4768 
4769 bool AArch64FastISel::selectFRem(const Instruction *I) {
4770   MVT RetVT;
4771   if (!isTypeLegal(I->getType(), RetVT))
4772     return false;
4773 
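  // AArch64 has no FP remainder instruction, so frem is lowered to a call to
  // the fmodf/fmod libcall.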
4774   RTLIB::Libcall LC;
4775   switch (RetVT.SimpleTy) {
4776   default:
4777     return false;
4778   case MVT::f32:
4779     LC = RTLIB::REM_F32;
4780     break;
4781   case MVT::f64:
4782     LC = RTLIB::REM_F64;
4783     break;
4784   }
4785 
4786   ArgListTy Args;
4787   Args.reserve(I->getNumOperands());
4788 
4789   // Populate the argument list.
4790   for (auto &Arg : I->operands()) {
4791     ArgListEntry Entry;
4792     Entry.Val = Arg;
4793     Entry.Ty = Arg->getType();
4794     Args.push_back(Entry);
4795   }
4796 
4797   CallLoweringInfo CLI;
4798   MCContext &Ctx = MF->getContext();
4799   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4800                 TLI.getLibcallName(LC), std::move(Args));
4801   if (!lowerCallTo(CLI))
4802     return false;
4803   updateValueMap(I, CLI.ResultReg);
4804   return true;
4805 }
4806 
4807 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4808   MVT VT;
4809   if (!isTypeLegal(I->getType(), VT))
4810     return false;
4811 
4812   if (!isa<ConstantInt>(I->getOperand(1)))
4813     return selectBinaryOp(I, ISD::SDIV);
4814 
4815   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4816   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4817       !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4818     return selectBinaryOp(I, ISD::SDIV);
4819 
4820   unsigned Lg2 = C.countTrailingZeros();
4821   Register Src0Reg = getRegForValue(I->getOperand(0));
4822   if (!Src0Reg)
4823     return false;
4824 
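  // An 'exact' sdiv guarantees there is no remainder, so a plain arithmetic
  // shift right gives the correct result.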
4825   if (cast<BinaryOperator>(I)->isExact()) {
4826     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4827     if (!ResultReg)
4828       return false;
4829     updateValueMap(I, ResultReg);
4830     return true;
4831   }
4832 
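  // Otherwise the division must round toward zero, so bias negative dividends
  // by (Pow2 - 1) before shifting. e.g. 'sdiv i32 %x, 8' becomes (registers
  // illustrative):
  //   add  w8, w0, #7
  //   cmp  w0, #0
  //   csel w8, w8, w0, lt
  //   asr  w0, w8, #3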
4833   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4834   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4835   if (!AddReg)
4836     return false;
4837 
4838   // (Src0 < 0) ? Pow2 - 1 : 0;
4839   if (!emitICmp_ri(VT, Src0Reg, 0))
4840     return false;
4841 
4842   unsigned SelectOpc;
4843   const TargetRegisterClass *RC;
4844   if (VT == MVT::i64) {
4845     SelectOpc = AArch64::CSELXr;
4846     RC = &AArch64::GPR64RegClass;
4847   } else {
4848     SelectOpc = AArch64::CSELWr;
4849     RC = &AArch64::GPR32RegClass;
4850   }
4851   Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4852                                         AArch64CC::LT);
4853   if (!SelectReg)
4854     return false;
4855 
4856   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4857   // negate the result.
4858   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4859   unsigned ResultReg;
4860   if (C.isNegative())
4861     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4862                               AArch64_AM::ASR, Lg2);
4863   else
4864     ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4865 
4866   if (!ResultReg)
4867     return false;
4868 
4869   updateValueMap(I, ResultReg);
4870   return true;
4871 }
4872 
4873 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4874 /// have to duplicate it for AArch64, because otherwise we would fail during the
4875 /// sign-extend emission.
4876 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4877   Register IdxN = getRegForValue(Idx);
4878   if (IdxN == 0)
4879     // Unhandled operand. Halt "fast" selection and bail.
4880     return 0;
4881 
4882   // If the index is narrower than intptr_t, sign-extend it to intptr_t.
4883   MVT PtrVT = TLI.getPointerTy(DL);
4884   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4885   if (IdxVT.bitsLT(PtrVT)) {
4886     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4887   } else if (IdxVT.bitsGT(PtrVT))
4888     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4889   return IdxN;
4890 }
4891 
4892 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4893 /// duplicate it for AArch64, because otherwise we would bail out even for
4894 /// simple cases. This is because the standard fastEmit functions don't cover
4895 /// MUL at all and ADD is lowered very inefficiently.
4896 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4897   if (Subtarget->isTargetILP32())
4898     return false;
4899 
4900   Register N = getRegForValue(I->getOperand(0));
4901   if (!N)
4902     return false;
4903 
4904   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4905   // into a single N = N + TotalOffset.
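  // Constant indices (struct fields and constant subscripts) only accumulate
  // into TotalOffs; a variable index forces the pending offset to be
  // materialized and emits its own mul/add.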
4906   uint64_t TotalOffs = 0;
4907   MVT VT = TLI.getPointerTy(DL);
4908   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4909        GTI != E; ++GTI) {
4910     const Value *Idx = GTI.getOperand();
4911     if (auto *StTy = GTI.getStructTypeOrNull()) {
4912       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4913       // N = N + Offset
4914       if (Field)
4915         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4916     } else {
4917       Type *Ty = GTI.getIndexedType();
4918 
4919       // If this is a constant subscript, handle it quickly.
4920       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4921         if (CI->isZero())
4922           continue;
4923         // N = N + Offset
4924         TotalOffs +=
4925             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4926         continue;
4927       }
4928       if (TotalOffs) {
4929         N = emitAdd_ri_(VT, N, TotalOffs);
4930         if (!N)
4931           return false;
4932         TotalOffs = 0;
4933       }
4934 
4935       // N = N + Idx * ElementSize;
4936       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
4937       unsigned IdxN = getRegForGEPIndex(Idx);
4938       if (!IdxN)
4939         return false;
4940 
4941       if (ElementSize != 1) {
4942         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
4943         if (!C)
4944           return false;
4945         IdxN = emitMul_rr(VT, IdxN, C);
4946         if (!IdxN)
4947           return false;
4948       }
4949       N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
4950       if (!N)
4951         return false;
4952     }
4953   }
4954   if (TotalOffs) {
4955     N = emitAdd_ri_(VT, N, TotalOffs);
4956     if (!N)
4957       return false;
4958   }
4959   updateValueMap(I, N);
4960   return true;
4961 }
4962 
4963 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
4964   assert(TM.getOptLevel() == CodeGenOpt::None &&
4965          "cmpxchg survived AtomicExpand at optlevel > -O0");
4966 
4967   auto *RetPairTy = cast<StructType>(I->getType());
4968   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
4969   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
4970          "cmpxchg has a non-i1 status result");
4971 
4972   MVT VT;
4973   if (!isTypeLegal(RetTy, VT))
4974     return false;
4975 
4976   const TargetRegisterClass *ResRC;
4977   unsigned Opc, CmpOpc;
4978   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
4979   // extractvalue selection doesn't support that.
4980   if (VT == MVT::i32) {
4981     Opc = AArch64::CMP_SWAP_32;
4982     CmpOpc = AArch64::SUBSWrs;
4983     ResRC = &AArch64::GPR32RegClass;
4984   } else if (VT == MVT::i64) {
4985     Opc = AArch64::CMP_SWAP_64;
4986     CmpOpc = AArch64::SUBSXrs;
4987     ResRC = &AArch64::GPR64RegClass;
4988   } else {
4989     return false;
4990   }
4991 
4992   const MCInstrDesc &II = TII.get(Opc);
4993 
4994   const Register AddrReg = constrainOperandRegClass(
4995       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
4996   const Register DesiredReg = constrainOperandRegClass(
4997       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
4998   const Register NewReg = constrainOperandRegClass(
4999       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5000 
5001   const Register ResultReg1 = createResultReg(ResRC);
5002   const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5003   const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5004 
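  // The CMP_SWAP pseudo yields the old memory value; a SUBS against the
  // expected value sets the flags, and a CSINC of WZR (equivalent to CSET on
  // EQ) materializes the i1 success flag: 1 if the old value matched, else 0.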
5005   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5006   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
5007       .addDef(ResultReg1)
5008       .addDef(ScratchReg)
5009       .addUse(AddrReg)
5010       .addUse(DesiredReg)
5011       .addUse(NewReg);
5012 
5013   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
5014       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5015       .addUse(ResultReg1)
5016       .addUse(DesiredReg)
5017       .addImm(0);
5018 
5019   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
5020       .addDef(ResultReg2)
5021       .addUse(AArch64::WZR)
5022       .addUse(AArch64::WZR)
5023       .addImm(AArch64CC::NE);
5024 
5025   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5026   updateValueMap(I, ResultReg1, 2);
5027   return true;
5028 }
5029 
5030 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5031   switch (I->getOpcode()) {
5032   default:
5033     break;
5034   case Instruction::Add:
5035   case Instruction::Sub:
5036     return selectAddSub(I);
5037   case Instruction::Mul:
5038     return selectMul(I);
5039   case Instruction::SDiv:
5040     return selectSDiv(I);
5041   case Instruction::SRem:
5042     if (!selectBinaryOp(I, ISD::SREM))
5043       return selectRem(I, ISD::SREM);
5044     return true;
5045   case Instruction::URem:
5046     if (!selectBinaryOp(I, ISD::UREM))
5047       return selectRem(I, ISD::UREM);
5048     return true;
5049   case Instruction::Shl:
5050   case Instruction::LShr:
5051   case Instruction::AShr:
5052     return selectShift(I);
5053   case Instruction::And:
5054   case Instruction::Or:
5055   case Instruction::Xor:
5056     return selectLogicalOp(I);
5057   case Instruction::Br:
5058     return selectBranch(I);
5059   case Instruction::IndirectBr:
5060     return selectIndirectBr(I);
5061   case Instruction::BitCast:
5062     if (!FastISel::selectBitCast(I))
5063       return selectBitCast(I);
5064     return true;
5065   case Instruction::FPToSI:
5066     if (!selectCast(I, ISD::FP_TO_SINT))
5067       return selectFPToInt(I, /*Signed=*/true);
5068     return true;
5069   case Instruction::FPToUI:
5070     return selectFPToInt(I, /*Signed=*/false);
5071   case Instruction::ZExt:
5072   case Instruction::SExt:
5073     return selectIntExt(I);
5074   case Instruction::Trunc:
5075     if (!selectCast(I, ISD::TRUNCATE))
5076       return selectTrunc(I);
5077     return true;
5078   case Instruction::FPExt:
5079     return selectFPExt(I);
5080   case Instruction::FPTrunc:
5081     return selectFPTrunc(I);
5082   case Instruction::SIToFP:
5083     if (!selectCast(I, ISD::SINT_TO_FP))
5084       return selectIntToFP(I, /*Signed=*/true);
5085     return true;
5086   case Instruction::UIToFP:
5087     return selectIntToFP(I, /*Signed=*/false);
5088   case Instruction::Load:
5089     return selectLoad(I);
5090   case Instruction::Store:
5091     return selectStore(I);
5092   case Instruction::FCmp:
5093   case Instruction::ICmp:
5094     return selectCmp(I);
5095   case Instruction::Select:
5096     return selectSelect(I);
5097   case Instruction::Ret:
5098     return selectRet(I);
5099   case Instruction::FRem:
5100     return selectFRem(I);
5101   case Instruction::GetElementPtr:
5102     return selectGetElementPtr(I);
5103   case Instruction::AtomicCmpXchg:
5104     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5105   }
5106 
5107   // Fall back to target-independent instruction selection.
5108   return selectOperator(I, I->getOpcode());
5109 }
5110 
5111 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5112                                         const TargetLibraryInfo *LibInfo) {
5113   return new AArch64FastISel(FuncInfo, LibInfo);
5114 }
5115