xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64FastISel.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
17 #include "AArch64MachineFunctionInfo.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/Analysis/BranchProbabilityInfo.h"
27 #include "llvm/CodeGen/CallingConvLower.h"
28 #include "llvm/CodeGen/FastISel.h"
29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
30 #include "llvm/CodeGen/ISDOpcodes.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineConstantPool.h"
33 #include "llvm/CodeGen/MachineFrameInfo.h"
34 #include "llvm/CodeGen/MachineInstr.h"
35 #include "llvm/CodeGen/MachineInstrBuilder.h"
36 #include "llvm/CodeGen/MachineMemOperand.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/RuntimeLibcallUtil.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/CodeGenTypes/MachineValueType.h"
41 #include "llvm/IR/Argument.h"
42 #include "llvm/IR/Attributes.h"
43 #include "llvm/IR/BasicBlock.h"
44 #include "llvm/IR/CallingConv.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/DataLayout.h"
48 #include "llvm/IR/DerivedTypes.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GetElementPtrTypeIterator.h"
51 #include "llvm/IR/GlobalValue.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/IntrinsicInst.h"
56 #include "llvm/IR/Intrinsics.h"
57 #include "llvm/IR/IntrinsicsAArch64.h"
58 #include "llvm/IR/Module.h"
59 #include "llvm/IR/Operator.h"
60 #include "llvm/IR/Type.h"
61 #include "llvm/IR/User.h"
62 #include "llvm/IR/Value.h"
63 #include "llvm/MC/MCInstrDesc.h"
64 #include "llvm/MC/MCRegisterInfo.h"
65 #include "llvm/MC/MCSymbol.h"
66 #include "llvm/Support/AtomicOrdering.h"
67 #include "llvm/Support/Casting.h"
68 #include "llvm/Support/CodeGen.h"
69 #include "llvm/Support/Compiler.h"
70 #include "llvm/Support/ErrorHandling.h"
71 #include "llvm/Support/MathExtras.h"
72 #include <algorithm>
73 #include <cassert>
74 #include <cstdint>
75 #include <iterator>
76 #include <utility>
77 
78 using namespace llvm;
79 
80 namespace {
81 
82 class AArch64FastISel final : public FastISel {
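     /// Address - Describes an address as it is being built for a memory
     /// operation: either a register base or a frame-index base, plus an
     /// optional offset register (with shift/extend type), an immediate
     /// offset, and an optional GlobalValue.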
83   class Address {
84   public:
85     using BaseKind = enum {
86       RegBase,
87       FrameIndexBase
88     };
89 
90   private:
91     BaseKind Kind = RegBase;
92     AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
93     union {
94       unsigned Reg;
95       int FI;
96     } Base;
97     unsigned OffsetReg = 0;
98     unsigned Shift = 0;
99     int64_t Offset = 0;
100     const GlobalValue *GV = nullptr;
101 
102   public:
103     Address() { Base.Reg = 0; }
104 
105     void setKind(BaseKind K) { Kind = K; }
106     BaseKind getKind() const { return Kind; }
107     void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
108     AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
109     bool isRegBase() const { return Kind == RegBase; }
110     bool isFIBase() const { return Kind == FrameIndexBase; }
111 
112     void setReg(unsigned Reg) {
113       assert(isRegBase() && "Invalid base register access!");
114       Base.Reg = Reg;
115     }
116 
117     unsigned getReg() const {
118       assert(isRegBase() && "Invalid base register access!");
119       return Base.Reg;
120     }
121 
122     void setOffsetReg(unsigned Reg) {
123       OffsetReg = Reg;
124     }
125 
126     unsigned getOffsetReg() const {
127       return OffsetReg;
128     }
129 
130     void setFI(unsigned FI) {
131       assert(isFIBase() && "Invalid base frame index access!");
132       Base.FI = FI;
133     }
134 
135     unsigned getFI() const {
136       assert(isFIBase() && "Invalid base frame index access!");
137       return Base.FI;
138     }
139 
140     void setOffset(int64_t O) { Offset = O; }
141     int64_t getOffset() { return Offset; }
142     void setShift(unsigned S) { Shift = S; }
143     unsigned getShift() { return Shift; }
144 
145     void setGlobalValue(const GlobalValue *G) { GV = G; }
146     const GlobalValue *getGlobalValue() { return GV; }
147   };
148 
149   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
150   /// make the right decision when generating code for different targets.
151   const AArch64Subtarget *Subtarget;
152   LLVMContext *Context;
153 
154   bool fastLowerArguments() override;
155   bool fastLowerCall(CallLoweringInfo &CLI) override;
156   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
157 
158 private:
159   // Selection routines.
160   bool selectAddSub(const Instruction *I);
161   bool selectLogicalOp(const Instruction *I);
162   bool selectLoad(const Instruction *I);
163   bool selectStore(const Instruction *I);
164   bool selectBranch(const Instruction *I);
165   bool selectIndirectBr(const Instruction *I);
166   bool selectCmp(const Instruction *I);
167   bool selectSelect(const Instruction *I);
168   bool selectFPExt(const Instruction *I);
169   bool selectFPTrunc(const Instruction *I);
170   bool selectFPToInt(const Instruction *I, bool Signed);
171   bool selectIntToFP(const Instruction *I, bool Signed);
172   bool selectRem(const Instruction *I, unsigned ISDOpcode);
173   bool selectRet(const Instruction *I);
174   bool selectTrunc(const Instruction *I);
175   bool selectIntExt(const Instruction *I);
176   bool selectMul(const Instruction *I);
177   bool selectShift(const Instruction *I);
178   bool selectBitCast(const Instruction *I);
179   bool selectFRem(const Instruction *I);
180   bool selectSDiv(const Instruction *I);
181   bool selectGetElementPtr(const Instruction *I);
182   bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
183 
184   // Utility helper routines.
185   bool isTypeLegal(Type *Ty, MVT &VT);
186   bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
187   bool isValueAvailable(const Value *V) const;
188   bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
189   bool computeCallAddress(const Value *V, Address &Addr);
190   bool simplifyAddress(Address &Addr, MVT VT);
191   void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
192                             MachineMemOperand::Flags Flags,
193                             unsigned ScaleFactor, MachineMemOperand *MMO);
194   bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
195   bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
196                           MaybeAlign Alignment);
197   bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
198                          const Value *Cond);
199   bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
200   bool optimizeSelect(const SelectInst *SI);
201   unsigned getRegForGEPIndex(const Value *Idx);
202 
203   // Emit helper routines.
204   unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
205                       const Value *RHS, bool SetFlags = false,
206                       bool WantResult = true,  bool IsZExt = false);
207   unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
208                          unsigned RHSReg, bool SetFlags = false,
209                          bool WantResult = true);
210   unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
211                          uint64_t Imm, bool SetFlags = false,
212                          bool WantResult = true);
213   unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
214                          unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
215                          uint64_t ShiftImm, bool SetFlags = false,
216                          bool WantResult = true);
217   unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
218                          unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
219                          uint64_t ShiftImm, bool SetFlags = false,
220                          bool WantResult = true);
221 
222   // Emit functions.
223   bool emitCompareAndBranch(const BranchInst *BI);
224   bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
225   bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
226   bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
227   bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
228   unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
229                     MachineMemOperand *MMO = nullptr);
230   bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
231                  MachineMemOperand *MMO = nullptr);
232   bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
233                         MachineMemOperand *MMO = nullptr);
234   unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
235   unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
236   unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
237                    bool SetFlags = false, bool WantResult = true,
238                    bool IsZExt = false);
239   unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
240   unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
241                    bool SetFlags = false, bool WantResult = true,
242                    bool IsZExt = false);
243   unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
244                        bool WantResult = true);
245   unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
246                        AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
247                        bool WantResult = true);
248   unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
249                          const Value *RHS);
250   unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251                             uint64_t Imm);
252   unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
253                             unsigned RHSReg, uint64_t ShiftImm);
254   unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
255   unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256   unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
257   unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
258   unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
259   unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
260                       bool IsZExt = true);
261   unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
262   unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
263                       bool IsZExt = true);
264   unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
265   unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
266                       bool IsZExt = false);
267 
268   unsigned materializeInt(const ConstantInt *CI, MVT VT);
269   unsigned materializeFP(const ConstantFP *CFP, MVT VT);
270   unsigned materializeGV(const GlobalValue *GV);
271 
272   // Call handling routines.
273 private:
274   CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
275   bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
276                        unsigned &NumBytes);
277   bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
278 
279 public:
280   // Backend specific FastISel code.
281   unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
282   unsigned fastMaterializeConstant(const Constant *C) override;
283   unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
284 
285   explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
286                            const TargetLibraryInfo *LibInfo)
287       : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
288     Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
289     Context = &FuncInfo.Fn->getContext();
290   }
291 
292   bool fastSelectInstruction(const Instruction *I) override;
293 
294 #include "AArch64GenFastISel.inc"
295 };
296 
297 } // end anonymous namespace
298 
299 /// Check if the sign-/zero-extend will be a noop.
300 static bool isIntExtFree(const Instruction *I) {
301   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
302          "Unexpected integer extend instruction.");
303   assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
304          "Unexpected value type.");
305   bool IsZExt = isa<ZExtInst>(I);
306 
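      // A sign-/zero-extend of a load with a single use is folded into an
      // extending load, so the extend itself costs nothing.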
307   if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
308     if (LI->hasOneUse())
309       return true;
310 
311   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
312     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
313       return true;
314 
315   return false;
316 }
317 
318 /// Determine the implicit scale factor that is applied by a memory
319 /// operation for a given value type.
320 static unsigned getImplicitScaleFactor(MVT VT) {
321   switch (VT.SimpleTy) {
322   default:
323     return 0;    // invalid
324   case MVT::i1:  // fall-through
325   case MVT::i8:
326     return 1;
327   case MVT::i16:
328     return 2;
329   case MVT::i32: // fall-through
330   case MVT::f32:
331     return 4;
332   case MVT::i64: // fall-through
333   case MVT::f64:
334     return 8;
335   }
336 }
337 
338 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
339   if (CC == CallingConv::GHC)
340     return CC_AArch64_GHC;
341   if (CC == CallingConv::CFGuard_Check)
342     return CC_AArch64_Win64_CFGuard_Check;
343   if (Subtarget->isTargetDarwin())
344     return CC_AArch64_DarwinPCS;
345   if (Subtarget->isTargetWindows())
346     return CC_AArch64_Win64PCS;
347   return CC_AArch64_AAPCS;
348 }
349 
350 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
351   assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
352          "Alloca should always return a pointer.");
353 
354   // Don't handle dynamic allocas.
355   if (!FuncInfo.StaticAllocaMap.count(AI))
356     return 0;
357 
358   DenseMap<const AllocaInst *, int>::iterator SI =
359       FuncInfo.StaticAllocaMap.find(AI);
360 
361   if (SI != FuncInfo.StaticAllocaMap.end()) {
362     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
363     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
364             ResultReg)
365         .addFrameIndex(SI->second)
366         .addImm(0)
367         .addImm(0);
368     return ResultReg;
369   }
370 
371   return 0;
372 }
373 
374 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
375   if (VT > MVT::i64)
376     return 0;
377 
378   if (!CI->isZero())
379     return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
380 
381   // Create a copy from the zero register to materialize a "0" value.
382   const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
383                                                    : &AArch64::GPR32RegClass;
384   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
385   Register ResultReg = createResultReg(RC);
386   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
387           ResultReg).addReg(ZeroReg, getKillRegState(true));
388   return ResultReg;
389 }
390 
391 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
392   // Positive zero (+0.0) has to be materialized with a fmov from the zero
393   // register, because the immediate version of fmov cannot encode zero.
394   if (CFP->isNullValue())
395     return fastMaterializeFloatZero(CFP);
396 
397   if (VT != MVT::f32 && VT != MVT::f64)
398     return 0;
399 
400   const APFloat Val = CFP->getValueAPF();
401   bool Is64Bit = (VT == MVT::f64);
402   // This checks to see if we can use FMOV instructions to materialize
403   // a constant, otherwise we have to materialize via the constant pool.
404   int Imm =
405       Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
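      // getFP32Imm/getFP64Imm return the 8-bit FMOV immediate encoding, or -1
      // if the constant cannot be encoded as an FMOV immediate.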
406   if (Imm != -1) {
407     unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
408     return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
409   }
410 
411   // For the large code model materialize the FP constant in code.
412   if (TM.getCodeModel() == CodeModel::Large) {
413     unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
414     const TargetRegisterClass *RC = Is64Bit ?
415         &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
416 
417     Register TmpReg = createResultReg(RC);
418     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
419         .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
420 
421     Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
422     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
423             TII.get(TargetOpcode::COPY), ResultReg)
424         .addReg(TmpReg, getKillRegState(true));
425 
426     return ResultReg;
427   }
428 
429   // Materialize via constant pool.  MachineConstantPool wants an explicit
430   // alignment.
431   Align Alignment = DL.getPrefTypeAlign(CFP->getType());
432 
433   unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
434   Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
435   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
436           ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
437 
438   unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
439   Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
440   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
441       .addReg(ADRPReg)
442       .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
443   return ResultReg;
444 }
445 
446 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
447   // We can't handle thread-local variables quickly yet.
448   if (GV->isThreadLocal())
449     return 0;
450 
451   // MachO still uses GOT for large code-model accesses, but ELF requires
452   // movz/movk sequences, which FastISel doesn't handle yet.
453   if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
454     return 0;
455 
456   unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
457 
458   EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
459   if (!DestEVT.isSimple())
460     return 0;
461 
462   Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
463   unsigned ResultReg;
464 
465   if (OpFlags & AArch64II::MO_GOT) {
466     // ADRP + LDRX
467     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
468             ADRPReg)
469         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
470 
471     unsigned LdrOpc;
472     if (Subtarget->isTargetILP32()) {
473       ResultReg = createResultReg(&AArch64::GPR32RegClass);
474       LdrOpc = AArch64::LDRWui;
475     } else {
476       ResultReg = createResultReg(&AArch64::GPR64RegClass);
477       LdrOpc = AArch64::LDRXui;
478     }
479     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
480             ResultReg)
481       .addReg(ADRPReg)
482       .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
483                         AArch64II::MO_NC | OpFlags);
484     if (!Subtarget->isTargetILP32())
485       return ResultReg;
486 
487     // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
488     // so we must extend the result on ILP32.
489     Register Result64 = createResultReg(&AArch64::GPR64RegClass);
490     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
491             TII.get(TargetOpcode::SUBREG_TO_REG))
492         .addDef(Result64)
493         .addImm(0)
494         .addReg(ResultReg, RegState::Kill)
495         .addImm(AArch64::sub_32);
496     return Result64;
497   } else {
498     // ADRP + ADDX
499     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
500             ADRPReg)
501         .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
502 
503     if (OpFlags & AArch64II::MO_TAGGED) {
504       // MO_TAGGED on the page indicates a tagged address. Set the tag now.
505       // We do so by creating a MOVK that sets bits 48-63 of the register to
506       // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
507       // the small code model so we can assume a binary size of <= 4GB, which
508       // makes the untagged PC relative offset positive. The binary must also be
509       // loaded into address range [0, 2^48). Both of these properties need to
510       // be ensured at runtime when using tagged addresses.
511       //
512       // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
513       // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
514       // are not exactly 1:1 with FastISel so we cannot easily abstract this
515       // out. At some point, it would be nice to find a way to not have this
516       // duplicate code.
517       unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
518       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
519               DstReg)
520           .addReg(ADRPReg)
521           .addGlobalAddress(GV, /*Offset=*/0x100000000,
522                             AArch64II::MO_PREL | AArch64II::MO_G3)
523           .addImm(48);
524       ADRPReg = DstReg;
525     }
526 
527     ResultReg = createResultReg(&AArch64::GPR64spRegClass);
528     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
529             ResultReg)
530         .addReg(ADRPReg)
531         .addGlobalAddress(GV, 0,
532                           AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
533         .addImm(0);
534   }
535   return ResultReg;
536 }
537 
538 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
539   EVT CEVT = TLI.getValueType(DL, C->getType(), true);
540 
541   // Only handle simple types.
542   if (!CEVT.isSimple())
543     return 0;
544   MVT VT = CEVT.getSimpleVT();
545   // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
546   // 'null' pointers need to have a somewhat special treatment.
547   // 'null' pointers need somewhat special treatment.
548     assert(VT == MVT::i64 && "Expected 64-bit pointers");
549     return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
550   }
551 
552   if (const auto *CI = dyn_cast<ConstantInt>(C))
553     return materializeInt(CI, VT);
554   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
555     return materializeFP(CFP, VT);
556   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
557     return materializeGV(GV);
558 
559   return 0;
560 }
561 
562 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
563   assert(CFP->isNullValue() &&
564          "Floating-point constant is not a positive zero.");
565   MVT VT;
566   if (!isTypeLegal(CFP->getType(), VT))
567     return 0;
568 
569   if (VT != MVT::f32 && VT != MVT::f64)
570     return 0;
571 
572   bool Is64Bit = (VT == MVT::f64);
573   unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
574   unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
575   return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
576 }
577 
578 /// Check if the multiply is by a power-of-2 constant.
579 static bool isMulPowOf2(const Value *I) {
580   if (const auto *MI = dyn_cast<MulOperator>(I)) {
581     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
582       if (C->getValue().isPowerOf2())
583         return true;
584     if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
585       if (C->getValue().isPowerOf2())
586         return true;
587   }
588   return false;
589 }
590 
591 // Computes the address to get to an object.
592 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
593 {
594   const User *U = nullptr;
595   unsigned Opcode = Instruction::UserOp1;
596   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
597     // Don't walk into other basic blocks unless the object is an alloca from
598     // another block, otherwise it may not have a virtual register assigned.
599     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
600         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
601       Opcode = I->getOpcode();
602       U = I;
603     }
604   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
605     Opcode = C->getOpcode();
606     U = C;
607   }
608 
609   if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
610     if (Ty->getAddressSpace() > 255)
611       // Fast instruction selection doesn't support the special
612       // address spaces.
613       return false;
614 
615   switch (Opcode) {
616   default:
617     break;
618   case Instruction::BitCast:
619     // Look through bitcasts.
620     return computeAddress(U->getOperand(0), Addr, Ty);
621 
622   case Instruction::IntToPtr:
623     // Look past no-op inttoptrs.
624     if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
625         TLI.getPointerTy(DL))
626       return computeAddress(U->getOperand(0), Addr, Ty);
627     break;
628 
629   case Instruction::PtrToInt:
630     // Look past no-op ptrtoints.
631     if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
632       return computeAddress(U->getOperand(0), Addr, Ty);
633     break;
634 
635   case Instruction::GetElementPtr: {
636     Address SavedAddr = Addr;
637     uint64_t TmpOffset = Addr.getOffset();
638 
639     // Iterate through the GEP folding the constants into offsets where
640     // we can.
641     for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
642          GTI != E; ++GTI) {
643       const Value *Op = GTI.getOperand();
644       if (StructType *STy = GTI.getStructTypeOrNull()) {
645         const StructLayout *SL = DL.getStructLayout(STy);
646         unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
647         TmpOffset += SL->getElementOffset(Idx);
648       } else {
649         uint64_t S = GTI.getSequentialElementStride(DL);
650         while (true) {
651           if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
652             // Constant-offset addressing.
653             TmpOffset += CI->getSExtValue() * S;
654             break;
655           }
656           if (canFoldAddIntoGEP(U, Op)) {
657             // A compatible add with a constant operand. Fold the constant.
658             ConstantInt *CI =
659                 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
660             TmpOffset += CI->getSExtValue() * S;
661             // Iterate on the other operand.
662             Op = cast<AddOperator>(Op)->getOperand(0);
663             continue;
664           }
665           // Unsupported
666           goto unsupported_gep;
667         }
668       }
669     }
670 
671     // Try to grab the base operand now.
672     Addr.setOffset(TmpOffset);
673     if (computeAddress(U->getOperand(0), Addr, Ty))
674       return true;
675 
676     // We failed, restore everything and try the other options.
677     Addr = SavedAddr;
678 
679   unsupported_gep:
680     break;
681   }
682   case Instruction::Alloca: {
683     const AllocaInst *AI = cast<AllocaInst>(Obj);
684     DenseMap<const AllocaInst *, int>::iterator SI =
685         FuncInfo.StaticAllocaMap.find(AI);
686     if (SI != FuncInfo.StaticAllocaMap.end()) {
687       Addr.setKind(Address::FrameIndexBase);
688       Addr.setFI(SI->second);
689       return true;
690     }
691     break;
692   }
693   case Instruction::Add: {
694     // Adds of constants are common and easy enough.
695     const Value *LHS = U->getOperand(0);
696     const Value *RHS = U->getOperand(1);
697 
698     if (isa<ConstantInt>(LHS))
699       std::swap(LHS, RHS);
700 
701     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
702       Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
703       return computeAddress(LHS, Addr, Ty);
704     }
705 
706     Address Backup = Addr;
707     if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
708       return true;
709     Addr = Backup;
710 
711     break;
712   }
713   case Instruction::Sub: {
714     // Subs of constants are common and easy enough.
715     const Value *LHS = U->getOperand(0);
716     const Value *RHS = U->getOperand(1);
717 
718     if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
719       Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
720       return computeAddress(LHS, Addr, Ty);
721     }
722     break;
723   }
724   case Instruction::Shl: {
725     if (Addr.getOffsetReg())
726       break;
727 
728     const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
729     if (!CI)
730       break;
731 
732     unsigned Val = CI->getZExtValue();
733     if (Val < 1 || Val > 3)
734       break;
735 
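        // The shift is only foldable into the addressing mode if it matches
        // the implicit scale of the access, i.e. the access size is exactly
        // 1 << Val bytes.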
736     uint64_t NumBytes = 0;
737     if (Ty && Ty->isSized()) {
738       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
739       NumBytes = NumBits / 8;
740       if (!isPowerOf2_64(NumBits))
741         NumBytes = 0;
742     }
743 
744     if (NumBytes != (1ULL << Val))
745       break;
746 
747     Addr.setShift(Val);
748     Addr.setExtendType(AArch64_AM::LSL);
749 
750     const Value *Src = U->getOperand(0);
751     if (const auto *I = dyn_cast<Instruction>(Src)) {
752       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
753         // Fold the zext or sext when it won't become a noop.
754         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
755           if (!isIntExtFree(ZE) &&
756               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
757             Addr.setExtendType(AArch64_AM::UXTW);
758             Src = ZE->getOperand(0);
759           }
760         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
761           if (!isIntExtFree(SE) &&
762               SE->getOperand(0)->getType()->isIntegerTy(32)) {
763             Addr.setExtendType(AArch64_AM::SXTW);
764             Src = SE->getOperand(0);
765           }
766         }
767       }
768     }
769 
770     if (const auto *AI = dyn_cast<BinaryOperator>(Src))
771       if (AI->getOpcode() == Instruction::And) {
772         const Value *LHS = AI->getOperand(0);
773         const Value *RHS = AI->getOperand(1);
774 
775         if (const auto *C = dyn_cast<ConstantInt>(LHS))
776           if (C->getValue() == 0xffffffff)
777             std::swap(LHS, RHS);
778 
779         if (const auto *C = dyn_cast<ConstantInt>(RHS))
780           if (C->getValue() == 0xffffffff) {
781             Addr.setExtendType(AArch64_AM::UXTW);
782             Register Reg = getRegForValue(LHS);
783             if (!Reg)
784               return false;
785             Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
786             Addr.setOffsetReg(Reg);
787             return true;
788           }
789       }
790 
791     Register Reg = getRegForValue(Src);
792     if (!Reg)
793       return false;
794     Addr.setOffsetReg(Reg);
795     return true;
796   }
797   case Instruction::Mul: {
798     if (Addr.getOffsetReg())
799       break;
800 
801     if (!isMulPowOf2(U))
802       break;
803 
804     const Value *LHS = U->getOperand(0);
805     const Value *RHS = U->getOperand(1);
806 
807     // Canonicalize power-of-2 value to the RHS.
808     if (const auto *C = dyn_cast<ConstantInt>(LHS))
809       if (C->getValue().isPowerOf2())
810         std::swap(LHS, RHS);
811 
812     assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
813     const auto *C = cast<ConstantInt>(RHS);
814     unsigned Val = C->getValue().logBase2();
815     if (Val < 1 || Val > 3)
816       break;
817 
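        // As in the Shl case above, the implied shift must match the access
        // size to be folded into the addressing mode.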
818     uint64_t NumBytes = 0;
819     if (Ty && Ty->isSized()) {
820       uint64_t NumBits = DL.getTypeSizeInBits(Ty);
821       NumBytes = NumBits / 8;
822       if (!isPowerOf2_64(NumBits))
823         NumBytes = 0;
824     }
825 
826     if (NumBytes != (1ULL << Val))
827       break;
828 
829     Addr.setShift(Val);
830     Addr.setExtendType(AArch64_AM::LSL);
831 
832     const Value *Src = LHS;
833     if (const auto *I = dyn_cast<Instruction>(Src)) {
834       if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
835         // Fold the zext or sext when it won't become a noop.
836         if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
837           if (!isIntExtFree(ZE) &&
838               ZE->getOperand(0)->getType()->isIntegerTy(32)) {
839             Addr.setExtendType(AArch64_AM::UXTW);
840             Src = ZE->getOperand(0);
841           }
842         } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
843           if (!isIntExtFree(SE) &&
844               SE->getOperand(0)->getType()->isIntegerTy(32)) {
845             Addr.setExtendType(AArch64_AM::SXTW);
846             Src = SE->getOperand(0);
847           }
848         }
849       }
850     }
851 
852     Register Reg = getRegForValue(Src);
853     if (!Reg)
854       return false;
855     Addr.setOffsetReg(Reg);
856     return true;
857   }
858   case Instruction::And: {
859     if (Addr.getOffsetReg())
860       break;
861 
862     if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
863       break;
864 
865     const Value *LHS = U->getOperand(0);
866     const Value *RHS = U->getOperand(1);
867 
868     if (const auto *C = dyn_cast<ConstantInt>(LHS))
869       if (C->getValue() == 0xffffffff)
870         std::swap(LHS, RHS);
871 
872     if (const auto *C = dyn_cast<ConstantInt>(RHS))
873       if (C->getValue() == 0xffffffff) {
874         Addr.setShift(0);
875         Addr.setExtendType(AArch64_AM::LSL);
876         Addr.setExtendType(AArch64_AM::UXTW);
877 
878         Register Reg = getRegForValue(LHS);
879         if (!Reg)
880           return false;
881         Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
882         Addr.setOffsetReg(Reg);
883         return true;
884       }
885     break;
886   }
887   case Instruction::SExt:
888   case Instruction::ZExt: {
889     if (!Addr.getReg() || Addr.getOffsetReg())
890       break;
891 
892     const Value *Src = nullptr;
893     // Fold the zext or sext when it won't become a noop.
894     if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
895       if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
896         Addr.setExtendType(AArch64_AM::UXTW);
897         Src = ZE->getOperand(0);
898       }
899     } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
900       if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
901         Addr.setExtendType(AArch64_AM::SXTW);
902         Src = SE->getOperand(0);
903       }
904     }
905 
906     if (!Src)
907       break;
908 
909     Addr.setShift(0);
910     Register Reg = getRegForValue(Src);
911     if (!Reg)
912       return false;
913     Addr.setOffsetReg(Reg);
914     return true;
915   }
916   } // end switch
917 
918   if (Addr.isRegBase() && !Addr.getReg()) {
919     Register Reg = getRegForValue(Obj);
920     if (!Reg)
921       return false;
922     Addr.setReg(Reg);
923     return true;
924   }
925 
926   if (!Addr.getOffsetReg()) {
927     Register Reg = getRegForValue(Obj);
928     if (!Reg)
929       return false;
930     Addr.setOffsetReg(Reg);
931     return true;
932   }
933 
934   return false;
935 }
936 
937 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
938   const User *U = nullptr;
939   unsigned Opcode = Instruction::UserOp1;
940   bool InMBB = true;
941 
942   if (const auto *I = dyn_cast<Instruction>(V)) {
943     Opcode = I->getOpcode();
944     U = I;
945     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
946   } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
947     Opcode = C->getOpcode();
948     U = C;
949   }
950 
951   switch (Opcode) {
952   default: break;
953   case Instruction::BitCast:
954     // Look past bitcasts if its operand is in the same BB.
955     if (InMBB)
956       return computeCallAddress(U->getOperand(0), Addr);
957     break;
958   case Instruction::IntToPtr:
959     // Look past no-op inttoptrs if its operand is in the same BB.
960     if (InMBB &&
961         TLI.getValueType(DL, U->getOperand(0)->getType()) ==
962             TLI.getPointerTy(DL))
963       return computeCallAddress(U->getOperand(0), Addr);
964     break;
965   case Instruction::PtrToInt:
966     // Look past no-op ptrtoints if its operand is in the same BB.
967     if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
968       return computeCallAddress(U->getOperand(0), Addr);
969     break;
970   }
971 
972   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
973     Addr.setGlobalValue(GV);
974     return true;
975   }
976 
977   // If all else fails, try to materialize the value in a register.
978   if (!Addr.getGlobalValue()) {
979     Addr.setReg(getRegForValue(V));
980     return Addr.getReg() != 0;
981   }
982 
983   return false;
984 }
985 
986 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
987   EVT evt = TLI.getValueType(DL, Ty, true);
988 
989   if (Subtarget->isTargetILP32() && Ty->isPointerTy())
990     return false;
991 
992   // Only handle simple types.
993   if (evt == MVT::Other || !evt.isSimple())
994     return false;
995   VT = evt.getSimpleVT();
996 
997   // This is a legal type, but it's not something we handle in fast-isel.
998   if (VT == MVT::f128)
999     return false;
1000 
1001   // Handle all other legal types, i.e. a register that will directly hold this
1002   // value.
1003   return TLI.isTypeLegal(VT);
1004 }
1005 
1006 /// Determine if the value type is supported by FastISel.
1007 ///
1008 /// FastISel for AArch64 can handle more value types than are legal. This adds
1009 /// simple value types such as i1, i8, and i16.
1010 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1011   if (Ty->isVectorTy() && !IsVectorAllowed)
1012     return false;
1013 
1014   if (isTypeLegal(Ty, VT))
1015     return true;
1016 
1017   // If this is a type that can be sign- or zero-extended to a basic operation,
1018   // go ahead and accept it now.
1019   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1020     return true;
1021 
1022   return false;
1023 }
1024 
1025 bool AArch64FastISel::isValueAvailable(const Value *V) const {
1026   if (!isa<Instruction>(V))
1027     return true;
1028 
1029   const auto *I = cast<Instruction>(V);
1030   return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1031 }
1032 
1033 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1034   if (Subtarget->isTargetILP32())
1035     return false;
1036 
1037   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1038   if (!ScaleFactor)
1039     return false;
1040 
1041   bool ImmediateOffsetNeedsLowering = false;
1042   bool RegisterOffsetNeedsLowering = false;
1043   int64_t Offset = Addr.getOffset();
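       // Scaled loads/stores (LDR/STR) take an unsigned 12-bit immediate that
       // is a multiple of the access size; the unscaled forms (LDUR/STUR) take
       // a signed 9-bit immediate. Anything else must be lowered into a
       // separate add.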
1044   if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1045     ImmediateOffsetNeedsLowering = true;
1046   else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1047            !isUInt<12>(Offset / ScaleFactor))
1048     ImmediateOffsetNeedsLowering = true;
1049 
1050   // Cannot encode an offset register and an immediate offset in the same
1051   // instruction. Fold the immediate offset into the load/store instruction and
1052   // emit an additional add to take care of the offset register.
1053   if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1054     RegisterOffsetNeedsLowering = true;
1055 
1056   // Cannot encode zero register as base.
1057   if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1058     RegisterOffsetNeedsLowering = true;
1059 
1060   // If this is a stack pointer and the offset needs to be simplified then put
1061   // the alloca address into a register, set the base type back to register and
1062   // continue. This should almost never happen.
1063   if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1064   {
1065     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1066     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1067             ResultReg)
1068       .addFrameIndex(Addr.getFI())
1069       .addImm(0)
1070       .addImm(0);
1071     Addr.setKind(Address::RegBase);
1072     Addr.setReg(ResultReg);
1073   }
1074 
1075   if (RegisterOffsetNeedsLowering) {
1076     unsigned ResultReg = 0;
1077     if (Addr.getReg()) {
1078       if (Addr.getExtendType() == AArch64_AM::SXTW ||
1079           Addr.getExtendType() == AArch64_AM::UXTW   )
1080         ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1081                                   Addr.getOffsetReg(), Addr.getExtendType(),
1082                                   Addr.getShift());
1083       else
1084         ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1085                                   Addr.getOffsetReg(), AArch64_AM::LSL,
1086                                   Addr.getShift());
1087     } else {
1088       if (Addr.getExtendType() == AArch64_AM::UXTW)
1089         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1090                                Addr.getShift(), /*IsZExt=*/true);
1091       else if (Addr.getExtendType() == AArch64_AM::SXTW)
1092         ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1093                                Addr.getShift(), /*IsZExt=*/false);
1094       else
1095         ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1096                                Addr.getShift());
1097     }
1098     if (!ResultReg)
1099       return false;
1100 
1101     Addr.setReg(ResultReg);
1102     Addr.setOffsetReg(0);
1103     Addr.setShift(0);
1104     Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1105   }
1106 
1107   // Since the offset is too large for the load/store instruction get the
1108   // reg+offset into a register.
1109   if (ImmediateOffsetNeedsLowering) {
1110     unsigned ResultReg;
1111     if (Addr.getReg())
1112       // Try to fold the immediate into the add instruction.
1113       ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1114     else
1115       ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1116 
1117     if (!ResultReg)
1118       return false;
1119     Addr.setReg(ResultReg);
1120     Addr.setOffset(0);
1121   }
1122   return true;
1123 }
1124 
1125 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1126                                            const MachineInstrBuilder &MIB,
1127                                            MachineMemOperand::Flags Flags,
1128                                            unsigned ScaleFactor,
1129                                            MachineMemOperand *MMO) {
1130   int64_t Offset = Addr.getOffset() / ScaleFactor;
1131   // Frame base works a bit differently. Handle it separately.
1132   if (Addr.isFIBase()) {
1133     int FI = Addr.getFI();
1134     // FIXME: We shouldn't be using getObjectSize/getObjectAlignment.  The size
1135     // and alignment should be based on the VT.
1136     MMO = FuncInfo.MF->getMachineMemOperand(
1137         MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1138         MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1139     // Now add the rest of the operands.
1140     MIB.addFrameIndex(FI).addImm(Offset);
1141   } else {
1142     assert(Addr.isRegBase() && "Unexpected address kind.");
1143     const MCInstrDesc &II = MIB->getDesc();
1144     unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1145     Addr.setReg(
1146       constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1147     Addr.setOffsetReg(
1148       constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1149     if (Addr.getOffsetReg()) {
1150       assert(Addr.getOffset() == 0 && "Unexpected offset");
1151       bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1152                       Addr.getExtendType() == AArch64_AM::SXTX;
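           // Register-offset forms take the base register, the offset register,
           // a flag selecting signed (SXTW/SXTX) vs. unsigned extension, and a
           // flag for whether the offset is scaled by the access size.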
1153       MIB.addReg(Addr.getReg());
1154       MIB.addReg(Addr.getOffsetReg());
1155       MIB.addImm(IsSigned);
1156       MIB.addImm(Addr.getShift() != 0);
1157     } else
1158       MIB.addReg(Addr.getReg()).addImm(Offset);
1159   }
1160 
1161   if (MMO)
1162     MIB.addMemOperand(MMO);
1163 }
1164 
1165 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1166                                      const Value *RHS, bool SetFlags,
1167                                      bool WantResult,  bool IsZExt) {
1168   AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1169   bool NeedExtend = false;
1170   switch (RetVT.SimpleTy) {
1171   default:
1172     return 0;
1173   case MVT::i1:
1174     NeedExtend = true;
1175     break;
1176   case MVT::i8:
1177     NeedExtend = true;
1178     ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1179     break;
1180   case MVT::i16:
1181     NeedExtend = true;
1182     ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1183     break;
1184   case MVT::i32:  // fall-through
1185   case MVT::i64:
1186     break;
1187   }
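       // Narrow types are handled in a 32-bit register; keep the original type
       // in SrcVT so the operands can be extended to RetVT first.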
1188   MVT SrcVT = RetVT;
1189   RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1190 
1191   // Canonicalize immediates to the RHS first.
1192   if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1193     std::swap(LHS, RHS);
1194 
1195   // Canonicalize mul by power of 2 to the RHS.
1196   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1197     if (isMulPowOf2(LHS))
1198       std::swap(LHS, RHS);
1199 
1200   // Canonicalize shift immediate to the RHS.
1201   if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1202     if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1203       if (isa<ConstantInt>(SI->getOperand(1)))
1204         if (SI->getOpcode() == Instruction::Shl  ||
1205             SI->getOpcode() == Instruction::LShr ||
1206             SI->getOpcode() == Instruction::AShr   )
1207           std::swap(LHS, RHS);
1208 
1209   Register LHSReg = getRegForValue(LHS);
1210   if (!LHSReg)
1211     return 0;
1212 
1213   if (NeedExtend)
1214     LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1215 
1216   unsigned ResultReg = 0;
1217   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1218     uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1219     if (C->isNegative())
1220       ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1221                                 WantResult);
1222     else
1223       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1224                                 WantResult);
1225   } else if (const auto *C = dyn_cast<Constant>(RHS))
1226     if (C->isNullValue())
1227       ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1228 
1229   if (ResultReg)
1230     return ResultReg;
1231 
1232   // Only extend the RHS within the instruction if there is a valid extend type.
1233   if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1234       isValueAvailable(RHS)) {
1235     Register RHSReg = getRegForValue(RHS);
1236     if (!RHSReg)
1237       return 0;
1238     return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1239                          SetFlags, WantResult);
1240   }
1241 
1242   // Check if the mul can be folded into the instruction.
1243   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1244     if (isMulPowOf2(RHS)) {
1245       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1246       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1247 
1248       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1249         if (C->getValue().isPowerOf2())
1250           std::swap(MulLHS, MulRHS);
1251 
1252       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1253       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1254       Register RHSReg = getRegForValue(MulLHS);
1255       if (!RHSReg)
1256         return 0;
1257       ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1258                                 ShiftVal, SetFlags, WantResult);
1259       if (ResultReg)
1260         return ResultReg;
1261     }
1262   }
1263 
1264   // Check if the shift can be folded into the instruction.
1265   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1266     if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1267       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1268         AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1269         switch (SI->getOpcode()) {
1270         default: break;
1271         case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
1272         case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1273         case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1274         }
1275         uint64_t ShiftVal = C->getZExtValue();
1276         if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1277           Register RHSReg = getRegForValue(SI->getOperand(0));
1278           if (!RHSReg)
1279             return 0;
1280           ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1281                                     ShiftVal, SetFlags, WantResult);
1282           if (ResultReg)
1283             return ResultReg;
1284         }
1285       }
1286     }
1287   }
1288 
1289   Register RHSReg = getRegForValue(RHS);
1290   if (!RHSReg)
1291     return 0;
1292 
1293   if (NeedExtend)
1294     RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1295 
1296   return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1297 }
1298 
1299 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1300                                         unsigned RHSReg, bool SetFlags,
1301                                         bool WantResult) {
1302   assert(LHSReg && RHSReg && "Invalid register number.");
1303 
1304   if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1305       RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1306     return 0;
1307 
1308   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1309     return 0;
1310 
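       // Opcode table indexed by [SetFlags][UseAdd][Is64Bit].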
1311   static const unsigned OpcTable[2][2][2] = {
1312     { { AArch64::SUBWrr,  AArch64::SUBXrr  },
1313       { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
1314     { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1315       { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
1316   };
1317   bool Is64Bit = RetVT == MVT::i64;
1318   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1319   const TargetRegisterClass *RC =
1320       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1321   unsigned ResultReg;
1322   if (WantResult)
1323     ResultReg = createResultReg(RC);
1324   else
1325     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1326 
1327   const MCInstrDesc &II = TII.get(Opc);
1328   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1329   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1330   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1331       .addReg(LHSReg)
1332       .addReg(RHSReg);
1333   return ResultReg;
1334 }
1335 
1336 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1337                                         uint64_t Imm, bool SetFlags,
1338                                         bool WantResult) {
1339   assert(LHSReg && "Invalid register number.");
1340 
1341   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1342     return 0;
1343 
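       // ADD/SUB (immediate) encodes a 12-bit unsigned immediate, optionally
       // shifted left by 12, so only values in [0, 0xfff] and multiples of
       // 0x1000 up to 0xfff000 can be encoded directly.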
1344   unsigned ShiftImm;
1345   if (isUInt<12>(Imm))
1346     ShiftImm = 0;
1347   else if ((Imm & 0xfff000) == Imm) {
1348     ShiftImm = 12;
1349     Imm >>= 12;
1350   } else
1351     return 0;
1352 
1353   static const unsigned OpcTable[2][2][2] = {
1354     { { AArch64::SUBWri,  AArch64::SUBXri  },
1355       { AArch64::ADDWri,  AArch64::ADDXri  }  },
1356     { { AArch64::SUBSWri, AArch64::SUBSXri },
1357       { AArch64::ADDSWri, AArch64::ADDSXri }  }
1358   };
1359   bool Is64Bit = RetVT == MVT::i64;
1360   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1361   const TargetRegisterClass *RC;
1362   if (SetFlags)
1363     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1364   else
1365     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1366   unsigned ResultReg;
1367   if (WantResult)
1368     ResultReg = createResultReg(RC);
1369   else
1370     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1371 
1372   const MCInstrDesc &II = TII.get(Opc);
1373   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1374   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1375       .addReg(LHSReg)
1376       .addImm(Imm)
1377       .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1378   return ResultReg;
1379 }
1380 
1381 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1382                                         unsigned RHSReg,
1383                                         AArch64_AM::ShiftExtendType ShiftType,
1384                                         uint64_t ShiftImm, bool SetFlags,
1385                                         bool WantResult) {
1386   assert(LHSReg && RHSReg && "Invalid register number.");
1387   assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1388          RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1389 
1390   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1391     return 0;
1392 
1393   // Don't deal with undefined shifts.
1394   if (ShiftImm >= RetVT.getSizeInBits())
1395     return 0;
1396 
1397   static const unsigned OpcTable[2][2][2] = {
1398     { { AArch64::SUBWrs,  AArch64::SUBXrs  },
1399       { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
1400     { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1401       { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
1402   };
1403   bool Is64Bit = RetVT == MVT::i64;
1404   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1405   const TargetRegisterClass *RC =
1406       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1407   unsigned ResultReg;
1408   if (WantResult)
1409     ResultReg = createResultReg(RC);
1410   else
1411     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1412 
1413   const MCInstrDesc &II = TII.get(Opc);
1414   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1415   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1416   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1417       .addReg(LHSReg)
1418       .addReg(RHSReg)
1419       .addImm(getShifterImm(ShiftType, ShiftImm));
1420   return ResultReg;
1421 }
1422 
1423 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1424                                         unsigned RHSReg,
1425                                         AArch64_AM::ShiftExtendType ExtType,
1426                                         uint64_t ShiftImm, bool SetFlags,
1427                                         bool WantResult) {
1428   assert(LHSReg && RHSReg && "Invalid register number.");
1429   assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1430          RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1431 
1432   if (RetVT != MVT::i32 && RetVT != MVT::i64)
1433     return 0;
1434 
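  // The extended-register form only allows a small left shift on top of the
  // extend; larger shift amounts cannot be folded here.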
1435   if (ShiftImm >= 4)
1436     return 0;
1437 
1438   static const unsigned OpcTable[2][2][2] = {
1439     { { AArch64::SUBWrx,  AArch64::SUBXrx  },
1440       { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
1441     { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1442       { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
1443   };
1444   bool Is64Bit = RetVT == MVT::i64;
1445   unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1446   const TargetRegisterClass *RC = nullptr;
1447   if (SetFlags)
1448     RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1449   else
1450     RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1451   unsigned ResultReg;
1452   if (WantResult)
1453     ResultReg = createResultReg(RC);
1454   else
1455     ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1456 
1457   const MCInstrDesc &II = TII.get(Opc);
1458   LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1459   RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1460   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1461       .addReg(LHSReg)
1462       .addReg(RHSReg)
1463       .addImm(getArithExtendImm(ExtType, ShiftImm));
1464   return ResultReg;
1465 }
1466 
1467 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1468   Type *Ty = LHS->getType();
1469   EVT EVT = TLI.getValueType(DL, Ty, true);
1470   if (!EVT.isSimple())
1471     return false;
1472   MVT VT = EVT.getSimpleVT();
1473 
1474   switch (VT.SimpleTy) {
1475   default:
1476     return false;
1477   case MVT::i1:
1478   case MVT::i8:
1479   case MVT::i16:
1480   case MVT::i32:
1481   case MVT::i64:
1482     return emitICmp(VT, LHS, RHS, IsZExt);
1483   case MVT::f32:
1484   case MVT::f64:
1485     return emitFCmp(VT, LHS, RHS);
1486   }
1487 }
1488 
1489 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1490                                bool IsZExt) {
1491   return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1492                  IsZExt) != 0;
1493 }
1494 
1495 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1496   return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1497                        /*SetFlags=*/true, /*WantResult=*/false) != 0;
1498 }
1499 
1500 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1501   if (RetVT != MVT::f32 && RetVT != MVT::f64)
1502     return false;
1503 
1504   // Check to see if the 2nd operand is a constant that we can encode directly
1505   // in the compare.
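  // FCMP only accepts an immediate of +0.0, so that is the only constant we
  // try to fold.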
1506   bool UseImm = false;
1507   if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1508     if (CFP->isZero() && !CFP->isNegative())
1509       UseImm = true;
1510 
1511   Register LHSReg = getRegForValue(LHS);
1512   if (!LHSReg)
1513     return false;
1514 
1515   if (UseImm) {
1516     unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1517     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1518         .addReg(LHSReg);
1519     return true;
1520   }
1521 
1522   Register RHSReg = getRegForValue(RHS);
1523   if (!RHSReg)
1524     return false;
1525 
1526   unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1527   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1528       .addReg(LHSReg)
1529       .addReg(RHSReg);
1530   return true;
1531 }
1532 
1533 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1534                                   bool SetFlags, bool WantResult, bool IsZExt) {
1535   return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1536                     IsZExt);
1537 }
1538 
1539 /// This method is a wrapper to simplify add emission.
1540 ///
1541 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1542 /// that fails, then try to materialize the immediate into a register and use
1543 /// emitAddSub_rr instead.
1544 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1545   unsigned ResultReg;
1546   if (Imm < 0)
1547     ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1548   else
1549     ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1550 
1551   if (ResultReg)
1552     return ResultReg;
1553 
1554   unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1555   if (!CReg)
1556     return 0;
1557 
1558   ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1559   return ResultReg;
1560 }
1561 
1562 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1563                                   bool SetFlags, bool WantResult, bool IsZExt) {
1564   return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1565                     IsZExt);
1566 }
1567 
1568 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1569                                       unsigned RHSReg, bool WantResult) {
1570   return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1571                        /*SetFlags=*/true, WantResult);
1572 }
1573 
1574 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1575                                       unsigned RHSReg,
1576                                       AArch64_AM::ShiftExtendType ShiftType,
1577                                       uint64_t ShiftImm, bool WantResult) {
1578   return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1579                        ShiftImm, /*SetFlags=*/true, WantResult);
1580 }
1581 
1582 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1583                                         const Value *LHS, const Value *RHS) {
1584   // Canonicalize immediates to the RHS first.
1585   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1586     std::swap(LHS, RHS);
1587 
1588   // Canonicalize mul by power-of-2 to the RHS.
1589   if (LHS->hasOneUse() && isValueAvailable(LHS))
1590     if (isMulPowOf2(LHS))
1591       std::swap(LHS, RHS);
1592 
1593   // Canonicalize shift immediate to the RHS.
1594   if (LHS->hasOneUse() && isValueAvailable(LHS))
1595     if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1596       if (isa<ConstantInt>(SI->getOperand(1)))
1597         std::swap(LHS, RHS);
1598 
1599   Register LHSReg = getRegForValue(LHS);
1600   if (!LHSReg)
1601     return 0;
1602 
1603   unsigned ResultReg = 0;
1604   if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1605     uint64_t Imm = C->getZExtValue();
1606     ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1607   }
1608   if (ResultReg)
1609     return ResultReg;
1610 
1611   // Check if the mul can be folded into the instruction.
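  // A multiply by a power of two on the RHS can be folded into the shifted
  // register form, e.g. 'and x, (y * 8)' becomes 'AND Xd, Xn, Xm, LSL #3'.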
1612   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1613     if (isMulPowOf2(RHS)) {
1614       const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1615       const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1616 
1617       if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1618         if (C->getValue().isPowerOf2())
1619           std::swap(MulLHS, MulRHS);
1620 
1621       assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1622       uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1623 
1624       Register RHSReg = getRegForValue(MulLHS);
1625       if (!RHSReg)
1626         return 0;
1627       ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1628       if (ResultReg)
1629         return ResultReg;
1630     }
1631   }
1632 
1633   // Check if the shift can be folded into the instruction.
1634   if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1635     if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1636       if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1637         uint64_t ShiftVal = C->getZExtValue();
1638         Register RHSReg = getRegForValue(SI->getOperand(0));
1639         if (!RHSReg)
1640           return 0;
1641         ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1642         if (ResultReg)
1643           return ResultReg;
1644       }
1645   }
1646 
1647   Register RHSReg = getRegForValue(RHS);
1648   if (!RHSReg)
1649     return 0;
1650 
1651   MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1652   ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1653   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1654     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1655     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1656   }
1657   return ResultReg;
1658 }
1659 
1660 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1661                                            unsigned LHSReg, uint64_t Imm) {
1662   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1663                 "ISD nodes are not consecutive!");
1664   static const unsigned OpcTable[3][2] = {
1665     { AArch64::ANDWri, AArch64::ANDXri },
1666     { AArch64::ORRWri, AArch64::ORRXri },
1667     { AArch64::EORWri, AArch64::EORXri }
1668   };
1669   const TargetRegisterClass *RC;
1670   unsigned Opc;
1671   unsigned RegSize;
1672   switch (RetVT.SimpleTy) {
1673   default:
1674     return 0;
1675   case MVT::i1:
1676   case MVT::i8:
1677   case MVT::i16:
1678   case MVT::i32: {
1679     unsigned Idx = ISDOpc - ISD::AND;
1680     Opc = OpcTable[Idx][0];
1681     RC = &AArch64::GPR32spRegClass;
1682     RegSize = 32;
1683     break;
1684   }
1685   case MVT::i64:
1686     Opc = OpcTable[ISDOpc - ISD::AND][1];
1687     RC = &AArch64::GPR64spRegClass;
1688     RegSize = 64;
1689     break;
1690   }
1691 
1692   if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
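  // Logical instructions only accept "bitmask" immediates: a rotated run of
  // contiguous ones replicated across the register. Reject anything else and
  // let the caller fall back to the register-register form.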
1693     return 0;
1694 
1695   Register ResultReg =
1696       fastEmitInst_ri(Opc, RC, LHSReg,
1697                       AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1698   if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1699     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1700     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1701   }
1702   return ResultReg;
1703 }
1704 
1705 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1706                                            unsigned LHSReg, unsigned RHSReg,
1707                                            uint64_t ShiftImm) {
1708   static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1709                 "ISD nodes are not consecutive!");
1710   static const unsigned OpcTable[3][2] = {
1711     { AArch64::ANDWrs, AArch64::ANDXrs },
1712     { AArch64::ORRWrs, AArch64::ORRXrs },
1713     { AArch64::EORWrs, AArch64::EORXrs }
1714   };
1715 
1716   // Don't deal with undefined shifts.
1717   if (ShiftImm >= RetVT.getSizeInBits())
1718     return 0;
1719 
1720   const TargetRegisterClass *RC;
1721   unsigned Opc;
1722   switch (RetVT.SimpleTy) {
1723   default:
1724     return 0;
1725   case MVT::i1:
1726   case MVT::i8:
1727   case MVT::i16:
1728   case MVT::i32:
1729     Opc = OpcTable[ISDOpc - ISD::AND][0];
1730     RC = &AArch64::GPR32RegClass;
1731     break;
1732   case MVT::i64:
1733     Opc = OpcTable[ISDOpc - ISD::AND][1];
1734     RC = &AArch64::GPR64RegClass;
1735     break;
1736   }
1737   Register ResultReg =
1738       fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1739                        AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1740   if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1741     uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1742     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1743   }
1744   return ResultReg;
1745 }
1746 
1747 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1748                                      uint64_t Imm) {
1749   return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1750 }
1751 
1752 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1753                                    bool WantZExt, MachineMemOperand *MMO) {
1754   if (!TLI.allowsMisalignedMemoryAccesses(VT))
1755     return 0;
1756 
1757   // Simplify this down to something we can handle.
1758   if (!simplifyAddress(Addr, VT))
1759     return 0;
1760 
1761   unsigned ScaleFactor = getImplicitScaleFactor(VT);
1762   if (!ScaleFactor)
1763     llvm_unreachable("Unexpected value type.");
1764 
1765   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1766   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
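  // For example, an i64 load at offset 16 can use LDRXui with imm 2 (16 / 8),
  // while offset -8 must use the unscaled LDURXi form.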
1767   bool UseScaled = true;
1768   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1769     UseScaled = false;
1770     ScaleFactor = 1;
1771   }
1772 
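  // Indexed as [WantZExt][2 * Idx + IsRet64Bit][size], where Idx selects
  // unscaled, scaled, or register-offset (X/W) addressing and the inner rows
  // alternate between 32-bit and 64-bit destination registers.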
1773   static const unsigned GPOpcTable[2][8][4] = {
1774     // Sign-extend.
1775     { { AArch64::LDURSBWi,  AArch64::LDURSHWi,  AArch64::LDURWi,
1776         AArch64::LDURXi  },
1777       { AArch64::LDURSBXi,  AArch64::LDURSHXi,  AArch64::LDURSWi,
1778         AArch64::LDURXi  },
1779       { AArch64::LDRSBWui,  AArch64::LDRSHWui,  AArch64::LDRWui,
1780         AArch64::LDRXui  },
1781       { AArch64::LDRSBXui,  AArch64::LDRSHXui,  AArch64::LDRSWui,
1782         AArch64::LDRXui  },
1783       { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1784         AArch64::LDRXroX },
1785       { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1786         AArch64::LDRXroX },
1787       { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1788         AArch64::LDRXroW },
1789       { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1790         AArch64::LDRXroW }
1791     },
1792     // Zero-extend.
1793     { { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1794         AArch64::LDURXi  },
1795       { AArch64::LDURBBi,   AArch64::LDURHHi,   AArch64::LDURWi,
1796         AArch64::LDURXi  },
1797       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1798         AArch64::LDRXui  },
1799       { AArch64::LDRBBui,   AArch64::LDRHHui,   AArch64::LDRWui,
1800         AArch64::LDRXui  },
1801       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1802         AArch64::LDRXroX },
1803       { AArch64::LDRBBroX,  AArch64::LDRHHroX,  AArch64::LDRWroX,
1804         AArch64::LDRXroX },
1805       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1806         AArch64::LDRXroW },
1807       { AArch64::LDRBBroW,  AArch64::LDRHHroW,  AArch64::LDRWroW,
1808         AArch64::LDRXroW }
1809     }
1810   };
1811 
1812   static const unsigned FPOpcTable[4][2] = {
1813     { AArch64::LDURSi,  AArch64::LDURDi  },
1814     { AArch64::LDRSui,  AArch64::LDRDui  },
1815     { AArch64::LDRSroX, AArch64::LDRDroX },
1816     { AArch64::LDRSroW, AArch64::LDRDroW }
1817   };
1818 
1819   unsigned Opc;
1820   const TargetRegisterClass *RC;
1821   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1822                       Addr.getOffsetReg();
1823   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1824   if (Addr.getExtendType() == AArch64_AM::UXTW ||
1825       Addr.getExtendType() == AArch64_AM::SXTW)
1826     Idx++;
1827 
1828   bool IsRet64Bit = RetVT == MVT::i64;
1829   switch (VT.SimpleTy) {
1830   default:
1831     llvm_unreachable("Unexpected value type.");
1832   case MVT::i1: // Intentional fall-through.
1833   case MVT::i8:
1834     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1835     RC = (IsRet64Bit && !WantZExt) ?
1836              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1837     break;
1838   case MVT::i16:
1839     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1840     RC = (IsRet64Bit && !WantZExt) ?
1841              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842     break;
1843   case MVT::i32:
1844     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1845     RC = (IsRet64Bit && !WantZExt) ?
1846              &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1847     break;
1848   case MVT::i64:
1849     Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1850     RC = &AArch64::GPR64RegClass;
1851     break;
1852   case MVT::f32:
1853     Opc = FPOpcTable[Idx][0];
1854     RC = &AArch64::FPR32RegClass;
1855     break;
1856   case MVT::f64:
1857     Opc = FPOpcTable[Idx][1];
1858     RC = &AArch64::FPR64RegClass;
1859     break;
1860   }
1861 
1862   // Create the base instruction, then add the operands.
1863   Register ResultReg = createResultReg(RC);
1864   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1865                                     TII.get(Opc), ResultReg);
1866   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1867 
1868   // Loading an i1 requires special handling.
1869   if (VT == MVT::i1) {
1870     unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1871     assert(ANDReg && "Unexpected AND instruction emission failure.");
1872     ResultReg = ANDReg;
1873   }
1874 
1875   // For zero-extending loads to 64bit we emit a 32bit load and then convert
1876   // the 32bit reg to a 64bit reg.
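  // Writing a W register implicitly zeroes the upper 32 bits, so a
  // SUBREG_TO_REG with a zero immediate is all that is needed.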
1877   if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1878     Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1879     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1880             TII.get(AArch64::SUBREG_TO_REG), Reg64)
1881         .addImm(0)
1882         .addReg(ResultReg, getKillRegState(true))
1883         .addImm(AArch64::sub_32);
1884     ResultReg = Reg64;
1885   }
1886   return ResultReg;
1887 }
1888 
1889 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1890   MVT VT;
1891   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1892     return false;
1893 
1894   if (VT.isVector())
1895     return selectOperator(I, I->getOpcode());
1896 
1897   unsigned ResultReg;
1898   switch (I->getOpcode()) {
1899   default:
1900     llvm_unreachable("Unexpected instruction.");
1901   case Instruction::Add:
1902     ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1903     break;
1904   case Instruction::Sub:
1905     ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1906     break;
1907   }
1908   if (!ResultReg)
1909     return false;
1910 
1911   updateValueMap(I, ResultReg);
1912   return true;
1913 }
1914 
1915 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1916   MVT VT;
1917   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1918     return false;
1919 
1920   if (VT.isVector())
1921     return selectOperator(I, I->getOpcode());
1922 
1923   unsigned ResultReg;
1924   switch (I->getOpcode()) {
1925   default:
1926     llvm_unreachable("Unexpected instruction.");
1927   case Instruction::And:
1928     ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1929     break;
1930   case Instruction::Or:
1931     ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1932     break;
1933   case Instruction::Xor:
1934     ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1935     break;
1936   }
1937   if (!ResultReg)
1938     return false;
1939 
1940   updateValueMap(I, ResultReg);
1941   return true;
1942 }
1943 
1944 bool AArch64FastISel::selectLoad(const Instruction *I) {
1945   MVT VT;
1946   // Verify we have a legal type before going any further.  Currently, we handle
1947   // simple types that will directly fit in a register (i32/f32/i64/f64) or
1948   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1949   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1950       cast<LoadInst>(I)->isAtomic())
1951     return false;
1952 
1953   const Value *SV = I->getOperand(0);
1954   if (TLI.supportSwiftError()) {
1955     // Swifterror values can come from either a function parameter with
1956     // swifterror attribute or an alloca with swifterror attribute.
1957     if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1958       if (Arg->hasSwiftErrorAttr())
1959         return false;
1960     }
1961 
1962     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1963       if (Alloca->isSwiftError())
1964         return false;
1965     }
1966   }
1967 
1968   // See if we can handle this address.
1969   Address Addr;
1970   if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1971     return false;
1972 
1973   // Fold the following sign-/zero-extend into the load instruction.
1974   bool WantZExt = true;
1975   MVT RetVT = VT;
1976   const Value *IntExtVal = nullptr;
1977   if (I->hasOneUse()) {
1978     if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1979       if (isTypeSupported(ZE->getType(), RetVT))
1980         IntExtVal = ZE;
1981       else
1982         RetVT = VT;
1983     } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1984       if (isTypeSupported(SE->getType(), RetVT))
1985         IntExtVal = SE;
1986       else
1987         RetVT = VT;
1988       WantZExt = false;
1989     }
1990   }
1991 
1992   unsigned ResultReg =
1993       emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1994   if (!ResultReg)
1995     return false;
1996 
1997   // There are a few different cases we have to handle, because the load or the
1998   // sign-/zero-extend might not be selected by FastISel if we fall-back to
1999   // SelectionDAG. There is also an ordering issue when both instructions are in
2000   // different basic blocks.
2001   // 1.) The load instruction is selected by FastISel, but the integer extend
2002   //     not. This usually happens when the integer extend is in a different
2003   //     basic block and SelectionDAG took over for that basic block.
2004   // 2.) The load instruction is selected before the integer extend. This only
2005   //     happens when the integer extend is in a different basic block.
2006   // 3.) The load instruction is selected by SelectionDAG and the integer extend
2007   //     by FastISel. This happens if there are instructions between the load
2008   //     and the integer extend that couldn't be selected by FastISel.
2009   if (IntExtVal) {
2010     // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2011     // could select it. Emit a copy to subreg if necessary. FastISel will remove
2012     // it when it selects the integer extend.
2013     Register Reg = lookUpRegForValue(IntExtVal);
2014     auto *MI = MRI.getUniqueVRegDef(Reg);
2015     if (!MI) {
2016       if (RetVT == MVT::i64 && VT <= MVT::i32) {
2017         if (WantZExt) {
2018           // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2019           MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2020           ResultReg = std::prev(I)->getOperand(0).getReg();
2021           removeDeadCode(I, std::next(I));
2022         } else
2023           ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2024                                                  AArch64::sub_32);
2025       }
2026       updateValueMap(I, ResultReg);
2027       return true;
2028     }
2029 
2030     // The integer extend has already been emitted - delete all the instructions
2031     // that have been emitted by the integer extend lowering code and use the
2032     // result from the load instruction directly.
2033     while (MI) {
2034       Reg = 0;
2035       for (auto &Opnd : MI->uses()) {
2036         if (Opnd.isReg()) {
2037           Reg = Opnd.getReg();
2038           break;
2039         }
2040       }
2041       MachineBasicBlock::iterator I(MI);
2042       removeDeadCode(I, std::next(I));
2043       MI = nullptr;
2044       if (Reg)
2045         MI = MRI.getUniqueVRegDef(Reg);
2046     }
2047     updateValueMap(IntExtVal, ResultReg);
2048     return true;
2049   }
2050 
2051   updateValueMap(I, ResultReg);
2052   return true;
2053 }
2054 
2055 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2056                                        unsigned AddrReg,
2057                                        MachineMemOperand *MMO) {
2058   unsigned Opc;
2059   switch (VT.SimpleTy) {
2060   default: return false;
2061   case MVT::i8:  Opc = AArch64::STLRB; break;
2062   case MVT::i16: Opc = AArch64::STLRH; break;
2063   case MVT::i32: Opc = AArch64::STLRW; break;
2064   case MVT::i64: Opc = AArch64::STLRX; break;
2065   }
2066 
2067   const MCInstrDesc &II = TII.get(Opc);
2068   SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2069   AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2070   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2071       .addReg(SrcReg)
2072       .addReg(AddrReg)
2073       .addMemOperand(MMO);
2074   return true;
2075 }
2076 
2077 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2078                                 MachineMemOperand *MMO) {
2079   if (!TLI.allowsMisalignedMemoryAccesses(VT))
2080     return false;
2081 
2082   // Simplify this down to something we can handle.
2083   if (!simplifyAddress(Addr, VT))
2084     return false;
2085 
2086   unsigned ScaleFactor = getImplicitScaleFactor(VT);
2087   if (!ScaleFactor)
2088     llvm_unreachable("Unexpected value type.");
2089 
2090   // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2091   // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2092   bool UseScaled = true;
2093   if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2094     UseScaled = false;
2095     ScaleFactor = 1;
2096   }
2097 
2098   static const unsigned OpcTable[4][6] = {
2099     { AArch64::STURBBi,  AArch64::STURHHi,  AArch64::STURWi,  AArch64::STURXi,
2100       AArch64::STURSi,   AArch64::STURDi },
2101     { AArch64::STRBBui,  AArch64::STRHHui,  AArch64::STRWui,  AArch64::STRXui,
2102       AArch64::STRSui,   AArch64::STRDui },
2103     { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2104       AArch64::STRSroX,  AArch64::STRDroX },
2105     { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2106       AArch64::STRSroW,  AArch64::STRDroW }
2107   };
2108 
2109   unsigned Opc;
2110   bool VTIsi1 = false;
2111   bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2112                       Addr.getOffsetReg();
2113   unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2114   if (Addr.getExtendType() == AArch64_AM::UXTW ||
2115       Addr.getExtendType() == AArch64_AM::SXTW)
2116     Idx++;
2117 
2118   switch (VT.SimpleTy) {
2119   default: llvm_unreachable("Unexpected value type.");
2120   case MVT::i1:  VTIsi1 = true; [[fallthrough]];
2121   case MVT::i8:  Opc = OpcTable[Idx][0]; break;
2122   case MVT::i16: Opc = OpcTable[Idx][1]; break;
2123   case MVT::i32: Opc = OpcTable[Idx][2]; break;
2124   case MVT::i64: Opc = OpcTable[Idx][3]; break;
2125   case MVT::f32: Opc = OpcTable[Idx][4]; break;
2126   case MVT::f64: Opc = OpcTable[Idx][5]; break;
2127   }
2128 
2129   // Storing an i1 requires special handling.
2130   if (VTIsi1 && SrcReg != AArch64::WZR) {
2131     unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2132     assert(ANDReg && "Unexpected AND instruction emission failure.");
2133     SrcReg = ANDReg;
2134   }
2135   // Create the base instruction, then add the operands.
2136   const MCInstrDesc &II = TII.get(Opc);
2137   SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2138   MachineInstrBuilder MIB =
2139       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2140   addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2141 
2142   return true;
2143 }
2144 
2145 bool AArch64FastISel::selectStore(const Instruction *I) {
2146   MVT VT;
2147   const Value *Op0 = I->getOperand(0);
2148   // Verify we have a legal type before going any further.  Currently, we handle
2149   // simple types that will directly fit in a register (i32/f32/i64/f64) or
2150   // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2151   if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2152     return false;
2153 
2154   const Value *PtrV = I->getOperand(1);
2155   if (TLI.supportSwiftError()) {
2156     // Swifterror values can come from either a function parameter with
2157     // swifterror attribute or an alloca with swifterror attribute.
2158     if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2159       if (Arg->hasSwiftErrorAttr())
2160         return false;
2161     }
2162 
2163     if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2164       if (Alloca->isSwiftError())
2165         return false;
2166     }
2167   }
2168 
2169   // Get the value to be stored into a register. Use the zero register directly
2170   // when possible to avoid an unnecessary copy and a wasted register.
2171   unsigned SrcReg = 0;
2172   if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2173     if (CI->isZero())
2174       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2175   } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2176     if (CF->isZero() && !CF->isNegative()) {
2177       VT = MVT::getIntegerVT(VT.getSizeInBits());
2178       SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2179     }
2180   }
2181 
2182   if (!SrcReg)
2183     SrcReg = getRegForValue(Op0);
2184 
2185   if (!SrcReg)
2186     return false;
2187 
2188   auto *SI = cast<StoreInst>(I);
2189 
2190   // Try to emit a STLR for seq_cst/release.
2191   if (SI->isAtomic()) {
2192     AtomicOrdering Ord = SI->getOrdering();
2193     // The non-atomic instructions are sufficient for relaxed stores.
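    // (Monotonic and unordered stores need no barrier on AArch64; only
    // release and seq_cst are lowered to STLR.)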
2194     if (isReleaseOrStronger(Ord)) {
2195       // The STLR addressing mode only supports a base reg; pass that directly.
2196       Register AddrReg = getRegForValue(PtrV);
2197       return emitStoreRelease(VT, SrcReg, AddrReg,
2198                               createMachineMemOperandFor(I));
2199     }
2200   }
2201 
2202   // See if we can handle this address.
2203   Address Addr;
2204   if (!computeAddress(PtrV, Addr, Op0->getType()))
2205     return false;
2206 
2207   if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2208     return false;
2209   return true;
2210 }
2211 
2212 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2213   switch (Pred) {
2214   case CmpInst::FCMP_ONE:
2215   case CmpInst::FCMP_UEQ:
2216   default:
2217     // AL is our "false" for now. The other two need more compares.
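    // (FCMP_ONE and FCMP_UEQ are handled by the callers with two condition
    // codes; see selectBranch and selectCmp.)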
2218     return AArch64CC::AL;
2219   case CmpInst::ICMP_EQ:
2220   case CmpInst::FCMP_OEQ:
2221     return AArch64CC::EQ;
2222   case CmpInst::ICMP_SGT:
2223   case CmpInst::FCMP_OGT:
2224     return AArch64CC::GT;
2225   case CmpInst::ICMP_SGE:
2226   case CmpInst::FCMP_OGE:
2227     return AArch64CC::GE;
2228   case CmpInst::ICMP_UGT:
2229   case CmpInst::FCMP_UGT:
2230     return AArch64CC::HI;
2231   case CmpInst::FCMP_OLT:
2232     return AArch64CC::MI;
2233   case CmpInst::ICMP_ULE:
2234   case CmpInst::FCMP_OLE:
2235     return AArch64CC::LS;
2236   case CmpInst::FCMP_ORD:
2237     return AArch64CC::VC;
2238   case CmpInst::FCMP_UNO:
2239     return AArch64CC::VS;
2240   case CmpInst::FCMP_UGE:
2241     return AArch64CC::PL;
2242   case CmpInst::ICMP_SLT:
2243   case CmpInst::FCMP_ULT:
2244     return AArch64CC::LT;
2245   case CmpInst::ICMP_SLE:
2246   case CmpInst::FCMP_ULE:
2247     return AArch64CC::LE;
2248   case CmpInst::FCMP_UNE:
2249   case CmpInst::ICMP_NE:
2250     return AArch64CC::NE;
2251   case CmpInst::ICMP_UGE:
2252     return AArch64CC::HS;
2253   case CmpInst::ICMP_ULT:
2254     return AArch64CC::LO;
2255   }
2256 }
2257 
2258 /// Try to emit a combined compare-and-branch instruction.
2259 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2260   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2261   // will not be produced, as they are conditional branch instructions that do
2262   // not set flags.
2263   if (FuncInfo.MF->getFunction().hasFnAttribute(
2264           Attribute::SpeculativeLoadHardening))
2265     return false;
2266 
2267   assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2268   const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2269   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2270 
2271   const Value *LHS = CI->getOperand(0);
2272   const Value *RHS = CI->getOperand(1);
2273 
2274   MVT VT;
2275   if (!isTypeSupported(LHS->getType(), VT))
2276     return false;
2277 
2278   unsigned BW = VT.getSizeInBits();
2279   if (BW > 64)
2280     return false;
2281 
2282   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2283   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2284 
2285   // Try to take advantage of fallthrough opportunities.
2286   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2287     std::swap(TBB, FBB);
2288     Predicate = CmpInst::getInversePredicate(Predicate);
2289   }
2290 
2291   int TestBit = -1;
2292   bool IsCmpNE;
2293   switch (Predicate) {
2294   default:
2295     return false;
2296   case CmpInst::ICMP_EQ:
2297   case CmpInst::ICMP_NE:
2298     if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2299       std::swap(LHS, RHS);
2300 
2301     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2302       return false;
2303 
2304     if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2305       if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2306         const Value *AndLHS = AI->getOperand(0);
2307         const Value *AndRHS = AI->getOperand(1);
2308 
2309         if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2310           if (C->getValue().isPowerOf2())
2311             std::swap(AndLHS, AndRHS);
2312 
2313         if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2314           if (C->getValue().isPowerOf2()) {
2315             TestBit = C->getValue().logBase2();
2316             LHS = AndLHS;
2317           }
2318       }
2319 
2320     if (VT == MVT::i1)
2321       TestBit = 0;
2322 
2323     IsCmpNE = Predicate == CmpInst::ICMP_NE;
2324     break;
2325   case CmpInst::ICMP_SLT:
2326   case CmpInst::ICMP_SGE:
2327     if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2328       return false;
2329 
2330     TestBit = BW - 1;
2331     IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2332     break;
2333   case CmpInst::ICMP_SGT:
2334   case CmpInst::ICMP_SLE:
2335     if (!isa<ConstantInt>(RHS))
2336       return false;
2337 
2338     if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2339       return false;
2340 
2341     TestBit = BW - 1;
2342     IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2343     break;
2344   } // end switch
2345 
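  // Indexed as [IsBitTest][IsCmpNE][Is64Bit]; e.g. 'x s< 0' tests the sign
  // bit with TBNZ, while 'x != 0' uses CBNZ.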
2346   static const unsigned OpcTable[2][2][2] = {
2347     { {AArch64::CBZW,  AArch64::CBZX },
2348       {AArch64::CBNZW, AArch64::CBNZX} },
2349     { {AArch64::TBZW,  AArch64::TBZX },
2350       {AArch64::TBNZW, AArch64::TBNZX} }
2351   };
2352 
2353   bool IsBitTest = TestBit != -1;
2354   bool Is64Bit = BW == 64;
2355   if (TestBit < 32 && TestBit >= 0)
2356     Is64Bit = false;
2357 
2358   unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2359   const MCInstrDesc &II = TII.get(Opc);
2360 
2361   Register SrcReg = getRegForValue(LHS);
2362   if (!SrcReg)
2363     return false;
2364 
2365   if (BW == 64 && !Is64Bit)
2366     SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2367 
2368   if ((BW < 32) && !IsBitTest)
2369     SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2370 
2371   // Emit the combined compare and branch instruction.
2372   SrcReg = constrainOperandRegClass(II, SrcReg,  II.getNumDefs());
2373   MachineInstrBuilder MIB =
2374       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2375           .addReg(SrcReg);
2376   if (IsBitTest)
2377     MIB.addImm(TestBit);
2378   MIB.addMBB(TBB);
2379 
2380   finishCondBranch(BI->getParent(), TBB, FBB);
2381   return true;
2382 }
2383 
2384 bool AArch64FastISel::selectBranch(const Instruction *I) {
2385   const BranchInst *BI = cast<BranchInst>(I);
2386   if (BI->isUnconditional()) {
2387     MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2388     fastEmitBranch(MSucc, BI->getDebugLoc());
2389     return true;
2390   }
2391 
2392   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2393   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2394 
2395   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2396     if (CI->hasOneUse() && isValueAvailable(CI)) {
2397       // Try to optimize or fold the cmp.
2398       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2399       switch (Predicate) {
2400       default:
2401         break;
2402       case CmpInst::FCMP_FALSE:
2403         fastEmitBranch(FBB, MIMD.getDL());
2404         return true;
2405       case CmpInst::FCMP_TRUE:
2406         fastEmitBranch(TBB, MIMD.getDL());
2407         return true;
2408       }
2409 
2410       // Try to emit a combined compare-and-branch first.
2411       if (emitCompareAndBranch(BI))
2412         return true;
2413 
2414       // Try to take advantage of fallthrough opportunities.
2415       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2416         std::swap(TBB, FBB);
2417         Predicate = CmpInst::getInversePredicate(Predicate);
2418       }
2419 
2420       // Emit the cmp.
2421       if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2422         return false;
2423 
2424       // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2425       // instruction.
2426       AArch64CC::CondCode CC = getCompareCC(Predicate);
2427       AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2428       switch (Predicate) {
2429       default:
2430         break;
2431       case CmpInst::FCMP_UEQ:
2432         ExtraCC = AArch64CC::EQ;
2433         CC = AArch64CC::VS;
2434         break;
2435       case CmpInst::FCMP_ONE:
2436         ExtraCC = AArch64CC::MI;
2437         CC = AArch64CC::GT;
2438         break;
2439       }
2440       assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2441 
2442       // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2443       if (ExtraCC != AArch64CC::AL) {
2444         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2445             .addImm(ExtraCC)
2446             .addMBB(TBB);
2447       }
2448 
2449       // Emit the branch.
2450       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2451           .addImm(CC)
2452           .addMBB(TBB);
2453 
2454       finishCondBranch(BI->getParent(), TBB, FBB);
2455       return true;
2456     }
2457   } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2458     uint64_t Imm = CI->getZExtValue();
2459     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2460     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2461         .addMBB(Target);
2462 
2463     // Obtain the branch probability and add the target to the successor list.
2464     if (FuncInfo.BPI) {
2465       auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2466           BI->getParent(), Target->getBasicBlock());
2467       FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2468     } else
2469       FuncInfo.MBB->addSuccessorWithoutProb(Target);
2470     return true;
2471   } else {
2472     AArch64CC::CondCode CC = AArch64CC::NE;
2473     if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2474       // Fake request the condition, otherwise the intrinsic might be completely
2475       // optimized away.
2476       Register CondReg = getRegForValue(BI->getCondition());
2477       if (!CondReg)
2478         return false;
2479 
2480       // Emit the branch.
2481       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2482         .addImm(CC)
2483         .addMBB(TBB);
2484 
2485       finishCondBranch(BI->getParent(), TBB, FBB);
2486       return true;
2487     }
2488   }
2489 
2490   Register CondReg = getRegForValue(BI->getCondition());
2491   if (CondReg == 0)
2492     return false;
2493 
2494   // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2495   unsigned Opcode = AArch64::TBNZW;
2496   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2497     std::swap(TBB, FBB);
2498     Opcode = AArch64::TBZW;
2499   }
2500 
2501   const MCInstrDesc &II = TII.get(Opcode);
2502   Register ConstrainedCondReg
2503     = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2504   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2505       .addReg(ConstrainedCondReg)
2506       .addImm(0)
2507       .addMBB(TBB);
2508 
2509   finishCondBranch(BI->getParent(), TBB, FBB);
2510   return true;
2511 }
2512 
2513 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2514   const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2515   Register AddrReg = getRegForValue(BI->getOperand(0));
2516   if (AddrReg == 0)
2517     return false;
2518 
2519   // Authenticated indirectbr is not implemented yet.
2520   if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))
2521     return false;
2522 
2523   // Emit the indirect branch.
2524   const MCInstrDesc &II = TII.get(AArch64::BR);
2525   AddrReg = constrainOperandRegClass(II, AddrReg,  II.getNumDefs());
2526   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2527 
2528   // Make sure the CFG is up-to-date.
2529   for (const auto *Succ : BI->successors())
2530     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2531 
2532   return true;
2533 }
2534 
2535 bool AArch64FastISel::selectCmp(const Instruction *I) {
2536   const CmpInst *CI = cast<CmpInst>(I);
2537 
2538   // Vectors of i1 are weird: bail out.
2539   if (CI->getType()->isVectorTy())
2540     return false;
2541 
2542   // Try to optimize or fold the cmp.
2543   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2544   unsigned ResultReg = 0;
2545   switch (Predicate) {
2546   default:
2547     break;
2548   case CmpInst::FCMP_FALSE:
2549     ResultReg = createResultReg(&AArch64::GPR32RegClass);
2550     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2551             TII.get(TargetOpcode::COPY), ResultReg)
2552         .addReg(AArch64::WZR, getKillRegState(true));
2553     break;
2554   case CmpInst::FCMP_TRUE:
2555     ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2556     break;
2557   }
2558 
2559   if (ResultReg) {
2560     updateValueMap(I, ResultReg);
2561     return true;
2562   }
2563 
2564   // Emit the cmp.
2565   if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2566     return false;
2567 
2568   ResultReg = createResultReg(&AArch64::GPR32RegClass);
2569 
2570   // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2571   // condition codes are inverted, because they are used by CSINC.
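  // CSINC Wd, WZR, WZR, cc yields 0 when cc holds and 1 otherwise, so using
  // the inverted condition produces 1 exactly when the predicate is true.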
2572   static unsigned CondCodeTable[2][2] = {
2573     { AArch64CC::NE, AArch64CC::VC },
2574     { AArch64CC::PL, AArch64CC::LE }
2575   };
2576   unsigned *CondCodes = nullptr;
2577   switch (Predicate) {
2578   default:
2579     break;
2580   case CmpInst::FCMP_UEQ:
2581     CondCodes = &CondCodeTable[0][0];
2582     break;
2583   case CmpInst::FCMP_ONE:
2584     CondCodes = &CondCodeTable[1][0];
2585     break;
2586   }
2587 
2588   if (CondCodes) {
2589     Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2590     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2591             TmpReg1)
2592         .addReg(AArch64::WZR, getKillRegState(true))
2593         .addReg(AArch64::WZR, getKillRegState(true))
2594         .addImm(CondCodes[0]);
2595     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2596             ResultReg)
2597         .addReg(TmpReg1, getKillRegState(true))
2598         .addReg(AArch64::WZR, getKillRegState(true))
2599         .addImm(CondCodes[1]);
2600 
2601     updateValueMap(I, ResultReg);
2602     return true;
2603   }
2604 
2605   // Now set a register based on the comparison.
2606   AArch64CC::CondCode CC = getCompareCC(Predicate);
2607   assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2608   AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2609   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2610           ResultReg)
2611       .addReg(AArch64::WZR, getKillRegState(true))
2612       .addReg(AArch64::WZR, getKillRegState(true))
2613       .addImm(invertedCC);
2614 
2615   updateValueMap(I, ResultReg);
2616   return true;
2617 }
2618 
2619 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2620 /// value.
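///
/// For an i1 select the whole thing folds to one logical instruction, e.g.
///   select c, 1, f -> ORR c, f         select c, 0, f -> BIC f, c
///   select c, t, 1 -> ORR (c ^ 1), t   select c, t, 0 -> AND c, t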
2621 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2622   if (!SI->getType()->isIntegerTy(1))
2623     return false;
2624 
2625   const Value *Src1Val, *Src2Val;
2626   unsigned Opc = 0;
2627   bool NeedExtraOp = false;
2628   if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2629     if (CI->isOne()) {
2630       Src1Val = SI->getCondition();
2631       Src2Val = SI->getFalseValue();
2632       Opc = AArch64::ORRWrr;
2633     } else {
2634       assert(CI->isZero());
2635       Src1Val = SI->getFalseValue();
2636       Src2Val = SI->getCondition();
2637       Opc = AArch64::BICWrr;
2638     }
2639   } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2640     if (CI->isOne()) {
2641       Src1Val = SI->getCondition();
2642       Src2Val = SI->getTrueValue();
2643       Opc = AArch64::ORRWrr;
2644       NeedExtraOp = true;
2645     } else {
2646       assert(CI->isZero());
2647       Src1Val = SI->getCondition();
2648       Src2Val = SI->getTrueValue();
2649       Opc = AArch64::ANDWrr;
2650     }
2651   }
2652 
2653   if (!Opc)
2654     return false;
2655 
2656   Register Src1Reg = getRegForValue(Src1Val);
2657   if (!Src1Reg)
2658     return false;
2659 
2660   Register Src2Reg = getRegForValue(Src2Val);
2661   if (!Src2Reg)
2662     return false;
2663 
2664   if (NeedExtraOp)
2665     Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2666 
2667   Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2668                                        Src2Reg);
2669   updateValueMap(SI, ResultReg);
2670   return true;
2671 }
2672 
2673 bool AArch64FastISel::selectSelect(const Instruction *I) {
2674   assert(isa<SelectInst>(I) && "Expected a select instruction.");
2675   MVT VT;
2676   if (!isTypeSupported(I->getType(), VT))
2677     return false;
2678 
2679   unsigned Opc;
2680   const TargetRegisterClass *RC;
2681   switch (VT.SimpleTy) {
2682   default:
2683     return false;
2684   case MVT::i1:
2685   case MVT::i8:
2686   case MVT::i16:
2687   case MVT::i32:
2688     Opc = AArch64::CSELWr;
2689     RC = &AArch64::GPR32RegClass;
2690     break;
2691   case MVT::i64:
2692     Opc = AArch64::CSELXr;
2693     RC = &AArch64::GPR64RegClass;
2694     break;
2695   case MVT::f32:
2696     Opc = AArch64::FCSELSrrr;
2697     RC = &AArch64::FPR32RegClass;
2698     break;
2699   case MVT::f64:
2700     Opc = AArch64::FCSELDrrr;
2701     RC = &AArch64::FPR64RegClass;
2702     break;
2703   }
2704 
2705   const SelectInst *SI = cast<SelectInst>(I);
2706   const Value *Cond = SI->getCondition();
2707   AArch64CC::CondCode CC = AArch64CC::NE;
2708   AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2709 
2710   if (optimizeSelect(SI))
2711     return true;
2712 
2713   // Try to pick up the flags, so we don't have to emit another compare.
2714   if (foldXALUIntrinsic(CC, I, Cond)) {
2715     // Fake request the condition to force emission of the XALU intrinsic.
2716     Register CondReg = getRegForValue(Cond);
2717     if (!CondReg)
2718       return false;
2719   } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2720              isValueAvailable(Cond)) {
2721     const auto *Cmp = cast<CmpInst>(Cond);
2722     // Try to optimize or fold the cmp.
2723     CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2724     const Value *FoldSelect = nullptr;
2725     switch (Predicate) {
2726     default:
2727       break;
2728     case CmpInst::FCMP_FALSE:
2729       FoldSelect = SI->getFalseValue();
2730       break;
2731     case CmpInst::FCMP_TRUE:
2732       FoldSelect = SI->getTrueValue();
2733       break;
2734     }
2735 
2736     if (FoldSelect) {
2737       Register SrcReg = getRegForValue(FoldSelect);
2738       if (!SrcReg)
2739         return false;
2740 
2741       updateValueMap(I, SrcReg);
2742       return true;
2743     }
2744 
2745     // Emit the cmp.
2746     if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2747       return false;
2748 
2749     // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2750     CC = getCompareCC(Predicate);
2751     switch (Predicate) {
2752     default:
2753       break;
2754     case CmpInst::FCMP_UEQ:
2755       ExtraCC = AArch64CC::EQ;
2756       CC = AArch64CC::VS;
2757       break;
2758     case CmpInst::FCMP_ONE:
2759       ExtraCC = AArch64CC::MI;
2760       CC = AArch64CC::GT;
2761       break;
2762     }
2763     assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2764   } else {
2765     Register CondReg = getRegForValue(Cond);
2766     if (!CondReg)
2767       return false;
2768 
2769     const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2770     CondReg = constrainOperandRegClass(II, CondReg, 1);
2771 
2772     // Emit a TST instruction (ANDS wzr, reg, #imm).
2773     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2774             AArch64::WZR)
2775         .addReg(CondReg)
2776         .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2777   }
2778 
2779   Register Src1Reg = getRegForValue(SI->getTrueValue());
2780   Register Src2Reg = getRegForValue(SI->getFalseValue());
2781 
2782   if (!Src1Reg || !Src2Reg)
2783     return false;
2784 
2785   if (ExtraCC != AArch64CC::AL)
2786     Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2787 
2788   Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2789   updateValueMap(I, ResultReg);
2790   return true;
2791 }
2792 
2793 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2794   Value *V = I->getOperand(0);
2795   if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2796     return false;
2797 
2798   Register Op = getRegForValue(V);
2799   if (Op == 0)
2800     return false;
2801 
2802   Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2803   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2804           ResultReg).addReg(Op);
2805   updateValueMap(I, ResultReg);
2806   return true;
2807 }
2808 
2809 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2810   Value *V = I->getOperand(0);
2811   if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2812     return false;
2813 
2814   Register Op = getRegForValue(V);
2815   if (Op == 0)
2816     return false;
2817 
2818   Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2819   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2820           ResultReg).addReg(Op);
2821   updateValueMap(I, ResultReg);
2822   return true;
2823 }
2824 
2825 // FPToUI and FPToSI
2826 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2827   MVT DestVT;
2828   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2829     return false;
2830 
2831   Register SrcReg = getRegForValue(I->getOperand(0));
2832   if (SrcReg == 0)
2833     return false;
2834 
2835   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2836   if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2837     return false;
2838 
2839   unsigned Opc;
2840   if (SrcVT == MVT::f64) {
2841     if (Signed)
2842       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2843     else
2844       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2845   } else {
2846     if (Signed)
2847       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2848     else
2849       Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2850   }
2851   Register ResultReg = createResultReg(
2852       DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2853   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2854       .addReg(SrcReg);
2855   updateValueMap(I, ResultReg);
2856   return true;
2857 }
2858 
2859 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2860   MVT DestVT;
2861   if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2862     return false;
2863   // Let regular ISEL handle FP16
2864   if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2865     return false;
2866 
2867   assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2868          "Unexpected value type.");
2869 
2870   Register SrcReg = getRegForValue(I->getOperand(0));
2871   if (!SrcReg)
2872     return false;
2873 
2874   EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2875 
2876   // Handle sign-extension.
2877   if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2878     SrcReg =
2879         emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2880     if (!SrcReg)
2881       return false;
2882   }
2883 
2884   unsigned Opc;
2885   if (SrcVT == MVT::i64) {
2886     if (Signed)
2887       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2888     else
2889       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2890   } else {
2891     if (Signed)
2892       Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2893     else
2894       Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2895   }
2896 
2897   Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2898   updateValueMap(I, ResultReg);
2899   return true;
2900 }
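// Likewise for the int-to-FP opcodes above, e.g. (mnemonics for orientation):
//   sitofp i32 -> double  =>  SCVTFUWDri  (SCVTF Dd, Wn)
//   uitofp i64 -> float   =>  UCVTFUXSri  (UCVTF Sd, Xn)
// i1/i8/i16 sources are first widened to i32 with emitIntExt.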
2901 
2902 bool AArch64FastISel::fastLowerArguments() {
2903   if (!FuncInfo.CanLowerReturn)
2904     return false;
2905 
2906   const Function *F = FuncInfo.Fn;
2907   if (F->isVarArg())
2908     return false;
2909 
2910   CallingConv::ID CC = F->getCallingConv();
2911   if (CC != CallingConv::C && CC != CallingConv::Swift)
2912     return false;
2913 
2914   if (Subtarget->hasCustomCallingConv())
2915     return false;
2916 
2917   // Only handle simple cases of up to 8 GPR and FPR each.
2918   unsigned GPRCnt = 0;
2919   unsigned FPRCnt = 0;
2920   for (auto const &Arg : F->args()) {
2921     if (Arg.hasAttribute(Attribute::ByVal) ||
2922         Arg.hasAttribute(Attribute::InReg) ||
2923         Arg.hasAttribute(Attribute::StructRet) ||
2924         Arg.hasAttribute(Attribute::SwiftSelf) ||
2925         Arg.hasAttribute(Attribute::SwiftAsync) ||
2926         Arg.hasAttribute(Attribute::SwiftError) ||
2927         Arg.hasAttribute(Attribute::Nest))
2928       return false;
2929 
2930     Type *ArgTy = Arg.getType();
2931     if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2932       return false;
2933 
2934     EVT ArgVT = TLI.getValueType(DL, ArgTy);
2935     if (!ArgVT.isSimple())
2936       return false;
2937 
2938     MVT VT = ArgVT.getSimpleVT().SimpleTy;
2939     if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2940       return false;
2941 
2942     if (VT.isVector() &&
2943         (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2944       return false;
2945 
2946     if (VT >= MVT::i1 && VT <= MVT::i64)
2947       ++GPRCnt;
2948     else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2949              VT.is128BitVector())
2950       ++FPRCnt;
2951     else
2952       return false;
2953 
2954     if (GPRCnt > 8 || FPRCnt > 8)
2955       return false;
2956   }
2957 
2958   static const MCPhysReg Registers[6][8] = {
2959     { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2960       AArch64::W5, AArch64::W6, AArch64::W7 },
2961     { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2962       AArch64::X5, AArch64::X6, AArch64::X7 },
2963     { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2964       AArch64::H5, AArch64::H6, AArch64::H7 },
2965     { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2966       AArch64::S5, AArch64::S6, AArch64::S7 },
2967     { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2968       AArch64::D5, AArch64::D6, AArch64::D7 },
2969     { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2970       AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2971   };
2972 
2973   unsigned GPRIdx = 0;
2974   unsigned FPRIdx = 0;
2975   for (auto const &Arg : F->args()) {
2976     MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2977     unsigned SrcReg;
2978     const TargetRegisterClass *RC;
2979     if (VT >= MVT::i1 && VT <= MVT::i32) {
2980       SrcReg = Registers[0][GPRIdx++];
2981       RC = &AArch64::GPR32RegClass;
2982       VT = MVT::i32;
2983     } else if (VT == MVT::i64) {
2984       SrcReg = Registers[1][GPRIdx++];
2985       RC = &AArch64::GPR64RegClass;
2986     } else if (VT == MVT::f16 || VT == MVT::bf16) {
2987       SrcReg = Registers[2][FPRIdx++];
2988       RC = &AArch64::FPR16RegClass;
2989     } else if (VT == MVT::f32) {
2990       SrcReg = Registers[3][FPRIdx++];
2991       RC = &AArch64::FPR32RegClass;
2992     } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2993       SrcReg = Registers[4][FPRIdx++];
2994       RC = &AArch64::FPR64RegClass;
2995     } else if (VT.is128BitVector()) {
2996       SrcReg = Registers[5][FPRIdx++];
2997       RC = &AArch64::FPR128RegClass;
2998     } else
2999       llvm_unreachable("Unexpected value type.");
3000 
3001     Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3002     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3003     // Without this, EmitLiveInCopies may eliminate the livein if its only
3004     // use is a bitcast (which isn't turned into an instruction).
3005     Register ResultReg = createResultReg(RC);
3006     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3007             TII.get(TargetOpcode::COPY), ResultReg)
3008         .addReg(DstReg, getKillRegState(true));
3009     updateValueMap(&Arg, ResultReg);
3010   }
3011   return true;
3012 }
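// Illustrative example (IR names invented): for
//   define i32 @f(i32 %a, i64 %b, double %c)
// the loop above picks W0 for %a, X1 for %b and D0 for %c (the GPR and FPR
// indices advance independently), adds each physreg as a live-in, and maps the
// IR argument to a COPY of that live-in.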
3013 
3014 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3015                                       SmallVectorImpl<MVT> &OutVTs,
3016                                       unsigned &NumBytes) {
3017   CallingConv::ID CC = CLI.CallConv;
3018   SmallVector<CCValAssign, 16> ArgLocs;
3019   CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3020   CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3021 
3022   // Get a count of how many bytes are to be pushed on the stack.
3023   NumBytes = CCInfo.getStackSize();
3024 
3025   // Issue CALLSEQ_START
3026   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3027   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3028     .addImm(NumBytes).addImm(0);
3029 
3030   // Process the args.
3031   for (CCValAssign &VA : ArgLocs) {
3032     const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3033     MVT ArgVT = OutVTs[VA.getValNo()];
3034 
3035     Register ArgReg = getRegForValue(ArgVal);
3036     if (!ArgReg)
3037       return false;
3038 
3039     // Handle arg promotion: SExt, ZExt, AExt.
3040     switch (VA.getLocInfo()) {
3041     case CCValAssign::Full:
3042       break;
3043     case CCValAssign::SExt: {
3044       MVT DestVT = VA.getLocVT();
3045       MVT SrcVT = ArgVT;
3046       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3047       if (!ArgReg)
3048         return false;
3049       break;
3050     }
3051     case CCValAssign::AExt:
3052     // Intentional fall-through.
3053     case CCValAssign::ZExt: {
3054       MVT DestVT = VA.getLocVT();
3055       MVT SrcVT = ArgVT;
3056       ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3057       if (!ArgReg)
3058         return false;
3059       break;
3060     }
3061     default:
3062       llvm_unreachable("Unknown arg promotion!");
3063     }
3064 
3065     // Now copy/store arg to correct locations.
3066     if (VA.isRegLoc() && !VA.needsCustom()) {
3067       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3068               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3069       CLI.OutRegs.push_back(VA.getLocReg());
3070     } else if (VA.needsCustom()) {
3071       // FIXME: Handle custom args.
3072       return false;
3073     } else {
3074       assert(VA.isMemLoc() && "Assuming store on stack.");
3075 
3076       // Don't emit stores for undef values.
3077       if (isa<UndefValue>(ArgVal))
3078         continue;
3079 
3080       // Need to store on the stack.
3081       unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3082 
3083       unsigned BEAlign = 0;
3084       if (ArgSize < 8 && !Subtarget->isLittleEndian())
3085         BEAlign = 8 - ArgSize;
3086 
3087       Address Addr;
3088       Addr.setKind(Address::RegBase);
3089       Addr.setReg(AArch64::SP);
3090       Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3091 
3092       Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3093       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3094           MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3095           MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3096 
3097       if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3098         return false;
3099     }
3100   }
3101   return true;
3102 }
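// Note on BEAlign above: on a big-endian target a sub-8-byte stack argument is
// placed at the high end of its slot, e.g. a 4-byte value assigned to stack
// offset 0 is stored at SP+4 (BEAlign = 8 - 4).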
3103 
3104 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3105   CallingConv::ID CC = CLI.CallConv;
3106 
3107   // Issue CALLSEQ_END
3108   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3109   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3110     .addImm(NumBytes).addImm(0);
3111 
3112   // Now the return values.
3113   SmallVector<CCValAssign, 16> RVLocs;
3114   CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3115   CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3116 
3117   Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3118   for (unsigned i = 0; i != RVLocs.size(); ++i) {
3119     CCValAssign &VA = RVLocs[i];
3120     MVT CopyVT = VA.getValVT();
3121     unsigned CopyReg = ResultReg + i;
3122 
3123     // TODO: Handle big-endian results
3124     if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3125       return false;
3126 
3127     // Copy result out of their specified physreg.
3128     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3129             CopyReg)
3130         .addReg(VA.getLocReg());
3131     CLI.InRegs.push_back(VA.getLocReg());
3132   }
3133 
3134   CLI.ResultReg = ResultReg;
3135   CLI.NumResultRegs = RVLocs.size();
3136 
3137   return true;
3138 }
3139 
3140 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3141   CallingConv::ID CC  = CLI.CallConv;
3142   bool IsTailCall     = CLI.IsTailCall;
3143   bool IsVarArg       = CLI.IsVarArg;
3144   const Value *Callee = CLI.Callee;
3145   MCSymbol *Symbol = CLI.Symbol;
3146 
3147   if (!Callee && !Symbol)
3148     return false;
3149 
3150   // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3151   // a bti instruction following the call.
3152   if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3153       !Subtarget->noBTIAtReturnTwice() &&
3154       MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3155     return false;
3156 
3157   // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3158   if (CLI.CB && CLI.CB->isIndirectCall() &&
3159       CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3160     return false;
3161 
3162   // Allow SelectionDAG isel to handle tail calls.
3163   if (IsTailCall)
3164     return false;
3165 
3166   // FIXME: we could and should support this, but for now correctness at -O0 is
3167   // more important.
3168   if (Subtarget->isTargetILP32())
3169     return false;
3170 
3171   CodeModel::Model CM = TM.getCodeModel();
3172   // Only support the small-addressing and large code models.
3173   if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3174     return false;
3175 
3176   // FIXME: Add large code model support for ELF.
3177   if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3178     return false;
3179 
3180   // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3181   // attribute. Check "RtLibUseGOT" instead.
3182   if (MF->getFunction().getParent()->getRtLibUseGOT())
3183     return false;
3184 
3185   // Let SDISel handle vararg functions.
3186   if (IsVarArg)
3187     return false;
3188 
3189   if (Subtarget->isWindowsArm64EC())
3190     return false;
3191 
3192   for (auto Flag : CLI.OutFlags)
3193     if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3194         Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3195       return false;
3196 
3197   // Set up the argument vectors.
3198   SmallVector<MVT, 16> OutVTs;
3199   OutVTs.reserve(CLI.OutVals.size());
3200 
3201   for (auto *Val : CLI.OutVals) {
3202     MVT VT;
3203     if (!isTypeLegal(Val->getType(), VT) &&
3204         !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3205       return false;
3206 
3207     // We don't handle vector parameters yet.
3208     if (VT.isVector() || VT.getSizeInBits() > 64)
3209       return false;
3210 
3211     OutVTs.push_back(VT);
3212   }
3213 
3214   Address Addr;
3215   if (Callee && !computeCallAddress(Callee, Addr))
3216     return false;
3217 
3218   // The weak function target may be zero; in that case we must use indirect
3219   // addressing via a stub on Windows, as it may be out of range for a
3220   // PC-relative jump.
3221   if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3222       Addr.getGlobalValue()->hasExternalWeakLinkage())
3223     return false;
3224 
3225   // Handle the arguments now that we've gotten them.
3226   unsigned NumBytes;
3227   if (!processCallArgs(CLI, OutVTs, NumBytes))
3228     return false;
3229 
3230   const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3231   if (RegInfo->isAnyArgRegReserved(*MF))
3232     RegInfo->emitReservedArgRegCallError(*MF);
3233 
3234   // Issue the call.
3235   MachineInstrBuilder MIB;
3236   if (Subtarget->useSmallAddressing()) {
3237     const MCInstrDesc &II =
3238         TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3239     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3240     if (Symbol)
3241       MIB.addSym(Symbol, 0);
3242     else if (Addr.getGlobalValue())
3243       MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3244     else if (Addr.getReg()) {
3245       Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3246       MIB.addReg(Reg);
3247     } else
3248       return false;
3249   } else {
3250     unsigned CallReg = 0;
3251     if (Symbol) {
3252       Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3253       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3254               ADRPReg)
3255           .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3256 
3257       CallReg = createResultReg(&AArch64::GPR64RegClass);
3258       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3259               TII.get(AArch64::LDRXui), CallReg)
3260           .addReg(ADRPReg)
3261           .addSym(Symbol,
3262                   AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3263     } else if (Addr.getGlobalValue())
3264       CallReg = materializeGV(Addr.getGlobalValue());
3265     else if (Addr.getReg())
3266       CallReg = Addr.getReg();
3267 
3268     if (!CallReg)
3269       return false;
3270 
3271     const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3272     CallReg = constrainOperandRegClass(II, CallReg, 0);
3273     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3274   }
3275 
3276   // Add implicit physical register uses to the call.
3277   for (auto Reg : CLI.OutRegs)
3278     MIB.addReg(Reg, RegState::Implicit);
3279 
3280   // Add a register mask with the call-preserved registers.
3281   // Proper defs for return values will be added by setPhysRegsDeadExcept().
3282   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3283 
3284   CLI.Call = MIB;
3285 
3286   // Finish off the call including any return values.
3287   return finishCall(CLI, NumBytes);
3288 }
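// Rough shape of the code emitted by the two paths above (assembly shown for
// orientation only):
//   small addressing:  bl _callee             (or blr xN for indirect calls)
//   large code model:  adrp xN, _callee@GOTPAGE
//                      ldr  xN, [xN, _callee@GOTPAGEOFF]
//                      blr  xN
// bracketed by the CALLSEQ_START/CALLSEQ_END stack adjustments emitted in
// processCallArgs and finishCall.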
3289 
3290 bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3291   if (Alignment)
3292     return Len / Alignment->value() <= 4;
3293   else
3294     return Len < 32;
3295 }
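// In other words, a copy is considered small if it needs at most four
// naturally aligned transfers (e.g. 16 bytes at 4-byte alignment), or is under
// 32 bytes when no alignment is known.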
3296 
3297 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3298                                          uint64_t Len, MaybeAlign Alignment) {
3299   // Make sure we don't bloat code by inlining very large memcpy's.
3300   if (!isMemCpySmall(Len, Alignment))
3301     return false;
3302 
3303   int64_t UnscaledOffset = 0;
3304   Address OrigDest = Dest;
3305   Address OrigSrc = Src;
3306 
3307   while (Len) {
3308     MVT VT;
3309     if (!Alignment || *Alignment >= 8) {
3310       if (Len >= 8)
3311         VT = MVT::i64;
3312       else if (Len >= 4)
3313         VT = MVT::i32;
3314       else if (Len >= 2)
3315         VT = MVT::i16;
3316       else {
3317         VT = MVT::i8;
3318       }
3319     } else {
3320       assert(Alignment && "Alignment is set in this branch");
3321       // Bound based on alignment.
3322       if (Len >= 4 && *Alignment == 4)
3323         VT = MVT::i32;
3324       else if (Len >= 2 && *Alignment == 2)
3325         VT = MVT::i16;
3326       else {
3327         VT = MVT::i8;
3328       }
3329     }
3330 
3331     unsigned ResultReg = emitLoad(VT, VT, Src);
3332     if (!ResultReg)
3333       return false;
3334 
3335     if (!emitStore(VT, ResultReg, Dest))
3336       return false;
3337 
3338     int64_t Size = VT.getSizeInBits() / 8;
3339     Len -= Size;
3340     UnscaledOffset += Size;
3341 
3342     // We need to recompute the unscaled offset for each iteration.
3343     Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3344     Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3345   }
3346 
3347   return true;
3348 }
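// Worked example: a 7-byte copy with unknown (or >= 8-byte) alignment is split
// by the loop above into an i32, an i16 and an i8 load/store pair, advancing
// the unscaled offset by 4, 2 and 1 bytes respectively.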
3349 
3350 /// Check if it is possible to fold the condition from the XALU intrinsic
3351 /// into the user. The condition code will only be updated on success.
3352 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3353                                         const Instruction *I,
3354                                         const Value *Cond) {
3355   if (!isa<ExtractValueInst>(Cond))
3356     return false;
3357 
3358   const auto *EV = cast<ExtractValueInst>(Cond);
3359   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3360     return false;
3361 
3362   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3363   MVT RetVT;
3364   const Function *Callee = II->getCalledFunction();
3365   Type *RetTy =
3366   cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3367   if (!isTypeLegal(RetTy, RetVT))
3368     return false;
3369 
3370   if (RetVT != MVT::i32 && RetVT != MVT::i64)
3371     return false;
3372 
3373   const Value *LHS = II->getArgOperand(0);
3374   const Value *RHS = II->getArgOperand(1);
3375 
3376   // Canonicalize immediate to the RHS.
3377   if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3378     std::swap(LHS, RHS);
3379 
3380   // Simplify multiplies.
3381   Intrinsic::ID IID = II->getIntrinsicID();
3382   switch (IID) {
3383   default:
3384     break;
3385   case Intrinsic::smul_with_overflow:
3386     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3387       if (C->getValue() == 2)
3388         IID = Intrinsic::sadd_with_overflow;
3389     break;
3390   case Intrinsic::umul_with_overflow:
3391     if (const auto *C = dyn_cast<ConstantInt>(RHS))
3392       if (C->getValue() == 2)
3393         IID = Intrinsic::uadd_with_overflow;
3394     break;
3395   }
3396 
3397   AArch64CC::CondCode TmpCC;
3398   switch (IID) {
3399   default:
3400     return false;
3401   case Intrinsic::sadd_with_overflow:
3402   case Intrinsic::ssub_with_overflow:
3403     TmpCC = AArch64CC::VS;
3404     break;
3405   case Intrinsic::uadd_with_overflow:
3406     TmpCC = AArch64CC::HS;
3407     break;
3408   case Intrinsic::usub_with_overflow:
3409     TmpCC = AArch64CC::LO;
3410     break;
3411   case Intrinsic::smul_with_overflow:
3412   case Intrinsic::umul_with_overflow:
3413     TmpCC = AArch64CC::NE;
3414     break;
3415   }
3416 
3417   // Check if both instructions are in the same basic block.
3418   if (!isValueAvailable(II))
3419     return false;
3420 
3421   // Make sure nothing is in the way between the intrinsic and this use.
3422   BasicBlock::const_iterator Start(I);
3423   BasicBlock::const_iterator End(II);
3424   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3425     // We only expect extractvalue instructions between the intrinsic and the
3426     // instruction to be selected.
3427     if (!isa<ExtractValueInst>(Itr))
3428       return false;
3429 
3430     // Check that the extractvalue operand comes from the intrinsic.
3431     const auto *EVI = cast<ExtractValueInst>(Itr);
3432     if (EVI->getAggregateOperand() != II)
3433       return false;
3434   }
3435 
3436   CC = TmpCC;
3437   return true;
3438 }
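// Illustrative IR shape that this folding targets (names invented):
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %ov  = extractvalue { i32, i1 } %res, 1
//   br i1 %ov, label %overflow, label %cont
// The user of %ov can then branch or select on the condition code returned
// here (VS for signed add/sub) instead of materializing the i1.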
3439 
3440 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3441   // FIXME: Handle more intrinsics.
3442   switch (II->getIntrinsicID()) {
3443   default: return false;
3444   case Intrinsic::frameaddress: {
3445     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3446     MFI.setFrameAddressIsTaken(true);
3447 
3448     const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3449     Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3450     Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3451     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3452             TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3453     // Recursively load frame address
3454     // ldr x0, [fp]
3455     // ldr x0, [x0]
3456     // ldr x0, [x0]
3457     // ...
3458     unsigned DestReg;
3459     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3460     while (Depth--) {
3461       DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3462                                 SrcReg, 0);
3463       assert(DestReg && "Unexpected LDR instruction emission failure.");
3464       SrcReg = DestReg;
3465     }
3466 
3467     updateValueMap(II, SrcReg);
3468     return true;
3469   }
3470   case Intrinsic::sponentry: {
3471     MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3472 
3473     // SP = FP + Fixed Object + 16
3474     int FI = MFI.CreateFixedObject(4, 0, false);
3475     Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3476     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3477             TII.get(AArch64::ADDXri), ResultReg)
3478             .addFrameIndex(FI)
3479             .addImm(0)
3480             .addImm(0);
3481 
3482     updateValueMap(II, ResultReg);
3483     return true;
3484   }
3485   case Intrinsic::memcpy:
3486   case Intrinsic::memmove: {
3487     const auto *MTI = cast<MemTransferInst>(II);
3488     // Don't handle volatile.
3489     if (MTI->isVolatile())
3490       return false;
3491 
3492     // Disable inlining for memmove before calls to computeAddress. Otherwise,
3493     // we would emit dead code because we don't currently handle memmoves.
3494     bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3495     if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3496       // Small memcpy's are common enough that we want to do them without a call
3497       // if possible.
3498       uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3499       MaybeAlign Alignment;
3500       if (MTI->getDestAlign() || MTI->getSourceAlign())
3501         Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3502                              MTI->getSourceAlign().valueOrOne());
3503       if (isMemCpySmall(Len, Alignment)) {
3504         Address Dest, Src;
3505         if (!computeAddress(MTI->getRawDest(), Dest) ||
3506             !computeAddress(MTI->getRawSource(), Src))
3507           return false;
3508         if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3509           return true;
3510       }
3511     }
3512 
3513     if (!MTI->getLength()->getType()->isIntegerTy(64))
3514       return false;
3515 
3516     if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3517       // Fast instruction selection doesn't support the special
3518       // address spaces.
3519       return false;
3520 
3521     const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3522     return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3523   }
3524   case Intrinsic::memset: {
3525     const MemSetInst *MSI = cast<MemSetInst>(II);
3526     // Don't handle volatile.
3527     if (MSI->isVolatile())
3528       return false;
3529 
3530     if (!MSI->getLength()->getType()->isIntegerTy(64))
3531       return false;
3532 
3533     if (MSI->getDestAddressSpace() > 255)
3534       // Fast instruction selection doesn't support the special
3535       // address spaces.
3536       return false;
3537 
3538     return lowerCallTo(II, "memset", II->arg_size() - 1);
3539   }
3540   case Intrinsic::sin:
3541   case Intrinsic::cos:
3542   case Intrinsic::tan:
3543   case Intrinsic::pow: {
3544     MVT RetVT;
3545     if (!isTypeLegal(II->getType(), RetVT))
3546       return false;
3547 
3548     if (RetVT != MVT::f32 && RetVT != MVT::f64)
3549       return false;
3550 
3551     static const RTLIB::Libcall LibCallTable[4][2] = {
3552         {RTLIB::SIN_F32, RTLIB::SIN_F64},
3553         {RTLIB::COS_F32, RTLIB::COS_F64},
3554         {RTLIB::TAN_F32, RTLIB::TAN_F64},
3555         {RTLIB::POW_F32, RTLIB::POW_F64}};
3556     RTLIB::Libcall LC;
3557     bool Is64Bit = RetVT == MVT::f64;
3558     switch (II->getIntrinsicID()) {
3559     default:
3560       llvm_unreachable("Unexpected intrinsic.");
3561     case Intrinsic::sin:
3562       LC = LibCallTable[0][Is64Bit];
3563       break;
3564     case Intrinsic::cos:
3565       LC = LibCallTable[1][Is64Bit];
3566       break;
3567     case Intrinsic::tan:
3568       LC = LibCallTable[2][Is64Bit];
3569       break;
3570     case Intrinsic::pow:
3571       LC = LibCallTable[3][Is64Bit];
3572       break;
3573     }
3574 
3575     ArgListTy Args;
3576     Args.reserve(II->arg_size());
3577 
3578     // Populate the argument list.
3579     for (auto &Arg : II->args()) {
3580       ArgListEntry Entry;
3581       Entry.Val = Arg;
3582       Entry.Ty = Arg->getType();
3583       Args.push_back(Entry);
3584     }
3585 
3586     CallLoweringInfo CLI;
3587     MCContext &Ctx = MF->getContext();
3588     CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3589                   TLI.getLibcallName(LC), std::move(Args));
3590     if (!lowerCallTo(CLI))
3591       return false;
3592     updateValueMap(II, CLI.ResultReg);
3593     return true;
3594   }
3595   case Intrinsic::fabs: {
3596     MVT VT;
3597     if (!isTypeLegal(II->getType(), VT))
3598       return false;
3599 
3600     unsigned Opc;
3601     switch (VT.SimpleTy) {
3602     default:
3603       return false;
3604     case MVT::f32:
3605       Opc = AArch64::FABSSr;
3606       break;
3607     case MVT::f64:
3608       Opc = AArch64::FABSDr;
3609       break;
3610     }
3611     Register SrcReg = getRegForValue(II->getOperand(0));
3612     if (!SrcReg)
3613       return false;
3614     Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3615     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3616       .addReg(SrcReg);
3617     updateValueMap(II, ResultReg);
3618     return true;
3619   }
3620   case Intrinsic::trap:
3621     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3622         .addImm(1);
3623     return true;
3624   case Intrinsic::debugtrap:
3625     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3626         .addImm(0xF000);
3627     return true;
3628 
3629   case Intrinsic::sqrt: {
3630     Type *RetTy = II->getCalledFunction()->getReturnType();
3631 
3632     MVT VT;
3633     if (!isTypeLegal(RetTy, VT))
3634       return false;
3635 
3636     Register Op0Reg = getRegForValue(II->getOperand(0));
3637     if (!Op0Reg)
3638       return false;
3639 
3640     unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3641     if (!ResultReg)
3642       return false;
3643 
3644     updateValueMap(II, ResultReg);
3645     return true;
3646   }
3647   case Intrinsic::sadd_with_overflow:
3648   case Intrinsic::uadd_with_overflow:
3649   case Intrinsic::ssub_with_overflow:
3650   case Intrinsic::usub_with_overflow:
3651   case Intrinsic::smul_with_overflow:
3652   case Intrinsic::umul_with_overflow: {
3653     // This implements the basic lowering of the xalu with overflow intrinsics.
3654     const Function *Callee = II->getCalledFunction();
3655     auto *Ty = cast<StructType>(Callee->getReturnType());
3656     Type *RetTy = Ty->getTypeAtIndex(0U);
3657 
3658     MVT VT;
3659     if (!isTypeLegal(RetTy, VT))
3660       return false;
3661 
3662     if (VT != MVT::i32 && VT != MVT::i64)
3663       return false;
3664 
3665     const Value *LHS = II->getArgOperand(0);
3666     const Value *RHS = II->getArgOperand(1);
3667     // Canonicalize immediate to the RHS.
3668     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3669       std::swap(LHS, RHS);
3670 
3671     // Simplify multiplies.
3672     Intrinsic::ID IID = II->getIntrinsicID();
3673     switch (IID) {
3674     default:
3675       break;
3676     case Intrinsic::smul_with_overflow:
3677       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3678         if (C->getValue() == 2) {
3679           IID = Intrinsic::sadd_with_overflow;
3680           RHS = LHS;
3681         }
3682       break;
3683     case Intrinsic::umul_with_overflow:
3684       if (const auto *C = dyn_cast<ConstantInt>(RHS))
3685         if (C->getValue() == 2) {
3686           IID = Intrinsic::uadd_with_overflow;
3687           RHS = LHS;
3688         }
3689       break;
3690     }
3691 
3692     unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3693     AArch64CC::CondCode CC = AArch64CC::Invalid;
3694     switch (IID) {
3695     default: llvm_unreachable("Unexpected intrinsic!");
3696     case Intrinsic::sadd_with_overflow:
3697       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3698       CC = AArch64CC::VS;
3699       break;
3700     case Intrinsic::uadd_with_overflow:
3701       ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3702       CC = AArch64CC::HS;
3703       break;
3704     case Intrinsic::ssub_with_overflow:
3705       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3706       CC = AArch64CC::VS;
3707       break;
3708     case Intrinsic::usub_with_overflow:
3709       ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3710       CC = AArch64CC::LO;
3711       break;
3712     case Intrinsic::smul_with_overflow: {
3713       CC = AArch64CC::NE;
3714       Register LHSReg = getRegForValue(LHS);
3715       if (!LHSReg)
3716         return false;
3717 
3718       Register RHSReg = getRegForValue(RHS);
3719       if (!RHSReg)
3720         return false;
3721 
3722       if (VT == MVT::i32) {
3723         MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3724         Register MulSubReg =
3725             fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3726         // cmp xreg, wreg, sxtw
3727         emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3728                       AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3729                       /*WantResult=*/false);
3730         MulReg = MulSubReg;
3731       } else {
3732         assert(VT == MVT::i64 && "Unexpected value type.");
3733         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3734         // reused in the next instruction.
3735         MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3736         unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3737         emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3738                     /*WantResult=*/false);
3739       }
3740       break;
3741     }
3742     case Intrinsic::umul_with_overflow: {
3743       CC = AArch64CC::NE;
3744       Register LHSReg = getRegForValue(LHS);
3745       if (!LHSReg)
3746         return false;
3747 
3748       Register RHSReg = getRegForValue(RHS);
3749       if (!RHSReg)
3750         return false;
3751 
3752       if (VT == MVT::i32) {
3753         MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3754         // tst xreg, #0xffffffff00000000
3755         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3756                 TII.get(AArch64::ANDSXri), AArch64::XZR)
3757             .addReg(MulReg)
3758             .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3759         MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3760       } else {
3761         assert(VT == MVT::i64 && "Unexpected value type.");
3762         // LHSReg and RHSReg cannot be killed by this Mul, since they are
3763         // reused in the next instruction.
3764         MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3765         unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3766         emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3767       }
3768       break;
3769     }
3770     }
3771 
3772     if (MulReg) {
3773       ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3774       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3775               TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3776     }
3777 
3778     if (!ResultReg1)
3779       return false;
3780 
3781     ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3782                                   AArch64::WZR, AArch64::WZR,
3783                                   getInvertedCondCode(CC));
3784     (void)ResultReg2;
3785     assert((ResultReg1 + 1) == ResultReg2 &&
3786            "Nonconsecutive result registers.");
3787     updateValueMap(II, ResultReg1, 2);
3788     return true;
3789   }
3790   case Intrinsic::aarch64_crc32b:
3791   case Intrinsic::aarch64_crc32h:
3792   case Intrinsic::aarch64_crc32w:
3793   case Intrinsic::aarch64_crc32x:
3794   case Intrinsic::aarch64_crc32cb:
3795   case Intrinsic::aarch64_crc32ch:
3796   case Intrinsic::aarch64_crc32cw:
3797   case Intrinsic::aarch64_crc32cx: {
3798     if (!Subtarget->hasCRC())
3799       return false;
3800 
3801     unsigned Opc;
3802     switch (II->getIntrinsicID()) {
3803     default:
3804       llvm_unreachable("Unexpected intrinsic!");
3805     case Intrinsic::aarch64_crc32b:
3806       Opc = AArch64::CRC32Brr;
3807       break;
3808     case Intrinsic::aarch64_crc32h:
3809       Opc = AArch64::CRC32Hrr;
3810       break;
3811     case Intrinsic::aarch64_crc32w:
3812       Opc = AArch64::CRC32Wrr;
3813       break;
3814     case Intrinsic::aarch64_crc32x:
3815       Opc = AArch64::CRC32Xrr;
3816       break;
3817     case Intrinsic::aarch64_crc32cb:
3818       Opc = AArch64::CRC32CBrr;
3819       break;
3820     case Intrinsic::aarch64_crc32ch:
3821       Opc = AArch64::CRC32CHrr;
3822       break;
3823     case Intrinsic::aarch64_crc32cw:
3824       Opc = AArch64::CRC32CWrr;
3825       break;
3826     case Intrinsic::aarch64_crc32cx:
3827       Opc = AArch64::CRC32CXrr;
3828       break;
3829     }
3830 
3831     Register LHSReg = getRegForValue(II->getArgOperand(0));
3832     Register RHSReg = getRegForValue(II->getArgOperand(1));
3833     if (!LHSReg || !RHSReg)
3834       return false;
3835 
3836     Register ResultReg =
3837         fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3838     updateValueMap(II, ResultReg);
3839     return true;
3840   }
3841   }
3842   return false;
3843 }
3844 
3845 bool AArch64FastISel::selectRet(const Instruction *I) {
3846   const ReturnInst *Ret = cast<ReturnInst>(I);
3847   const Function &F = *I->getParent()->getParent();
3848 
3849   if (!FuncInfo.CanLowerReturn)
3850     return false;
3851 
3852   if (F.isVarArg())
3853     return false;
3854 
3855   if (TLI.supportSwiftError() &&
3856       F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3857     return false;
3858 
3859   if (TLI.supportSplitCSR(FuncInfo.MF))
3860     return false;
3861 
3862   // Build a list of return value registers.
3863   SmallVector<unsigned, 4> RetRegs;
3864 
3865   if (Ret->getNumOperands() > 0) {
3866     CallingConv::ID CC = F.getCallingConv();
3867     SmallVector<ISD::OutputArg, 4> Outs;
3868     GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3869 
3870     // Analyze operands of the call, assigning locations to each operand.
3871     SmallVector<CCValAssign, 16> ValLocs;
3872     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3873     CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3874 
3875     // Only handle a single return value for now.
3876     if (ValLocs.size() != 1)
3877       return false;
3878 
3879     CCValAssign &VA = ValLocs[0];
3880     const Value *RV = Ret->getOperand(0);
3881 
3882     // Don't bother handling odd stuff for now.
3883     if ((VA.getLocInfo() != CCValAssign::Full) &&
3884         (VA.getLocInfo() != CCValAssign::BCvt))
3885       return false;
3886 
3887     // Only handle register returns for now.
3888     if (!VA.isRegLoc())
3889       return false;
3890 
3891     Register Reg = getRegForValue(RV);
3892     if (Reg == 0)
3893       return false;
3894 
3895     unsigned SrcReg = Reg + VA.getValNo();
3896     Register DestReg = VA.getLocReg();
3897     // Avoid a cross-class copy. This is very unlikely.
3898     if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3899       return false;
3900 
3901     EVT RVEVT = TLI.getValueType(DL, RV->getType());
3902     if (!RVEVT.isSimple())
3903       return false;
3904 
3905     // Vectors (of > 1 lane) in big endian need tricky handling.
3906     if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3907         !Subtarget->isLittleEndian())
3908       return false;
3909 
3910     MVT RVVT = RVEVT.getSimpleVT();
3911     if (RVVT == MVT::f128)
3912       return false;
3913 
3914     MVT DestVT = VA.getValVT();
3915     // Special handling for extended integers.
3916     if (RVVT != DestVT) {
3917       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3918         return false;
3919 
3920       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3921         return false;
3922 
3923       bool IsZExt = Outs[0].Flags.isZExt();
3924       SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3925       if (SrcReg == 0)
3926         return false;
3927     }
3928 
3929     // "Callee" (i.e. value producer) zero extends pointers at function
3930     // boundary.
3931     if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3932       SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3933 
3934     // Make the copy.
3935     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3936             TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3937 
3938     // Add register to return instruction.
3939     RetRegs.push_back(VA.getLocReg());
3940   }
3941 
3942   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3943                                     TII.get(AArch64::RET_ReallyLR));
3944   for (unsigned RetReg : RetRegs)
3945     MIB.addReg(RetReg, RegState::Implicit);
3946   return true;
3947 }
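// Roughly, for a function declared as (IR names invented)
//   define zeroext i8 @f() { ...; ret i8 %v }
// the i8 return value is widened with emitIntExt to the i32 location type
// before being copied into W0 ahead of the RET_ReallyLR.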
3948 
3949 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3950   Type *DestTy = I->getType();
3951   Value *Op = I->getOperand(0);
3952   Type *SrcTy = Op->getType();
3953 
3954   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3955   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3956   if (!SrcEVT.isSimple())
3957     return false;
3958   if (!DestEVT.isSimple())
3959     return false;
3960 
3961   MVT SrcVT = SrcEVT.getSimpleVT();
3962   MVT DestVT = DestEVT.getSimpleVT();
3963 
3964   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3965       SrcVT != MVT::i8)
3966     return false;
3967   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3968       DestVT != MVT::i1)
3969     return false;
3970 
3971   Register SrcReg = getRegForValue(Op);
3972   if (!SrcReg)
3973     return false;
3974 
3975   // If we're truncating from i64 to a smaller non-legal type then generate an
3976   // AND. Otherwise, we know the high bits are undefined and a truncate only
3977   // generates a COPY. We cannot also mark the source register as the result
3978   // register, because this can incorrectly transfer the kill flag onto the
3979   // source register.
3980   unsigned ResultReg;
3981   if (SrcVT == MVT::i64) {
3982     uint64_t Mask = 0;
3983     switch (DestVT.SimpleTy) {
3984     default:
3985       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3986       return false;
3987     case MVT::i1:
3988       Mask = 0x1;
3989       break;
3990     case MVT::i8:
3991       Mask = 0xff;
3992       break;
3993     case MVT::i16:
3994       Mask = 0xffff;
3995       break;
3996     }
3997     // Issue an extract_subreg to get the lower 32-bits.
3998     Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3999                                                 AArch64::sub_32);
4000     // Create the AND instruction which performs the actual truncation.
4001     ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
4002     assert(ResultReg && "Unexpected AND instruction emission failure.");
4003   } else {
4004     ResultReg = createResultReg(&AArch64::GPR32RegClass);
4005     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4006             TII.get(TargetOpcode::COPY), ResultReg)
4007         .addReg(SrcReg);
4008   }
4009 
4010   updateValueMap(I, ResultReg);
4011   return true;
4012 }
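// Worked example (names invented): for "%t = trunc i64 %x to i8" the code
// above extracts the sub_32 subregister and ANDs it with 0xff; a trunc from
// i32 or i16 only needs the COPY, since the high bits are known to be ignored.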
4013 
4014 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4015   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4016           DestVT == MVT::i64) &&
4017          "Unexpected value type.");
4018   // Handle i8 and i16 as i32.
4019   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4020     DestVT = MVT::i32;
4021 
4022   if (IsZExt) {
4023     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4024     assert(ResultReg && "Unexpected AND instruction emission failure.");
4025     if (DestVT == MVT::i64) {
4026       // We're zero-extending i1 to i64. The "ANDWri Wd, Ws, #1" implicitly
4027       // clears the upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4028       Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4029       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4030               TII.get(AArch64::SUBREG_TO_REG), Reg64)
4031           .addImm(0)
4032           .addReg(ResultReg)
4033           .addImm(AArch64::sub_32);
4034       ResultReg = Reg64;
4035     }
4036     return ResultReg;
4037   } else {
4038     if (DestVT == MVT::i64) {
4039       // FIXME: Sign-extending i1 to i64 is not implemented yet.
4040       return 0;
4041     }
4042     return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4043                             0, 0);
4044   }
4045 }
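// In short: zero-extending an i1 is an AND with 1 (ANDWri), while
// sign-extending uses SBFM Wd, Wn, #0, #0, which replicates bit 0 into every
// bit of the result (0 or all-ones); the i1 -> i64 sign-extend case is left to
// SelectionDAG.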
4046 
4047 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4048   unsigned Opc, ZReg;
4049   switch (RetVT.SimpleTy) {
4050   default: return 0;
4051   case MVT::i8:
4052   case MVT::i16:
4053   case MVT::i32:
4054     RetVT = MVT::i32;
4055     Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4056   case MVT::i64:
4057     Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4058   }
4059 
4060   const TargetRegisterClass *RC =
4061       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4062   return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4063 }
4064 
4065 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4066   if (RetVT != MVT::i64)
4067     return 0;
4068 
4069   return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4070                           Op0, Op1, AArch64::XZR);
4071 }
4072 
4073 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4074   if (RetVT != MVT::i64)
4075     return 0;
4076 
4077   return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4078                           Op0, Op1, AArch64::XZR);
4079 }
4080 
4081 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4082                                      unsigned Op1Reg) {
4083   unsigned Opc = 0;
4084   bool NeedTrunc = false;
4085   uint64_t Mask = 0;
4086   switch (RetVT.SimpleTy) {
4087   default: return 0;
4088   case MVT::i8:  Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff;   break;
4089   case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4090   case MVT::i32: Opc = AArch64::LSLVWr;                                  break;
4091   case MVT::i64: Opc = AArch64::LSLVXr;                                  break;
4092   }
4093 
4094   const TargetRegisterClass *RC =
4095       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4096   if (NeedTrunc)
4097     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4098 
4099   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4100   if (NeedTrunc)
4101     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4102   return ResultReg;
4103 }
4104 
4105 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4106                                      uint64_t Shift, bool IsZExt) {
4107   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4108          "Unexpected source/return type pair.");
4109   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4110           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4111          "Unexpected source value type.");
4112   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4113           RetVT == MVT::i64) && "Unexpected return value type.");
4114 
4115   bool Is64Bit = (RetVT == MVT::i64);
4116   unsigned RegSize = Is64Bit ? 64 : 32;
4117   unsigned DstBits = RetVT.getSizeInBits();
4118   unsigned SrcBits = SrcVT.getSizeInBits();
4119   const TargetRegisterClass *RC =
4120       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4121 
4122   // Just emit a copy for "zero" shifts.
4123   if (Shift == 0) {
4124     if (RetVT == SrcVT) {
4125       Register ResultReg = createResultReg(RC);
4126       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4127               TII.get(TargetOpcode::COPY), ResultReg)
4128           .addReg(Op0);
4129       return ResultReg;
4130     } else
4131       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4132   }
4133 
4134   // Don't deal with undefined shifts.
4135   if (Shift >= DstBits)
4136     return 0;
4137 
4138   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4139   // {S|U}BFM Wd, Wn, #r, #s
4140   // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4141 
4142   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4143   // %2 = shl i16 %1, 4
4144   // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4145   // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4146   // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4147   // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4148 
4149   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4150   // %2 = shl i16 %1, 8
4151   // Wd<32+7-24,32-24> = Wn<7:0>
4152   // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4153   // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4154   // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4155 
4156   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4157   // %2 = shl i16 %1, 12
4158   // Wd<32+3-20,32-20> = Wn<3:0>
4159   // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4160   // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4161   // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4162 
4163   unsigned ImmR = RegSize - Shift;
4164   // Limit the width to the length of the source type.
4165   unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4166   static const unsigned OpcTable[2][2] = {
4167     {AArch64::SBFMWri, AArch64::SBFMXri},
4168     {AArch64::UBFMWri, AArch64::UBFMXri}
4169   };
4170   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4171   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4172     Register TmpReg = MRI.createVirtualRegister(RC);
4173     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4174             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4175         .addImm(0)
4176         .addReg(Op0)
4177         .addImm(AArch64::sub_32);
4178     Op0 = TmpReg;
4179   }
4180   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4181 }
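// Worked example for the immediate path above: zext i8 %x to i32 followed by
// "shl i32 %1, 4" becomes UBFMWri Wd, Wn, #28, #7 (ImmR = 32 - 4, ImmS clamped
// to SrcBits - 1), i.e. UBFIZ Wd, Wn, #4, #8, so the extension is folded into
// the bitfield move.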
4182 
4183 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4184                                      unsigned Op1Reg) {
4185   unsigned Opc = 0;
4186   bool NeedTrunc = false;
4187   uint64_t Mask = 0;
4188   switch (RetVT.SimpleTy) {
4189   default: return 0;
4190   case MVT::i8:  Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff;   break;
4191   case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4192   case MVT::i32: Opc = AArch64::LSRVWr; break;
4193   case MVT::i64: Opc = AArch64::LSRVXr; break;
4194   }
4195 
4196   const TargetRegisterClass *RC =
4197       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4198   if (NeedTrunc) {
4199     Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4200     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4201   }
4202   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4203   if (NeedTrunc)
4204     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4205   return ResultReg;
4206 }
4207 
4208 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4209                                      uint64_t Shift, bool IsZExt) {
4210   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4211          "Unexpected source/return type pair.");
4212   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4213           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4214          "Unexpected source value type.");
4215   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4216           RetVT == MVT::i64) && "Unexpected return value type.");
4217 
4218   bool Is64Bit = (RetVT == MVT::i64);
4219   unsigned RegSize = Is64Bit ? 64 : 32;
4220   unsigned DstBits = RetVT.getSizeInBits();
4221   unsigned SrcBits = SrcVT.getSizeInBits();
4222   const TargetRegisterClass *RC =
4223       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4224 
4225   // Just emit a copy for "zero" shifts.
4226   if (Shift == 0) {
4227     if (RetVT == SrcVT) {
4228       Register ResultReg = createResultReg(RC);
4229       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4230               TII.get(TargetOpcode::COPY), ResultReg)
4231       .addReg(Op0);
4232       return ResultReg;
4233     } else
4234       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4235   }
4236 
4237   // Don't deal with undefined shifts.
4238   if (Shift >= DstBits)
4239     return 0;
4240 
4241   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4242   // {S|U}BFM Wd, Wn, #r, #s
4243   // Wd<s-r:0> = Wn<s:r> when r <= s
4244 
4245   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4246   // %2 = lshr i16 %1, 4
4247   // Wd<7-4:0> = Wn<7:4>
4248   // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4249   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4250   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4251 
4252   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4253   // %2 = lshr i16 %1, 8
4254   // Wd<7-7,0> = Wn<7:7>
4255   // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4256   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4257   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4258 
4259   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4260   // %2 = lshr i16 %1, 12
4261   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4262   // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4263   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4264   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4265 
4266   if (Shift >= SrcBits && IsZExt)
4267     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4268 
4269   // It is not possible to fold a sign-extend into the LShr instruction. In this
4270   // case emit a sign-extend.
4271   if (!IsZExt) {
4272     Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4273     if (!Op0)
4274       return 0;
4275     SrcVT = RetVT;
4276     SrcBits = SrcVT.getSizeInBits();
4277     IsZExt = true;
4278   }
4279 
4280   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4281   unsigned ImmS = SrcBits - 1;
4282   static const unsigned OpcTable[2][2] = {
4283     {AArch64::SBFMWri, AArch64::SBFMXri},
4284     {AArch64::UBFMWri, AArch64::UBFMXri}
4285   };
4286   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4287   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4288     Register TmpReg = MRI.createVirtualRegister(RC);
4289     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4290             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4291         .addImm(0)
4292         .addReg(Op0)
4293         .addImm(AArch64::sub_32);
4294     Op0 = TmpReg;
4295   }
4296   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4297 }
4298 
4299 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4300                                      unsigned Op1Reg) {
4301   unsigned Opc = 0;
4302   bool NeedTrunc = false;
4303   uint64_t Mask = 0;
4304   switch (RetVT.SimpleTy) {
4305   default: return 0;
4306   case MVT::i8:  Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff;   break;
4307   case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4308   case MVT::i32: Opc = AArch64::ASRVWr;                                  break;
4309   case MVT::i64: Opc = AArch64::ASRVXr;                                  break;
4310   }
4311 
4312   const TargetRegisterClass *RC =
4313       (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4314   if (NeedTrunc) {
4315     Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4316     Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4317   }
4318   Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4319   if (NeedTrunc)
4320     ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4321   return ResultReg;
4322 }
4323 
4324 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4325                                      uint64_t Shift, bool IsZExt) {
4326   assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4327          "Unexpected source/return type pair.");
4328   assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4329           SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4330          "Unexpected source value type.");
4331   assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4332           RetVT == MVT::i64) && "Unexpected return value type.");
4333 
4334   bool Is64Bit = (RetVT == MVT::i64);
4335   unsigned RegSize = Is64Bit ? 64 : 32;
4336   unsigned DstBits = RetVT.getSizeInBits();
4337   unsigned SrcBits = SrcVT.getSizeInBits();
4338   const TargetRegisterClass *RC =
4339       Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4340 
4341   // Just emit a copy for "zero" shifts.
4342   if (Shift == 0) {
4343     if (RetVT == SrcVT) {
4344       Register ResultReg = createResultReg(RC);
4345       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4346               TII.get(TargetOpcode::COPY), ResultReg)
4347       .addReg(Op0);
4348       return ResultReg;
4349     } else
4350       return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4351   }
4352 
4353   // Don't deal with undefined shifts.
4354   if (Shift >= DstBits)
4355     return 0;
4356 
4357   // For immediate shifts we can fold the zero-/sign-extension into the shift.
4358   // {S|U}BFM Wd, Wn, #r, #s
4359   // Wd<s-r:0> = Wn<s:r> when r <= s
4360 
4361   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4362   // %2 = ashr i16 %1, 4
4363   // Wd<7-4:0> = Wn<7:4>
4364   // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4365   // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4366   // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4367 
4368   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4369   // %2 = ashr i16 %1, 8
4370   // Wd<7-7,0> = Wn<7:7>
4371   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4372   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4373   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4374 
4375   // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4376   // %2 = ashr i16 %1, 12
4377   // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4378   // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4379   // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4380   // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4381 
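  // As a concrete illustration of the folding above (register names are
  // placeholders): "ashr i16 %2, 4", where %2 was sign-extended from i8,
  // collapses into a single SBFMWri Wd, Wn, #4, #7.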
4382   if (Shift >= SrcBits && IsZExt)
4383     return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4384 
4385   unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4386   unsigned ImmS = SrcBits - 1;
4387   static const unsigned OpcTable[2][2] = {
4388     {AArch64::SBFMWri, AArch64::SBFMXri},
4389     {AArch64::UBFMWri, AArch64::UBFMXri}
4390   };
4391   unsigned Opc = OpcTable[IsZExt][Is64Bit];
4392   if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4393     Register TmpReg = MRI.createVirtualRegister(RC);
4394     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4395             TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4396         .addImm(0)
4397         .addReg(Op0)
4398         .addImm(AArch64::sub_32);
4399     Op0 = TmpReg;
4400   }
4401   return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4402 }
4403 
4404 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4405                                      bool IsZExt) {
4406   assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4407 
4408   // FastISel does not have plumbing to deal with extensions where the SrcVT or
4409   // DestVT are odd things, so test to make sure that they are both types we can
4410   // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4411   // bail out to SelectionDAG.
4412   if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4413        (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4414       ((SrcVT !=  MVT::i1) && (SrcVT !=  MVT::i8) &&
4415        (SrcVT !=  MVT::i16) && (SrcVT !=  MVT::i32)))
4416     return 0;
4417 
4418   unsigned Opc;
4419   unsigned Imm = 0;
4420 
4421   switch (SrcVT.SimpleTy) {
4422   default:
4423     return 0;
4424   case MVT::i1:
4425     return emiti1Ext(SrcReg, DestVT, IsZExt);
4426   case MVT::i8:
4427     if (DestVT == MVT::i64)
4428       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4429     else
4430       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4431     Imm = 7;
4432     break;
4433   case MVT::i16:
4434     if (DestVT == MVT::i64)
4435       Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4436     else
4437       Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4438     Imm = 15;
4439     break;
4440   case MVT::i32:
4441     assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4442     Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4443     Imm = 31;
4444     break;
4445   }
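  // Illustrative examples of the opcodes chosen above (operand names are
  // placeholders): a zero-extend of i8 to i32 becomes UBFMWri Wd, Wn, #0, #7
  // (the UXTB form), and a sign-extend of i32 to i64 becomes
  // SBFMXri Xd, Xn, #0, #31 (the SXTW form).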
4446 
4447   // Handle i8 and i16 as i32.
4448   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4449     DestVT = MVT::i32;
4450   else if (DestVT == MVT::i64) {
4451     Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4452     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4453             TII.get(AArch64::SUBREG_TO_REG), Src64)
4454         .addImm(0)
4455         .addReg(SrcReg)
4456         .addImm(AArch64::sub_32);
4457     SrcReg = Src64;
4458   }
4459 
4460   const TargetRegisterClass *RC =
4461       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4462   return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4463 }
4464 
4465 static bool isZExtLoad(const MachineInstr *LI) {
4466   switch (LI->getOpcode()) {
4467   default:
4468     return false;
4469   case AArch64::LDURBBi:
4470   case AArch64::LDURHHi:
4471   case AArch64::LDURWi:
4472   case AArch64::LDRBBui:
4473   case AArch64::LDRHHui:
4474   case AArch64::LDRWui:
4475   case AArch64::LDRBBroX:
4476   case AArch64::LDRHHroX:
4477   case AArch64::LDRWroX:
4478   case AArch64::LDRBBroW:
4479   case AArch64::LDRHHroW:
4480   case AArch64::LDRWroW:
4481     return true;
4482   }
4483 }
4484 
4485 static bool isSExtLoad(const MachineInstr *LI) {
4486   switch (LI->getOpcode()) {
4487   default:
4488     return false;
4489   case AArch64::LDURSBWi:
4490   case AArch64::LDURSHWi:
4491   case AArch64::LDURSBXi:
4492   case AArch64::LDURSHXi:
4493   case AArch64::LDURSWi:
4494   case AArch64::LDRSBWui:
4495   case AArch64::LDRSHWui:
4496   case AArch64::LDRSBXui:
4497   case AArch64::LDRSHXui:
4498   case AArch64::LDRSWui:
4499   case AArch64::LDRSBWroX:
4500   case AArch64::LDRSHWroX:
4501   case AArch64::LDRSBXroX:
4502   case AArch64::LDRSHXroX:
4503   case AArch64::LDRSWroX:
4504   case AArch64::LDRSBWroW:
4505   case AArch64::LDRSHWroW:
4506   case AArch64::LDRSBXroW:
4507   case AArch64::LDRSHXroW:
4508   case AArch64::LDRSWroW:
4509     return true;
4510   }
4511 }
4512 
4513 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4514                                          MVT SrcVT) {
4515   const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4516   if (!LI || !LI->hasOneUse())
4517     return false;
4518 
4519   // Check if the load instruction has already been selected.
4520   Register Reg = lookUpRegForValue(LI);
4521   if (!Reg)
4522     return false;
4523 
4524   MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4525   if (!MI)
4526     return false;
4527 
4528   // Check if the correct load instruction has been emitted - SelectionDAG might
4529   // have emitted a zero-extending load, but we need a sign-extending load.
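  // Illustrative sketch (names are placeholders): for
  //   %v = load i8, ptr %p
  //   %e = zext i8 %v to i64
  // the zero-extending load already selected for %v (e.g. LDRBBui) is reused,
  // and only a SUBREG_TO_REG is inserted below to widen the result to 64 bits.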
4530   bool IsZExt = isa<ZExtInst>(I);
4531   const auto *LoadMI = MI;
4532   if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4533       LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4534     Register LoadReg = MI->getOperand(1).getReg();
4535     LoadMI = MRI.getUniqueVRegDef(LoadReg);
4536     assert(LoadMI && "Expected valid instruction");
4537   }
4538   if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4539     return false;
4540 
4541   // Nothing to be done.
4542   if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4543     updateValueMap(I, Reg);
4544     return true;
4545   }
4546 
4547   if (IsZExt) {
4548     Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4549     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4550             TII.get(AArch64::SUBREG_TO_REG), Reg64)
4551         .addImm(0)
4552         .addReg(Reg, getKillRegState(true))
4553         .addImm(AArch64::sub_32);
4554     Reg = Reg64;
4555   } else {
4556     assert((MI->getOpcode() == TargetOpcode::COPY &&
4557             MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4558            "Expected copy instruction");
4559     Reg = MI->getOperand(1).getReg();
4560     MachineBasicBlock::iterator I(MI);
4561     removeDeadCode(I, std::next(I));
4562   }
4563   updateValueMap(I, Reg);
4564   return true;
4565 }
4566 
4567 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4568   assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4569          "Unexpected integer extend instruction.");
4570   MVT RetVT;
4571   MVT SrcVT;
4572   if (!isTypeSupported(I->getType(), RetVT))
4573     return false;
4574 
4575   if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4576     return false;
4577 
4578   // Try to optimize already sign-/zero-extended values from load instructions.
4579   if (optimizeIntExtLoad(I, RetVT, SrcVT))
4580     return true;
4581 
4582   Register SrcReg = getRegForValue(I->getOperand(0));
4583   if (!SrcReg)
4584     return false;
4585 
4586   // Try to optimize already sign-/zero-extended values from function arguments.
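  // For example (illustrative), with "define i64 @f(i32 zeroext %x)" a
  // "zext i32 %x to i64" needs no extension instruction; at most a
  // SUBREG_TO_REG is emitted below to produce the 64-bit register.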
4587   bool IsZExt = isa<ZExtInst>(I);
4588   if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4589     if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4590       if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4591         Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4592         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4593                 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4594             .addImm(0)
4595             .addReg(SrcReg)
4596             .addImm(AArch64::sub_32);
4597         SrcReg = ResultReg;
4598       }
4599 
4600       updateValueMap(I, SrcReg);
4601       return true;
4602     }
4603   }
4604 
4605   unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4606   if (!ResultReg)
4607     return false;
4608 
4609   updateValueMap(I, ResultReg);
4610   return true;
4611 }
4612 
4613 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4614   EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4615   if (!DestEVT.isSimple())
4616     return false;
4617 
4618   MVT DestVT = DestEVT.getSimpleVT();
4619   if (DestVT != MVT::i64 && DestVT != MVT::i32)
4620     return false;
4621 
4622   unsigned DivOpc;
4623   bool Is64bit = (DestVT == MVT::i64);
4624   switch (ISDOpcode) {
4625   default:
4626     return false;
4627   case ISD::SREM:
4628     DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4629     break;
4630   case ISD::UREM:
4631     DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4632     break;
4633   }
4634   unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4635   Register Src0Reg = getRegForValue(I->getOperand(0));
4636   if (!Src0Reg)
4637     return false;
4638 
4639   Register Src1Reg = getRegForValue(I->getOperand(1));
4640   if (!Src1Reg)
4641     return false;
4642 
4643   const TargetRegisterClass *RC =
4644       (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4645   Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4646   assert(QuotReg && "Unexpected DIV instruction emission failure.");
4647   // The remainder is computed as numerator - (quotient * denominator) using the
4648   // MSUB instruction.
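  // Illustrative sketch (virtual registers are placeholders): "srem i32 %a, %b"
  // is emitted as
  //   %q = SDIVWr %a, %b
  //   %r = MSUBWrrr %q, %b, %a     ; %a - (%q * %b)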
4649   Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4650   updateValueMap(I, ResultReg);
4651   return true;
4652 }
4653 
4654 bool AArch64FastISel::selectMul(const Instruction *I) {
4655   MVT VT;
4656   if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4657     return false;
4658 
4659   if (VT.isVector())
4660     return selectBinaryOp(I, ISD::MUL);
4661 
4662   const Value *Src0 = I->getOperand(0);
4663   const Value *Src1 = I->getOperand(1);
4664   if (const auto *C = dyn_cast<ConstantInt>(Src0))
4665     if (C->getValue().isPowerOf2())
4666       std::swap(Src0, Src1);
4667 
4668   // Try to simplify to a shift instruction.
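  // For example (illustrative), "mul i32 %x, 8" is emitted as a left shift by
  // 3 via emitLSL_ri rather than as a full multiply.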
4669   if (const auto *C = dyn_cast<ConstantInt>(Src1))
4670     if (C->getValue().isPowerOf2()) {
4671       uint64_t ShiftVal = C->getValue().logBase2();
4672       MVT SrcVT = VT;
4673       bool IsZExt = true;
4674       if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4675         if (!isIntExtFree(ZExt)) {
4676           MVT VT;
4677           if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4678             SrcVT = VT;
4679             IsZExt = true;
4680             Src0 = ZExt->getOperand(0);
4681           }
4682         }
4683       } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4684         if (!isIntExtFree(SExt)) {
4685           MVT VT;
4686           if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4687             SrcVT = VT;
4688             IsZExt = false;
4689             Src0 = SExt->getOperand(0);
4690           }
4691         }
4692       }
4693 
4694       Register Src0Reg = getRegForValue(Src0);
4695       if (!Src0Reg)
4696         return false;
4697 
4698       unsigned ResultReg =
4699           emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4700 
4701       if (ResultReg) {
4702         updateValueMap(I, ResultReg);
4703         return true;
4704       }
4705     }
4706 
4707   Register Src0Reg = getRegForValue(I->getOperand(0));
4708   if (!Src0Reg)
4709     return false;
4710 
4711   Register Src1Reg = getRegForValue(I->getOperand(1));
4712   if (!Src1Reg)
4713     return false;
4714 
4715   unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4716 
4717   if (!ResultReg)
4718     return false;
4719 
4720   updateValueMap(I, ResultReg);
4721   return true;
4722 }
4723 
4724 bool AArch64FastISel::selectShift(const Instruction *I) {
4725   MVT RetVT;
4726   if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4727     return false;
4728 
4729   if (RetVT.isVector())
4730     return selectOperator(I, I->getOpcode());
4731 
4732   if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4733     unsigned ResultReg = 0;
4734     uint64_t ShiftVal = C->getZExtValue();
4735     MVT SrcVT = RetVT;
4736     bool IsZExt = I->getOpcode() != Instruction::AShr;
4737     const Value *Op0 = I->getOperand(0);
4738     if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4739       if (!isIntExtFree(ZExt)) {
4740         MVT TmpVT;
4741         if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4742           SrcVT = TmpVT;
4743           IsZExt = true;
4744           Op0 = ZExt->getOperand(0);
4745         }
4746       }
4747     } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4748       if (!isIntExtFree(SExt)) {
4749         MVT TmpVT;
4750         if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4751           SrcVT = TmpVT;
4752           IsZExt = false;
4753           Op0 = SExt->getOperand(0);
4754         }
4755       }
4756     }
4757 
4758     Register Op0Reg = getRegForValue(Op0);
4759     if (!Op0Reg)
4760       return false;
4761 
4762     switch (I->getOpcode()) {
4763     default: llvm_unreachable("Unexpected instruction.");
4764     case Instruction::Shl:
4765       ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4766       break;
4767     case Instruction::AShr:
4768       ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4769       break;
4770     case Instruction::LShr:
4771       ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4772       break;
4773     }
4774     if (!ResultReg)
4775       return false;
4776 
4777     updateValueMap(I, ResultReg);
4778     return true;
4779   }
4780 
4781   Register Op0Reg = getRegForValue(I->getOperand(0));
4782   if (!Op0Reg)
4783     return false;
4784 
4785   Register Op1Reg = getRegForValue(I->getOperand(1));
4786   if (!Op1Reg)
4787     return false;
4788 
4789   unsigned ResultReg = 0;
4790   switch (I->getOpcode()) {
4791   default: llvm_unreachable("Unexpected instruction.");
4792   case Instruction::Shl:
4793     ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4794     break;
4795   case Instruction::AShr:
4796     ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4797     break;
4798   case Instruction::LShr:
4799     ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4800     break;
4801   }
4802 
4803   if (!ResultReg)
4804     return false;
4805 
4806   updateValueMap(I, ResultReg);
4807   return true;
4808 }
4809 
4810 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4811   MVT RetVT, SrcVT;
4812 
4813   if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4814     return false;
4815   if (!isTypeLegal(I->getType(), RetVT))
4816     return false;
4817 
4818   unsigned Opc;
4819   if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4820     Opc = AArch64::FMOVWSr;
4821   else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4822     Opc = AArch64::FMOVXDr;
4823   else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4824     Opc = AArch64::FMOVSWr;
4825   else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4826     Opc = AArch64::FMOVDXr;
4827   else
4828     return false;
4829 
4830   const TargetRegisterClass *RC = nullptr;
4831   switch (RetVT.SimpleTy) {
4832   default: llvm_unreachable("Unexpected value type.");
4833   case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4834   case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4835   case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4836   case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4837   }
4838   Register Op0Reg = getRegForValue(I->getOperand(0));
4839   if (!Op0Reg)
4840     return false;
4841 
4842   Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4843   if (!ResultReg)
4844     return false;
4845 
4846   updateValueMap(I, ResultReg);
4847   return true;
4848 }
4849 
4850 bool AArch64FastISel::selectFRem(const Instruction *I) {
4851   MVT RetVT;
4852   if (!isTypeLegal(I->getType(), RetVT))
4853     return false;
4854 
4855   RTLIB::Libcall LC;
4856   switch (RetVT.SimpleTy) {
4857   default:
4858     return false;
4859   case MVT::f32:
4860     LC = RTLIB::REM_F32;
4861     break;
4862   case MVT::f64:
4863     LC = RTLIB::REM_F64;
4864     break;
4865   }
4866 
4867   ArgListTy Args;
4868   Args.reserve(I->getNumOperands());
4869 
4870   // Populate the argument list.
4871   for (auto &Arg : I->operands()) {
4872     ArgListEntry Entry;
4873     Entry.Val = Arg;
4874     Entry.Ty = Arg->getType();
4875     Args.push_back(Entry);
4876   }
4877 
4878   CallLoweringInfo CLI;
4879   MCContext &Ctx = MF->getContext();
4880   CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4881                 TLI.getLibcallName(LC), std::move(Args));
4882   if (!lowerCallTo(CLI))
4883     return false;
4884   updateValueMap(I, CLI.ResultReg);
4885   return true;
4886 }
4887 
4888 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4889   MVT VT;
4890   if (!isTypeLegal(I->getType(), VT))
4891     return false;
4892 
4893   if (!isa<ConstantInt>(I->getOperand(1)))
4894     return selectBinaryOp(I, ISD::SDIV);
4895 
4896   const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4897   if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4898       !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4899     return selectBinaryOp(I, ISD::SDIV);
4900 
4901   unsigned Lg2 = C.countr_zero();
4902   Register Src0Reg = getRegForValue(I->getOperand(0));
4903   if (!Src0Reg)
4904     return false;
4905 
4906   if (cast<BinaryOperator>(I)->isExact()) {
4907     unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4908     if (!ResultReg)
4909       return false;
4910     updateValueMap(I, ResultReg);
4911     return true;
4912   }
4913 
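  // Otherwise bias the dividend before shifting. For "sdiv i32 %x, 8" the code
  // below emits roughly (illustrative; register names are placeholders):
  //   %t = ADD %x, #7                  ; x + (8 - 1)
  //   CMP %x, #0
  //   %s = CSELWr %t, %x, lt           ; x < 0 ? x + 7 : x
  //   %d = SBFMWri %s, #3, #31         ; arithmetic shift right by 3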
4914   int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4915   unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4916   if (!AddReg)
4917     return false;
4918 
4919   // (Src0 < 0) ? Pow2 - 1 : 0;
4920   if (!emitICmp_ri(VT, Src0Reg, 0))
4921     return false;
4922 
4923   unsigned SelectOpc;
4924   const TargetRegisterClass *RC;
4925   if (VT == MVT::i64) {
4926     SelectOpc = AArch64::CSELXr;
4927     RC = &AArch64::GPR64RegClass;
4928   } else {
4929     SelectOpc = AArch64::CSELWr;
4930     RC = &AArch64::GPR32RegClass;
4931   }
4932   Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4933                                         AArch64CC::LT);
4934   if (!SelectReg)
4935     return false;
4936 
4937   // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4938   // negate the result.
4939   unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4940   unsigned ResultReg;
4941   if (C.isNegative())
4942     ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4943                               AArch64_AM::ASR, Lg2);
4944   else
4945     ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4946 
4947   if (!ResultReg)
4948     return false;
4949 
4950   updateValueMap(I, ResultReg);
4951   return true;
4952 }
4953 
4954 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4955 /// have to duplicate it for AArch64, because otherwise we would fail during the
4956 /// sign-extend emission.
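/// For example (illustrative), an i32 GEP index is sign-extended to the 64-bit
/// pointer width below (via SBFMXri, the SXTW form) before it is used in the
/// address computation.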
4957 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4958   Register IdxN = getRegForValue(Idx);
4959   if (IdxN == 0)
4960     // Unhandled operand. Halt "fast" selection and bail.
4961     return 0;
4962 
4963   // If the index is smaller or larger than intptr_t, truncate or extend it.
4964   MVT PtrVT = TLI.getPointerTy(DL);
4965   EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4966   if (IdxVT.bitsLT(PtrVT)) {
4967     IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4968   } else if (IdxVT.bitsGT(PtrVT))
4969     llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4970   return IdxN;
4971 }
4972 
4973 /// This is mostly a copy of the existing FastISel GEP code, but we have to
4974 /// duplicate it for AArch64, because otherwise we would bail out even for
4975 /// simple cases. This is because the standard fastEmit functions don't cover
4976 /// MUL at all and ADD is lowered very inefficiently.
4977 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4978   if (Subtarget->isTargetILP32())
4979     return false;
4980 
4981   Register N = getRegForValue(I->getOperand(0));
4982   if (!N)
4983     return false;
4984 
4985   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4986   // into a single N = N + TotalOffset.
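  // For example (illustrative), for
  //   %p = getelementptr { i32, i32 }, ptr %base, i64 1, i32 1
  // both constant offsets are folded into TotalOffs (8 + 4 = 12) and a single
  // add of #12 to the base register is emitted at the end.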
4987   uint64_t TotalOffs = 0;
4988   MVT VT = TLI.getPointerTy(DL);
4989   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4990        GTI != E; ++GTI) {
4991     const Value *Idx = GTI.getOperand();
4992     if (auto *StTy = GTI.getStructTypeOrNull()) {
4993       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4994       // N = N + Offset
4995       if (Field)
4996         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4997     } else {
4998       // If this is a constant subscript, handle it quickly.
4999       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
5000         if (CI->isZero())
5001           continue;
5002         // N = N + Offset
5003         TotalOffs += GTI.getSequentialElementStride(DL) *
5004                      cast<ConstantInt>(CI)->getSExtValue();
5005         continue;
5006       }
5007       if (TotalOffs) {
5008         N = emitAdd_ri_(VT, N, TotalOffs);
5009         if (!N)
5010           return false;
5011         TotalOffs = 0;
5012       }
5013 
5014       // N = N + Idx * ElementSize;
5015       uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5016       unsigned IdxN = getRegForGEPIndex(Idx);
5017       if (!IdxN)
5018         return false;
5019 
5020       if (ElementSize != 1) {
5021         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5022         if (!C)
5023           return false;
5024         IdxN = emitMul_rr(VT, IdxN, C);
5025         if (!IdxN)
5026           return false;
5027       }
5028       N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5029       if (!N)
5030         return false;
5031     }
5032   }
5033   if (TotalOffs) {
5034     N = emitAdd_ri_(VT, N, TotalOffs);
5035     if (!N)
5036       return false;
5037   }
5038   updateValueMap(I, N);
5039   return true;
5040 }
5041 
5042 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5043   assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5044          "cmpxchg survived AtomicExpand at optlevel > -O0");
5045 
5046   auto *RetPairTy = cast<StructType>(I->getType());
5047   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5048   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5049          "cmpxchg has a non-i1 status result");
5050 
5051   MVT VT;
5052   if (!isTypeLegal(RetTy, VT))
5053     return false;
5054 
5055   const TargetRegisterClass *ResRC;
5056   unsigned Opc, CmpOpc;
5057   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5058   // extractvalue selection doesn't support that.
5059   if (VT == MVT::i32) {
5060     Opc = AArch64::CMP_SWAP_32;
5061     CmpOpc = AArch64::SUBSWrs;
5062     ResRC = &AArch64::GPR32RegClass;
5063   } else if (VT == MVT::i64) {
5064     Opc = AArch64::CMP_SWAP_64;
5065     CmpOpc = AArch64::SUBSXrs;
5066     ResRC = &AArch64::GPR64RegClass;
5067   } else {
5068     return false;
5069   }
5070 
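  // The sequence emitted below for an i32 cmpxchg is roughly (illustrative;
  // register names are placeholders):
  //   CMP_SWAP_32 %old, %scratch, %addr, %desired, %new
  //   SUBSWrs     WZR, %old, %desired, #0
  //   CSINCWr     %success, WZR, WZR, ne   ; 1 if %old == %desired, else 0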
5071   const MCInstrDesc &II = TII.get(Opc);
5072 
5073   const Register AddrReg = constrainOperandRegClass(
5074       II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
5075   const Register DesiredReg = constrainOperandRegClass(
5076       II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
5077   const Register NewReg = constrainOperandRegClass(
5078       II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
5079 
5080   const Register ResultReg1 = createResultReg(ResRC);
5081   const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
5082   const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);
5083 
5084   // FIXME: MachineMemOperand doesn't support cmpxchg yet.
5085   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
5086       .addDef(ResultReg1)
5087       .addDef(ScratchReg)
5088       .addUse(AddrReg)
5089       .addUse(DesiredReg)
5090       .addUse(NewReg);
5091 
5092   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
5093       .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
5094       .addUse(ResultReg1)
5095       .addUse(DesiredReg)
5096       .addImm(0);
5097 
5098   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
5099       .addDef(ResultReg2)
5100       .addUse(AArch64::WZR)
5101       .addUse(AArch64::WZR)
5102       .addImm(AArch64CC::NE);
5103 
5104   assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
5105   updateValueMap(I, ResultReg1, 2);
5106   return true;
5107 }
5108 
5109 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
5110   if (TLI.fallBackToDAGISel(*I))
5111     return false;
5112   switch (I->getOpcode()) {
5113   default:
5114     break;
5115   case Instruction::Add:
5116   case Instruction::Sub:
5117     return selectAddSub(I);
5118   case Instruction::Mul:
5119     return selectMul(I);
5120   case Instruction::SDiv:
5121     return selectSDiv(I);
5122   case Instruction::SRem:
5123     if (!selectBinaryOp(I, ISD::SREM))
5124       return selectRem(I, ISD::SREM);
5125     return true;
5126   case Instruction::URem:
5127     if (!selectBinaryOp(I, ISD::UREM))
5128       return selectRem(I, ISD::UREM);
5129     return true;
5130   case Instruction::Shl:
5131   case Instruction::LShr:
5132   case Instruction::AShr:
5133     return selectShift(I);
5134   case Instruction::And:
5135   case Instruction::Or:
5136   case Instruction::Xor:
5137     return selectLogicalOp(I);
5138   case Instruction::Br:
5139     return selectBranch(I);
5140   case Instruction::IndirectBr:
5141     return selectIndirectBr(I);
5142   case Instruction::BitCast:
5143     if (!FastISel::selectBitCast(I))
5144       return selectBitCast(I);
5145     return true;
5146   case Instruction::FPToSI:
5147     if (!selectCast(I, ISD::FP_TO_SINT))
5148       return selectFPToInt(I, /*Signed=*/true);
5149     return true;
5150   case Instruction::FPToUI:
5151     return selectFPToInt(I, /*Signed=*/false);
5152   case Instruction::ZExt:
5153   case Instruction::SExt:
5154     return selectIntExt(I);
5155   case Instruction::Trunc:
5156     if (!selectCast(I, ISD::TRUNCATE))
5157       return selectTrunc(I);
5158     return true;
5159   case Instruction::FPExt:
5160     return selectFPExt(I);
5161   case Instruction::FPTrunc:
5162     return selectFPTrunc(I);
5163   case Instruction::SIToFP:
5164     if (!selectCast(I, ISD::SINT_TO_FP))
5165       return selectIntToFP(I, /*Signed=*/true);
5166     return true;
5167   case Instruction::UIToFP:
5168     return selectIntToFP(I, /*Signed=*/false);
5169   case Instruction::Load:
5170     return selectLoad(I);
5171   case Instruction::Store:
5172     return selectStore(I);
5173   case Instruction::FCmp:
5174   case Instruction::ICmp:
5175     return selectCmp(I);
5176   case Instruction::Select:
5177     return selectSelect(I);
5178   case Instruction::Ret:
5179     return selectRet(I);
5180   case Instruction::FRem:
5181     return selectFRem(I);
5182   case Instruction::GetElementPtr:
5183     return selectGetElementPtr(I);
5184   case Instruction::AtomicCmpXchg:
5185     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5186   }
5187 
5188   // Fall back to target-independent instruction selection.
5189   return selectOperator(I, I->getOpcode());
5190 }
5191 
5192 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5193                                         const TargetLibraryInfo *LibInfo) {
5194 
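  // FastISel is not used for functions that carry SME state (ZA/ZT0) or have a
  // streaming or streaming-compatible interface or body; returning nullptr
  // defers such functions to SelectionDAG.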
5195   SMEAttrs CallerAttrs(*FuncInfo.Fn);
5196   if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5197       CallerAttrs.hasStreamingInterfaceOrBody() ||
5198       CallerAttrs.hasStreamingCompatibleInterface())
5199     return nullptr;
5200   return new AArch64FastISel(FuncInfo, LibInfo);
5201 }
5202