1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the AArch64-specific support for the FastISel class. Some
10 // of the target-specific code is generated by tablegen in the file
11 // AArch64GenFastISel.inc, which is #included here.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "AArch64.h"
16 #include "AArch64CallingConvention.h"
17 #include "AArch64MachineFunctionInfo.h"
18 #include "AArch64RegisterInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/ADT/APFloat.h"
23 #include "llvm/ADT/APInt.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/Analysis/BranchProbabilityInfo.h"
27 #include "llvm/CodeGen/CallingConvLower.h"
28 #include "llvm/CodeGen/FastISel.h"
29 #include "llvm/CodeGen/FunctionLoweringInfo.h"
30 #include "llvm/CodeGen/ISDOpcodes.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineConstantPool.h"
33 #include "llvm/CodeGen/MachineFrameInfo.h"
34 #include "llvm/CodeGen/MachineInstr.h"
35 #include "llvm/CodeGen/MachineInstrBuilder.h"
36 #include "llvm/CodeGen/MachineMemOperand.h"
37 #include "llvm/CodeGen/MachineRegisterInfo.h"
38 #include "llvm/CodeGen/RuntimeLibcallUtil.h"
39 #include "llvm/CodeGen/ValueTypes.h"
40 #include "llvm/CodeGenTypes/MachineValueType.h"
41 #include "llvm/IR/Argument.h"
42 #include "llvm/IR/Attributes.h"
43 #include "llvm/IR/BasicBlock.h"
44 #include "llvm/IR/CallingConv.h"
45 #include "llvm/IR/Constant.h"
46 #include "llvm/IR/Constants.h"
47 #include "llvm/IR/DataLayout.h"
48 #include "llvm/IR/DerivedTypes.h"
49 #include "llvm/IR/Function.h"
50 #include "llvm/IR/GetElementPtrTypeIterator.h"
51 #include "llvm/IR/GlobalValue.h"
52 #include "llvm/IR/InstrTypes.h"
53 #include "llvm/IR/Instruction.h"
54 #include "llvm/IR/Instructions.h"
55 #include "llvm/IR/IntrinsicInst.h"
56 #include "llvm/IR/Intrinsics.h"
57 #include "llvm/IR/IntrinsicsAArch64.h"
58 #include "llvm/IR/Module.h"
59 #include "llvm/IR/Operator.h"
60 #include "llvm/IR/Type.h"
61 #include "llvm/IR/User.h"
62 #include "llvm/IR/Value.h"
63 #include "llvm/MC/MCInstrDesc.h"
64 #include "llvm/MC/MCRegisterInfo.h"
65 #include "llvm/MC/MCSymbol.h"
66 #include "llvm/Support/AtomicOrdering.h"
67 #include "llvm/Support/Casting.h"
68 #include "llvm/Support/CodeGen.h"
69 #include "llvm/Support/Compiler.h"
70 #include "llvm/Support/ErrorHandling.h"
71 #include "llvm/Support/MathExtras.h"
72 #include <algorithm>
73 #include <cassert>
74 #include <cstdint>
75 #include <iterator>
76 #include <utility>
77
78 using namespace llvm;
79
80 namespace {
81
82 class AArch64FastISel final : public FastISel {
83 class Address {
84 public:
85 using BaseKind = enum {
86 RegBase,
87 FrameIndexBase
88 };
89
90 private:
91 BaseKind Kind = RegBase;
92 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
93 union {
94 unsigned Reg;
95 int FI;
96 } Base;
97 unsigned OffsetReg = 0;
98 unsigned Shift = 0;
99 int64_t Offset = 0;
100 const GlobalValue *GV = nullptr;
101
102 public:
103 Address() { Base.Reg = 0; }
104
105 void setKind(BaseKind K) { Kind = K; }
106 BaseKind getKind() const { return Kind; }
107 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
108 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
109 bool isRegBase() const { return Kind == RegBase; }
110 bool isFIBase() const { return Kind == FrameIndexBase; }
111
112 void setReg(unsigned Reg) {
113 assert(isRegBase() && "Invalid base register access!");
114 Base.Reg = Reg;
115 }
116
117 unsigned getReg() const {
118 assert(isRegBase() && "Invalid base register access!");
119 return Base.Reg;
120 }
121
122 void setOffsetReg(unsigned Reg) {
123 OffsetReg = Reg;
124 }
125
126 unsigned getOffsetReg() const {
127 return OffsetReg;
128 }
129
130 void setFI(unsigned FI) {
131 assert(isFIBase() && "Invalid base frame index access!");
132 Base.FI = FI;
133 }
134
135 unsigned getFI() const {
136 assert(isFIBase() && "Invalid base frame index access!");
137 return Base.FI;
138 }
139
140 void setOffset(int64_t O) { Offset = O; }
141 int64_t getOffset() { return Offset; }
142 void setShift(unsigned S) { Shift = S; }
143 unsigned getShift() { return Shift; }
144
145 void setGlobalValue(const GlobalValue *G) { GV = G; }
146 const GlobalValue *getGlobalValue() { return GV; }
147 };
148
149 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
150 /// make the right decision when generating code for different targets.
151 const AArch64Subtarget *Subtarget;
152 LLVMContext *Context;
153
154 bool fastLowerArguments() override;
155 bool fastLowerCall(CallLoweringInfo &CLI) override;
156 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
157
158 private:
159 // Selection routines.
160 bool selectAddSub(const Instruction *I);
161 bool selectLogicalOp(const Instruction *I);
162 bool selectLoad(const Instruction *I);
163 bool selectStore(const Instruction *I);
164 bool selectBranch(const Instruction *I);
165 bool selectIndirectBr(const Instruction *I);
166 bool selectCmp(const Instruction *I);
167 bool selectSelect(const Instruction *I);
168 bool selectFPExt(const Instruction *I);
169 bool selectFPTrunc(const Instruction *I);
170 bool selectFPToInt(const Instruction *I, bool Signed);
171 bool selectIntToFP(const Instruction *I, bool Signed);
172 bool selectRem(const Instruction *I, unsigned ISDOpcode);
173 bool selectRet(const Instruction *I);
174 bool selectTrunc(const Instruction *I);
175 bool selectIntExt(const Instruction *I);
176 bool selectMul(const Instruction *I);
177 bool selectShift(const Instruction *I);
178 bool selectBitCast(const Instruction *I);
179 bool selectFRem(const Instruction *I);
180 bool selectSDiv(const Instruction *I);
181 bool selectGetElementPtr(const Instruction *I);
182 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
183
184 // Utility helper routines.
185 bool isTypeLegal(Type *Ty, MVT &VT);
186 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
187 bool isValueAvailable(const Value *V) const;
188 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
189 bool computeCallAddress(const Value *V, Address &Addr);
190 bool simplifyAddress(Address &Addr, MVT VT);
191 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
192 MachineMemOperand::Flags Flags,
193 unsigned ScaleFactor, MachineMemOperand *MMO);
194 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
195 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
196 MaybeAlign Alignment);
197 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
198 const Value *Cond);
199 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
200 bool optimizeSelect(const SelectInst *SI);
201 unsigned getRegForGEPIndex(const Value *Idx);
202
203 // Emit helper routines.
204 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
205 const Value *RHS, bool SetFlags = false,
206 bool WantResult = true, bool IsZExt = false);
207 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
208 unsigned RHSReg, bool SetFlags = false,
209 bool WantResult = true);
210 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
211 uint64_t Imm, bool SetFlags = false,
212 bool WantResult = true);
213 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
214 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
215 uint64_t ShiftImm, bool SetFlags = false,
216 bool WantResult = true);
217 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
218 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
219 uint64_t ShiftImm, bool SetFlags = false,
220 bool WantResult = true);
221
222 // Emit functions.
223 bool emitCompareAndBranch(const BranchInst *BI);
224 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
225 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
226 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
227 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
228 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
229 MachineMemOperand *MMO = nullptr);
230 bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
231 MachineMemOperand *MMO = nullptr);
232 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
233 MachineMemOperand *MMO = nullptr);
234 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
235 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
236 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
237 bool SetFlags = false, bool WantResult = true,
238 bool IsZExt = false);
239 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
240 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
241 bool SetFlags = false, bool WantResult = true,
242 bool IsZExt = false);
243 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
244 bool WantResult = true);
245 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
246 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
247 bool WantResult = true);
248 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
249 const Value *RHS);
250 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
251 uint64_t Imm);
252 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
253 unsigned RHSReg, uint64_t ShiftImm);
254 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
255 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
256 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
257 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
258 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
259 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
260 bool IsZExt = true);
261 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
262 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
263 bool IsZExt = true);
264 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
265 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
266 bool IsZExt = false);
267
268 unsigned materializeInt(const ConstantInt *CI, MVT VT);
269 unsigned materializeFP(const ConstantFP *CFP, MVT VT);
270 unsigned materializeGV(const GlobalValue *GV);
271
272 // Call handling routines.
273 private:
274 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
275 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
276 unsigned &NumBytes);
277 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);
278
279 public:
280 // Backend specific FastISel code.
281 unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
282 unsigned fastMaterializeConstant(const Constant *C) override;
283 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;
284
285 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
286 const TargetLibraryInfo *LibInfo)
287 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
288 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
289 Context = &FuncInfo.Fn->getContext();
290 }
291
292 bool fastSelectInstruction(const Instruction *I) override;
293
294 #include "AArch64GenFastISel.inc"
295 };
296
297 } // end anonymous namespace
298
299 /// Check if the sign-/zero-extend will be a noop.
300 static bool isIntExtFree(const Instruction *I) {
301 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
302 "Unexpected integer extend instruction.");
303 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
304 "Unexpected value type.");
305 bool IsZExt = isa<ZExtInst>(I);
306
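// A sign-/zero-extend of a single-use load is normally folded into the load
// during load selection, so the extend itself costs nothing.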
307 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
308 if (LI->hasOneUse())
309 return true;
310
311 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
312 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
313 return true;
314
315 return false;
316 }
317
318 /// Determine the implicit scale factor that is applied by a memory
319 /// operation for a given value type.
320 static unsigned getImplicitScaleFactor(MVT VT) {
321 switch (VT.SimpleTy) {
322 default:
323 return 0; // invalid
324 case MVT::i1: // fall-through
325 case MVT::i8:
326 return 1;
327 case MVT::i16:
328 return 2;
329 case MVT::i32: // fall-through
330 case MVT::f32:
331 return 4;
332 case MVT::i64: // fall-through
333 case MVT::f64:
334 return 8;
335 }
336 }
337
338 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
339 if (CC == CallingConv::GHC)
340 return CC_AArch64_GHC;
341 if (CC == CallingConv::CFGuard_Check)
342 return CC_AArch64_Win64_CFGuard_Check;
343 if (Subtarget->isTargetDarwin())
344 return CC_AArch64_DarwinPCS;
345 if (Subtarget->isTargetWindows())
346 return CC_AArch64_Win64PCS;
347 return CC_AArch64_AAPCS;
348 }
349
350 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
351 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
352 "Alloca should always return a pointer.");
353
354 // Don't handle dynamic allocas.
355 if (!FuncInfo.StaticAllocaMap.count(AI))
356 return 0;
357
358 DenseMap<const AllocaInst *, int>::iterator SI =
359 FuncInfo.StaticAllocaMap.find(AI);
360
361 if (SI != FuncInfo.StaticAllocaMap.end()) {
362 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
363 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
364 ResultReg)
365 .addFrameIndex(SI->second)
366 .addImm(0)
367 .addImm(0);
368 return ResultReg;
369 }
370
371 return 0;
372 }
373
374 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
375 if (VT > MVT::i64)
376 return 0;
377
378 if (!CI->isZero())
379 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
380
381 // Create a copy from the zero register to materialize a "0" value.
382 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
383 : &AArch64::GPR32RegClass;
384 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
385 Register ResultReg = createResultReg(RC);
386 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
387 ResultReg).addReg(ZeroReg, getKillRegState(true));
388 return ResultReg;
389 }
390
391 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
392 // Positive zero (+0.0) has to be materialized with a fmov from the zero
393 // register, because the immediate version of fmov cannot encode zero.
394 if (CFP->isNullValue())
395 return fastMaterializeFloatZero(CFP);
396
397 if (VT != MVT::f32 && VT != MVT::f64)
398 return 0;
399
400 const APFloat Val = CFP->getValueAPF();
401 bool Is64Bit = (VT == MVT::f64);
402 // Check whether we can use FMOV instructions to materialize the constant;
403 // otherwise we have to materialize it via the constant pool.
404 int Imm =
405 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
406 if (Imm != -1) {
407 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
408 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
409 }
410
411 // For the large code model materialize the FP constant in code.
412 if (TM.getCodeModel() == CodeModel::Large) {
413 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
414 const TargetRegisterClass *RC = Is64Bit ?
415 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
416
417 Register TmpReg = createResultReg(RC);
418 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
419 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());
420
421 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
422 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
423 TII.get(TargetOpcode::COPY), ResultReg)
424 .addReg(TmpReg, getKillRegState(true));
425
426 return ResultReg;
427 }
428
429 // Materialize via constant pool. MachineConstantPool wants an explicit
430 // alignment.
431 Align Alignment = DL.getPrefTypeAlign(CFP->getType());
432
433 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
434 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
435 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
436 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);
437
438 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
439 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
440 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
441 .addReg(ADRPReg)
442 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
443 return ResultReg;
444 }
445
446 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
447 // We can't handle thread-local variables quickly yet.
448 if (GV->isThreadLocal())
449 return 0;
450
451 // MachO still uses GOT for large code-model accesses, but ELF requires
452 // movz/movk sequences, which FastISel doesn't handle yet.
453 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
454 return 0;
455
456 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
457
458 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
459 if (!DestEVT.isSimple())
460 return 0;
461
462 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
463 unsigned ResultReg;
464
465 if (OpFlags & AArch64II::MO_GOT) {
466 // ADRP + LDRX
467 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
468 ADRPReg)
469 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
470
471 unsigned LdrOpc;
472 if (Subtarget->isTargetILP32()) {
473 ResultReg = createResultReg(&AArch64::GPR32RegClass);
474 LdrOpc = AArch64::LDRWui;
475 } else {
476 ResultReg = createResultReg(&AArch64::GPR64RegClass);
477 LdrOpc = AArch64::LDRXui;
478 }
479 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
480 ResultReg)
481 .addReg(ADRPReg)
482 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
483 AArch64II::MO_NC | OpFlags);
484 if (!Subtarget->isTargetILP32())
485 return ResultReg;
486
487 // LDRWui produces a 32-bit register, but pointers in-register are 64 bits,
488 // so we must extend the result on ILP32.
489 Register Result64 = createResultReg(&AArch64::GPR64RegClass);
490 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
491 TII.get(TargetOpcode::SUBREG_TO_REG))
492 .addDef(Result64)
493 .addImm(0)
494 .addReg(ResultReg, RegState::Kill)
495 .addImm(AArch64::sub_32);
496 return Result64;
497 } else {
498 // ADRP + ADDX
499 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
500 ADRPReg)
501 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);
502
503 if (OpFlags & AArch64II::MO_TAGGED) {
504 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
505 // We do so by creating a MOVK that sets bits 48-63 of the register to
506 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
507 // the small code model so we can assume a binary size of <= 4GB, which
508 // makes the untagged PC relative offset positive. The binary must also be
509 // loaded into address range [0, 2^48). Both of these properties need to
510 // be ensured at runtime when using tagged addresses.
511 //
512 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
513 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
514 // are not exactly 1:1 with FastISel so we cannot easily abstract this
515 // out. At some point, it would be nice to find a way to not have this
516 // duplicate code.
517 unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
518 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
519 DstReg)
520 .addReg(ADRPReg)
521 .addGlobalAddress(GV, /*Offset=*/0x100000000,
522 AArch64II::MO_PREL | AArch64II::MO_G3)
523 .addImm(48);
524 ADRPReg = DstReg;
525 }
526
527 ResultReg = createResultReg(&AArch64::GPR64spRegClass);
528 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
529 ResultReg)
530 .addReg(ADRPReg)
531 .addGlobalAddress(GV, 0,
532 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
533 .addImm(0);
534 }
535 return ResultReg;
536 }
537
538 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
539 EVT CEVT = TLI.getValueType(DL, C->getType(), true);
540
541 // Only handle simple types.
542 if (!CEVT.isSimple())
543 return 0;
544 MVT VT = CEVT.getSimpleVT();
545 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
546 // 'null' pointers need somewhat special treatment.
547 if (isa<ConstantPointerNull>(C)) {
548 assert(VT == MVT::i64 && "Expected 64-bit pointers");
549 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
550 }
551
552 if (const auto *CI = dyn_cast<ConstantInt>(C))
553 return materializeInt(CI, VT);
554 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
555 return materializeFP(CFP, VT);
556 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
557 return materializeGV(GV);
558
559 return 0;
560 }
561
562 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
563 assert(CFP->isNullValue() &&
564 "Floating-point constant is not a positive zero.");
565 MVT VT;
566 if (!isTypeLegal(CFP->getType(), VT))
567 return 0;
568
569 if (VT != MVT::f32 && VT != MVT::f64)
570 return 0;
571
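// +0.0 is an all-zero bit pattern, so it can be produced by an FMOV from the
// integer zero register instead of a constant-pool load.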
572 bool Is64Bit = (VT == MVT::f64);
573 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
574 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
575 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
576 }
577
578 /// Check if the multiply is by a power-of-2 constant.
579 static bool isMulPowOf2(const Value *I) {
580 if (const auto *MI = dyn_cast<MulOperator>(I)) {
581 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
582 if (C->getValue().isPowerOf2())
583 return true;
584 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
585 if (C->getValue().isPowerOf2())
586 return true;
587 }
588 return false;
589 }
590
591 // Computes the address to get to an object.
592 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
593 {
594 const User *U = nullptr;
595 unsigned Opcode = Instruction::UserOp1;
596 if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
597 // Don't walk into other basic blocks unless the object is an alloca from
598 // another block; otherwise it may not have a virtual register assigned.
599 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
600 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
601 Opcode = I->getOpcode();
602 U = I;
603 }
604 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
605 Opcode = C->getOpcode();
606 U = C;
607 }
608
609 if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
610 if (Ty->getAddressSpace() > 255)
611 // Fast instruction selection doesn't support the special
612 // address spaces.
613 return false;
614
615 switch (Opcode) {
616 default:
617 break;
618 case Instruction::BitCast:
619 // Look through bitcasts.
620 return computeAddress(U->getOperand(0), Addr, Ty);
621
622 case Instruction::IntToPtr:
623 // Look past no-op inttoptrs.
624 if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
625 TLI.getPointerTy(DL))
626 return computeAddress(U->getOperand(0), Addr, Ty);
627 break;
628
629 case Instruction::PtrToInt:
630 // Look past no-op ptrtoints.
631 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
632 return computeAddress(U->getOperand(0), Addr, Ty);
633 break;
634
635 case Instruction::GetElementPtr: {
636 Address SavedAddr = Addr;
637 uint64_t TmpOffset = Addr.getOffset();
638
639 // Iterate through the GEP folding the constants into offsets where
640 // we can.
641 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
642 GTI != E; ++GTI) {
643 const Value *Op = GTI.getOperand();
644 if (StructType *STy = GTI.getStructTypeOrNull()) {
645 const StructLayout *SL = DL.getStructLayout(STy);
646 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
647 TmpOffset += SL->getElementOffset(Idx);
648 } else {
649 uint64_t S = GTI.getSequentialElementStride(DL);
650 while (true) {
651 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
652 // Constant-offset addressing.
653 TmpOffset += CI->getSExtValue() * S;
654 break;
655 }
656 if (canFoldAddIntoGEP(U, Op)) {
657 // A compatible add with a constant operand. Fold the constant.
658 ConstantInt *CI =
659 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
660 TmpOffset += CI->getSExtValue() * S;
661 // Iterate on the other operand.
662 Op = cast<AddOperator>(Op)->getOperand(0);
663 continue;
664 }
665 // Unsupported
666 goto unsupported_gep;
667 }
668 }
669 }
670
671 // Try to grab the base operand now.
672 Addr.setOffset(TmpOffset);
673 if (computeAddress(U->getOperand(0), Addr, Ty))
674 return true;
675
676 // We failed, restore everything and try the other options.
677 Addr = SavedAddr;
678
679 unsupported_gep:
680 break;
681 }
682 case Instruction::Alloca: {
683 const AllocaInst *AI = cast<AllocaInst>(Obj);
684 DenseMap<const AllocaInst *, int>::iterator SI =
685 FuncInfo.StaticAllocaMap.find(AI);
686 if (SI != FuncInfo.StaticAllocaMap.end()) {
687 Addr.setKind(Address::FrameIndexBase);
688 Addr.setFI(SI->second);
689 return true;
690 }
691 break;
692 }
693 case Instruction::Add: {
694 // Adds of constants are common and easy enough.
695 const Value *LHS = U->getOperand(0);
696 const Value *RHS = U->getOperand(1);
697
698 if (isa<ConstantInt>(LHS))
699 std::swap(LHS, RHS);
700
701 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
702 Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
703 return computeAddress(LHS, Addr, Ty);
704 }
705
706 Address Backup = Addr;
707 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
708 return true;
709 Addr = Backup;
710
711 break;
712 }
713 case Instruction::Sub: {
714 // Subs of constants are common and easy enough.
715 const Value *LHS = U->getOperand(0);
716 const Value *RHS = U->getOperand(1);
717
718 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
719 Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
720 return computeAddress(LHS, Addr, Ty);
721 }
722 break;
723 }
724 case Instruction::Shl: {
725 if (Addr.getOffsetReg())
726 break;
727
728 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
729 if (!CI)
730 break;
731
732 unsigned Val = CI->getZExtValue();
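// The reg+reg addressing mode can only scale the offset register by the width
// of the access, so only shift amounts of 1-3 (scales of 2, 4, or 8) can be
// folded, and the shift must match the access size (checked below).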
733 if (Val < 1 || Val > 3)
734 break;
735
736 uint64_t NumBytes = 0;
737 if (Ty && Ty->isSized()) {
738 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
739 NumBytes = NumBits / 8;
740 if (!isPowerOf2_64(NumBits))
741 NumBytes = 0;
742 }
743
744 if (NumBytes != (1ULL << Val))
745 break;
746
747 Addr.setShift(Val);
748 Addr.setExtendType(AArch64_AM::LSL);
749
750 const Value *Src = U->getOperand(0);
751 if (const auto *I = dyn_cast<Instruction>(Src)) {
752 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
753 // Fold the zext or sext when it won't become a noop.
754 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
755 if (!isIntExtFree(ZE) &&
756 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
757 Addr.setExtendType(AArch64_AM::UXTW);
758 Src = ZE->getOperand(0);
759 }
760 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
761 if (!isIntExtFree(SE) &&
762 SE->getOperand(0)->getType()->isIntegerTy(32)) {
763 Addr.setExtendType(AArch64_AM::SXTW);
764 Src = SE->getOperand(0);
765 }
766 }
767 }
768 }
769
770 if (const auto *AI = dyn_cast<BinaryOperator>(Src))
771 if (AI->getOpcode() == Instruction::And) {
772 const Value *LHS = AI->getOperand(0);
773 const Value *RHS = AI->getOperand(1);
774
775 if (const auto *C = dyn_cast<ConstantInt>(LHS))
776 if (C->getValue() == 0xffffffff)
777 std::swap(LHS, RHS);
778
779 if (const auto *C = dyn_cast<ConstantInt>(RHS))
780 if (C->getValue() == 0xffffffff) {
781 Addr.setExtendType(AArch64_AM::UXTW);
782 Register Reg = getRegForValue(LHS);
783 if (!Reg)
784 return false;
785 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
786 Addr.setOffsetReg(Reg);
787 return true;
788 }
789 }
790
791 Register Reg = getRegForValue(Src);
792 if (!Reg)
793 return false;
794 Addr.setOffsetReg(Reg);
795 return true;
796 }
797 case Instruction::Mul: {
798 if (Addr.getOffsetReg())
799 break;
800
801 if (!isMulPowOf2(U))
802 break;
803
804 const Value *LHS = U->getOperand(0);
805 const Value *RHS = U->getOperand(1);
806
807 // Canonicalize power-of-2 value to the RHS.
808 if (const auto *C = dyn_cast<ConstantInt>(LHS))
809 if (C->getValue().isPowerOf2())
810 std::swap(LHS, RHS);
811
812 assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
813 const auto *C = cast<ConstantInt>(RHS);
814 unsigned Val = C->getValue().logBase2();
815 if (Val < 1 || Val > 3)
816 break;
817
818 uint64_t NumBytes = 0;
819 if (Ty && Ty->isSized()) {
820 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
821 NumBytes = NumBits / 8;
822 if (!isPowerOf2_64(NumBits))
823 NumBytes = 0;
824 }
825
826 if (NumBytes != (1ULL << Val))
827 break;
828
829 Addr.setShift(Val);
830 Addr.setExtendType(AArch64_AM::LSL);
831
832 const Value *Src = LHS;
833 if (const auto *I = dyn_cast<Instruction>(Src)) {
834 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
835 // Fold the zext or sext when it won't become a noop.
836 if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
837 if (!isIntExtFree(ZE) &&
838 ZE->getOperand(0)->getType()->isIntegerTy(32)) {
839 Addr.setExtendType(AArch64_AM::UXTW);
840 Src = ZE->getOperand(0);
841 }
842 } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
843 if (!isIntExtFree(SE) &&
844 SE->getOperand(0)->getType()->isIntegerTy(32)) {
845 Addr.setExtendType(AArch64_AM::SXTW);
846 Src = SE->getOperand(0);
847 }
848 }
849 }
850 }
851
852 Register Reg = getRegForValue(Src);
853 if (!Reg)
854 return false;
855 Addr.setOffsetReg(Reg);
856 return true;
857 }
858 case Instruction::And: {
859 if (Addr.getOffsetReg())
860 break;
861
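// An 'and' with 0xffffffff can only be folded directly as a zero-extended
// (UXTW) offset register when no scaling is required, i.e. for byte-sized
// accesses.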
862 if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
863 break;
864
865 const Value *LHS = U->getOperand(0);
866 const Value *RHS = U->getOperand(1);
867
868 if (const auto *C = dyn_cast<ConstantInt>(LHS))
869 if (C->getValue() == 0xffffffff)
870 std::swap(LHS, RHS);
871
872 if (const auto *C = dyn_cast<ConstantInt>(RHS))
873 if (C->getValue() == 0xffffffff) {
874 Addr.setShift(0);
875 Addr.setExtendType(AArch64_AM::LSL);
876 Addr.setExtendType(AArch64_AM::UXTW);
877
878 Register Reg = getRegForValue(LHS);
879 if (!Reg)
880 return false;
881 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
882 Addr.setOffsetReg(Reg);
883 return true;
884 }
885 break;
886 }
887 case Instruction::SExt:
888 case Instruction::ZExt: {
889 if (!Addr.getReg() || Addr.getOffsetReg())
890 break;
891
892 const Value *Src = nullptr;
893 // Fold the zext or sext when it won't become a noop.
894 if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
895 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
896 Addr.setExtendType(AArch64_AM::UXTW);
897 Src = ZE->getOperand(0);
898 }
899 } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
900 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
901 Addr.setExtendType(AArch64_AM::SXTW);
902 Src = SE->getOperand(0);
903 }
904 }
905
906 if (!Src)
907 break;
908
909 Addr.setShift(0);
910 Register Reg = getRegForValue(Src);
911 if (!Reg)
912 return false;
913 Addr.setOffsetReg(Reg);
914 return true;
915 }
916 } // end switch
917
918 if (Addr.isRegBase() && !Addr.getReg()) {
919 Register Reg = getRegForValue(Obj);
920 if (!Reg)
921 return false;
922 Addr.setReg(Reg);
923 return true;
924 }
925
926 if (!Addr.getOffsetReg()) {
927 Register Reg = getRegForValue(Obj);
928 if (!Reg)
929 return false;
930 Addr.setOffsetReg(Reg);
931 return true;
932 }
933
934 return false;
935 }
936
937 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
938 const User *U = nullptr;
939 unsigned Opcode = Instruction::UserOp1;
940 bool InMBB = true;
941
942 if (const auto *I = dyn_cast<Instruction>(V)) {
943 Opcode = I->getOpcode();
944 U = I;
945 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
946 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
947 Opcode = C->getOpcode();
948 U = C;
949 }
950
951 switch (Opcode) {
952 default: break;
953 case Instruction::BitCast:
954 // Look past bitcasts if its operand is in the same BB.
955 if (InMBB)
956 return computeCallAddress(U->getOperand(0), Addr);
957 break;
958 case Instruction::IntToPtr:
959 // Look past no-op inttoptrs if its operand is in the same BB.
960 if (InMBB &&
961 TLI.getValueType(DL, U->getOperand(0)->getType()) ==
962 TLI.getPointerTy(DL))
963 return computeCallAddress(U->getOperand(0), Addr);
964 break;
965 case Instruction::PtrToInt:
966 // Look past no-op ptrtoints if its operand is in the same BB.
967 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
968 return computeCallAddress(U->getOperand(0), Addr);
969 break;
970 }
971
972 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
973 Addr.setGlobalValue(GV);
974 return true;
975 }
976
977 // If all else fails, try to materialize the value in a register.
978 if (!Addr.getGlobalValue()) {
979 Addr.setReg(getRegForValue(V));
980 return Addr.getReg() != 0;
981 }
982
983 return false;
984 }
985
986 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
987 EVT evt = TLI.getValueType(DL, Ty, true);
988
989 if (Subtarget->isTargetILP32() && Ty->isPointerTy())
990 return false;
991
992 // Only handle simple types.
993 if (evt == MVT::Other || !evt.isSimple())
994 return false;
995 VT = evt.getSimpleVT();
996
997 // This is a legal type, but it's not something we handle in fast-isel.
998 if (VT == MVT::f128)
999 return false;
1000
1001 // Handle all other legal types, i.e. a register that will directly hold this
1002 // value.
1003 return TLI.isTypeLegal(VT);
1004 }
1005
1006 /// Determine if the value type is supported by FastISel.
1007 ///
1008 /// FastISel for AArch64 can handle more value types than are legal. This adds
1009 // simple value types such as i1, i8, and i16.
1010 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
1011 if (Ty->isVectorTy() && !IsVectorAllowed)
1012 return false;
1013
1014 if (isTypeLegal(Ty, VT))
1015 return true;
1016
1017 // If this is a type that can be sign- or zero-extended to a basic operation,
1018 // go ahead and accept it now.
1019 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
1020 return true;
1021
1022 return false;
1023 }
1024
1025 bool AArch64FastISel::isValueAvailable(const Value *V) const {
1026 if (!isa<Instruction>(V))
1027 return true;
1028
1029 const auto *I = cast<Instruction>(V);
1030 return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
1031 }
1032
1033 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
1034 if (Subtarget->isTargetILP32())
1035 return false;
1036
1037 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1038 if (!ScaleFactor)
1039 return false;
1040
1041 bool ImmediateOffsetNeedsLowering = false;
1042 bool RegisterOffsetNeedsLowering = false;
1043 int64_t Offset = Addr.getOffset();
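// AArch64 loads/stores accept either a signed 9-bit unscaled immediate or an
// unsigned 12-bit immediate scaled by the access size; offsets outside those
// ranges have to be lowered into separate address arithmetic.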
1044 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
1045 ImmediateOffsetNeedsLowering = true;
1046 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
1047 !isUInt<12>(Offset / ScaleFactor))
1048 ImmediateOffsetNeedsLowering = true;
1049
1050 // Cannot encode an offset register and an immediate offset in the same
1051 // instruction. Fold the immediate offset into the load/store instruction and
1052 // emit an additional add to take care of the offset register.
1053 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
1054 RegisterOffsetNeedsLowering = true;
1055
1056 // Cannot encode zero register as base.
1057 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
1058 RegisterOffsetNeedsLowering = true;
1059
1060 // If this is a stack pointer and the offset needs to be simplified then put
1061 // the alloca address into a register, set the base type back to register and
1062 // continue. This should almost never happen.
1063 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
1064 {
1065 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
1066 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
1067 ResultReg)
1068 .addFrameIndex(Addr.getFI())
1069 .addImm(0)
1070 .addImm(0);
1071 Addr.setKind(Address::RegBase);
1072 Addr.setReg(ResultReg);
1073 }
1074
1075 if (RegisterOffsetNeedsLowering) {
1076 unsigned ResultReg = 0;
1077 if (Addr.getReg()) {
1078 if (Addr.getExtendType() == AArch64_AM::SXTW ||
1079 Addr.getExtendType() == AArch64_AM::UXTW )
1080 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1081 Addr.getOffsetReg(), Addr.getExtendType(),
1082 Addr.getShift());
1083 else
1084 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
1085 Addr.getOffsetReg(), AArch64_AM::LSL,
1086 Addr.getShift());
1087 } else {
1088 if (Addr.getExtendType() == AArch64_AM::UXTW)
1089 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1090 Addr.getShift(), /*IsZExt=*/true);
1091 else if (Addr.getExtendType() == AArch64_AM::SXTW)
1092 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
1093 Addr.getShift(), /*IsZExt=*/false);
1094 else
1095 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
1096 Addr.getShift());
1097 }
1098 if (!ResultReg)
1099 return false;
1100
1101 Addr.setReg(ResultReg);
1102 Addr.setOffsetReg(0);
1103 Addr.setShift(0);
1104 Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
1105 }
1106
1107 // Since the offset is too large for the load/store instruction get the
1108 // reg+offset into a register.
1109 if (ImmediateOffsetNeedsLowering) {
1110 unsigned ResultReg;
1111 if (Addr.getReg())
1112 // Try to fold the immediate into the add instruction.
1113 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
1114 else
1115 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);
1116
1117 if (!ResultReg)
1118 return false;
1119 Addr.setReg(ResultReg);
1120 Addr.setOffset(0);
1121 }
1122 return true;
1123 }
1124
1125 void AArch64FastISel::addLoadStoreOperands(Address &Addr,
1126 const MachineInstrBuilder &MIB,
1127 MachineMemOperand::Flags Flags,
1128 unsigned ScaleFactor,
1129 MachineMemOperand *MMO) {
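// Scaled addressing modes encode the immediate in units of the access size.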
1130 int64_t Offset = Addr.getOffset() / ScaleFactor;
1131 // Frame base works a bit differently. Handle it separately.
1132 if (Addr.isFIBase()) {
1133 int FI = Addr.getFI();
1134 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
1135 // and alignment should be based on the VT.
1136 MMO = FuncInfo.MF->getMachineMemOperand(
1137 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
1138 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
1139 // Now add the rest of the operands.
1140 MIB.addFrameIndex(FI).addImm(Offset);
1141 } else {
1142 assert(Addr.isRegBase() && "Unexpected address kind.");
1143 const MCInstrDesc &II = MIB->getDesc();
1144 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
1145 Addr.setReg(
1146 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
1147 Addr.setOffsetReg(
1148 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
1149 if (Addr.getOffsetReg()) {
1150 assert(Addr.getOffset() == 0 && "Unexpected offset");
1151 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
1152 Addr.getExtendType() == AArch64_AM::SXTX;
1153 MIB.addReg(Addr.getReg());
1154 MIB.addReg(Addr.getOffsetReg());
1155 MIB.addImm(IsSigned);
1156 MIB.addImm(Addr.getShift() != 0);
1157 } else
1158 MIB.addReg(Addr.getReg()).addImm(Offset);
1159 }
1160
1161 if (MMO)
1162 MIB.addMemOperand(MMO);
1163 }
1164
1165 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
1166 const Value *RHS, bool SetFlags,
1167 bool WantResult, bool IsZExt) {
1168 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
1169 bool NeedExtend = false;
1170 switch (RetVT.SimpleTy) {
1171 default:
1172 return 0;
1173 case MVT::i1:
1174 NeedExtend = true;
1175 break;
1176 case MVT::i8:
1177 NeedExtend = true;
1178 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
1179 break;
1180 case MVT::i16:
1181 NeedExtend = true;
1182 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
1183 break;
1184 case MVT::i32: // fall-through
1185 case MVT::i64:
1186 break;
1187 }
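// Arithmetic on i1/i8/i16 is performed in a 32-bit register after extending
// the operands, so widen the result type to at least i32.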
1188 MVT SrcVT = RetVT;
1189 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);
1190
1191 // Canonicalize immediates to the RHS first.
1192 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
1193 std::swap(LHS, RHS);
1194
1195 // Canonicalize mul by power of 2 to the RHS.
1196 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1197 if (isMulPowOf2(LHS))
1198 std::swap(LHS, RHS);
1199
1200 // Canonicalize shift immediate to the RHS.
1201 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
1202 if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
1203 if (isa<ConstantInt>(SI->getOperand(1)))
1204 if (SI->getOpcode() == Instruction::Shl ||
1205 SI->getOpcode() == Instruction::LShr ||
1206 SI->getOpcode() == Instruction::AShr )
1207 std::swap(LHS, RHS);
1208
1209 Register LHSReg = getRegForValue(LHS);
1210 if (!LHSReg)
1211 return 0;
1212
1213 if (NeedExtend)
1214 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);
1215
1216 unsigned ResultReg = 0;
1217 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1218 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
1219 if (C->isNegative())
1220 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
1221 WantResult);
1222 else
1223 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
1224 WantResult);
1225 } else if (const auto *C = dyn_cast<Constant>(RHS))
1226 if (C->isNullValue())
1227 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);
1228
1229 if (ResultReg)
1230 return ResultReg;
1231
1232 // Only extend the RHS within the instruction if there is a valid extend type.
1233 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
1234 isValueAvailable(RHS)) {
1235 Register RHSReg = getRegForValue(RHS);
1236 if (!RHSReg)
1237 return 0;
1238 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
1239 SetFlags, WantResult);
1240 }
1241
1242 // Check if the mul can be folded into the instruction.
1243 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1244 if (isMulPowOf2(RHS)) {
1245 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1246 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1247
1248 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1249 if (C->getValue().isPowerOf2())
1250 std::swap(MulLHS, MulRHS);
1251
1252 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1253 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1254 Register RHSReg = getRegForValue(MulLHS);
1255 if (!RHSReg)
1256 return 0;
1257 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
1258 ShiftVal, SetFlags, WantResult);
1259 if (ResultReg)
1260 return ResultReg;
1261 }
1262 }
1263
1264 // Check if the shift can be folded into the instruction.
1265 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1266 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
1267 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1268 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
1269 switch (SI->getOpcode()) {
1270 default: break;
1271 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break;
1272 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
1273 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
1274 }
1275 uint64_t ShiftVal = C->getZExtValue();
1276 if (ShiftType != AArch64_AM::InvalidShiftExtend) {
1277 Register RHSReg = getRegForValue(SI->getOperand(0));
1278 if (!RHSReg)
1279 return 0;
1280 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
1281 ShiftVal, SetFlags, WantResult);
1282 if (ResultReg)
1283 return ResultReg;
1284 }
1285 }
1286 }
1287 }
1288
1289 Register RHSReg = getRegForValue(RHS);
1290 if (!RHSReg)
1291 return 0;
1292
1293 if (NeedExtend)
1294 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);
1295
1296 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
1297 }
1298
1299 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
1300 unsigned RHSReg, bool SetFlags,
1301 bool WantResult) {
1302 assert(LHSReg && RHSReg && "Invalid register number.");
1303
1304 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
1305 RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
1306 return 0;
1307
1308 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1309 return 0;
1310
1311 static const unsigned OpcTable[2][2][2] = {
1312 { { AArch64::SUBWrr, AArch64::SUBXrr },
1313 { AArch64::ADDWrr, AArch64::ADDXrr } },
1314 { { AArch64::SUBSWrr, AArch64::SUBSXrr },
1315 { AArch64::ADDSWrr, AArch64::ADDSXrr } }
1316 };
1317 bool Is64Bit = RetVT == MVT::i64;
1318 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1319 const TargetRegisterClass *RC =
1320 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1321 unsigned ResultReg;
1322 if (WantResult)
1323 ResultReg = createResultReg(RC);
1324 else
1325 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1326
1327 const MCInstrDesc &II = TII.get(Opc);
1328 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1329 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1330 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1331 .addReg(LHSReg)
1332 .addReg(RHSReg);
1333 return ResultReg;
1334 }
1335
1336 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
1337 uint64_t Imm, bool SetFlags,
1338 bool WantResult) {
1339 assert(LHSReg && "Invalid register number.");
1340
1341 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1342 return 0;
1343
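// ADD/SUB (immediate) takes a 12-bit unsigned immediate that may optionally
// be shifted left by 12 bits.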
1344 unsigned ShiftImm;
1345 if (isUInt<12>(Imm))
1346 ShiftImm = 0;
1347 else if ((Imm & 0xfff000) == Imm) {
1348 ShiftImm = 12;
1349 Imm >>= 12;
1350 } else
1351 return 0;
1352
1353 static const unsigned OpcTable[2][2][2] = {
1354 { { AArch64::SUBWri, AArch64::SUBXri },
1355 { AArch64::ADDWri, AArch64::ADDXri } },
1356 { { AArch64::SUBSWri, AArch64::SUBSXri },
1357 { AArch64::ADDSWri, AArch64::ADDSXri } }
1358 };
1359 bool Is64Bit = RetVT == MVT::i64;
1360 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1361 const TargetRegisterClass *RC;
1362 if (SetFlags)
1363 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1364 else
1365 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1366 unsigned ResultReg;
1367 if (WantResult)
1368 ResultReg = createResultReg(RC);
1369 else
1370 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1371
1372 const MCInstrDesc &II = TII.get(Opc);
1373 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1374 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1375 .addReg(LHSReg)
1376 .addImm(Imm)
1377 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
1378 return ResultReg;
1379 }
1380
1381 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
1382 unsigned RHSReg,
1383 AArch64_AM::ShiftExtendType ShiftType,
1384 uint64_t ShiftImm, bool SetFlags,
1385 bool WantResult) {
1386 assert(LHSReg && RHSReg && "Invalid register number.");
1387 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
1388 RHSReg != AArch64::SP && RHSReg != AArch64::WSP);
1389
1390 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1391 return 0;
1392
1393 // Don't deal with undefined shifts.
1394 if (ShiftImm >= RetVT.getSizeInBits())
1395 return 0;
1396
1397 static const unsigned OpcTable[2][2][2] = {
1398 { { AArch64::SUBWrs, AArch64::SUBXrs },
1399 { AArch64::ADDWrs, AArch64::ADDXrs } },
1400 { { AArch64::SUBSWrs, AArch64::SUBSXrs },
1401 { AArch64::ADDSWrs, AArch64::ADDSXrs } }
1402 };
1403 bool Is64Bit = RetVT == MVT::i64;
1404 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1405 const TargetRegisterClass *RC =
1406 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1407 unsigned ResultReg;
1408 if (WantResult)
1409 ResultReg = createResultReg(RC);
1410 else
1411 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1412
1413 const MCInstrDesc &II = TII.get(Opc);
1414 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1415 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1416 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1417 .addReg(LHSReg)
1418 .addReg(RHSReg)
1419 .addImm(getShifterImm(ShiftType, ShiftImm));
1420 return ResultReg;
1421 }
1422
1423 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
1424 unsigned RHSReg,
1425 AArch64_AM::ShiftExtendType ExtType,
1426 uint64_t ShiftImm, bool SetFlags,
1427 bool WantResult) {
1428 assert(LHSReg && RHSReg && "Invalid register number.");
1429 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
1430 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);
1431
1432 if (RetVT != MVT::i32 && RetVT != MVT::i64)
1433 return 0;
1434
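// The extended-register form only allows a small left shift of the extended
// operand.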
1435 if (ShiftImm >= 4)
1436 return 0;
1437
1438 static const unsigned OpcTable[2][2][2] = {
1439 { { AArch64::SUBWrx, AArch64::SUBXrx },
1440 { AArch64::ADDWrx, AArch64::ADDXrx } },
1441 { { AArch64::SUBSWrx, AArch64::SUBSXrx },
1442 { AArch64::ADDSWrx, AArch64::ADDSXrx } }
1443 };
1444 bool Is64Bit = RetVT == MVT::i64;
1445 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
1446 const TargetRegisterClass *RC = nullptr;
1447 if (SetFlags)
1448 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
1449 else
1450 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
1451 unsigned ResultReg;
1452 if (WantResult)
1453 ResultReg = createResultReg(RC);
1454 else
1455 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
1456
1457 const MCInstrDesc &II = TII.get(Opc);
1458 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
1459 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
1460 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
1461 .addReg(LHSReg)
1462 .addReg(RHSReg)
1463 .addImm(getArithExtendImm(ExtType, ShiftImm));
1464 return ResultReg;
1465 }
1466
1467 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
1468 Type *Ty = LHS->getType();
1469 EVT EVT = TLI.getValueType(DL, Ty, true);
1470 if (!EVT.isSimple())
1471 return false;
1472 MVT VT = EVT.getSimpleVT();
1473
1474 switch (VT.SimpleTy) {
1475 default:
1476 return false;
1477 case MVT::i1:
1478 case MVT::i8:
1479 case MVT::i16:
1480 case MVT::i32:
1481 case MVT::i64:
1482 return emitICmp(VT, LHS, RHS, IsZExt);
1483 case MVT::f32:
1484 case MVT::f64:
1485 return emitFCmp(VT, LHS, RHS);
1486 }
1487 }
1488
1489 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
1490 bool IsZExt) {
1491 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
1492 IsZExt) != 0;
1493 }
1494
1495 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
1496 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
1497 /*SetFlags=*/true, /*WantResult=*/false) != 0;
1498 }
1499
1500 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
1501 if (RetVT != MVT::f32 && RetVT != MVT::f64)
1502 return false;
1503
1504 // Check to see if the 2nd operand is a constant that we can encode directly
1505 // in the compare.
1506 bool UseImm = false;
1507 if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
1508 if (CFP->isZero() && !CFP->isNegative())
1509 UseImm = true;
1510
1511 Register LHSReg = getRegForValue(LHS);
1512 if (!LHSReg)
1513 return false;
1514
1515 if (UseImm) {
1516 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
1517 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1518 .addReg(LHSReg);
1519 return true;
1520 }
1521
1522 Register RHSReg = getRegForValue(RHS);
1523 if (!RHSReg)
1524 return false;
1525
1526 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
1527 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
1528 .addReg(LHSReg)
1529 .addReg(RHSReg);
1530 return true;
1531 }
1532
1533 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
1534 bool SetFlags, bool WantResult, bool IsZExt) {
1535 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
1536 IsZExt);
1537 }
1538
1539 /// This method is a wrapper to simplify add emission.
1540 ///
1541 /// First try to emit an add with an immediate operand using emitAddSub_ri. If
1542 /// that fails, then try to materialize the immediate into a register and use
1543 /// emitAddSub_rr instead.
1544 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
1545 unsigned ResultReg;
1546 if (Imm < 0)
1547 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
1548 else
1549 ResultReg = emitAddSub_ri(true, VT, Op0, Imm);
1550
1551 if (ResultReg)
1552 return ResultReg;
1553
1554 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
1555 if (!CReg)
1556 return 0;
1557
1558 ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
1559 return ResultReg;
1560 }
1561
1562 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
1563 bool SetFlags, bool WantResult, bool IsZExt) {
1564 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
1565 IsZExt);
1566 }
1567
1568 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
1569 unsigned RHSReg, bool WantResult) {
1570 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
1571 /*SetFlags=*/true, WantResult);
1572 }
1573
1574 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
1575 unsigned RHSReg,
1576 AArch64_AM::ShiftExtendType ShiftType,
1577 uint64_t ShiftImm, bool WantResult) {
1578 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
1579 ShiftImm, /*SetFlags=*/true, WantResult);
1580 }
1581
1582 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
1583 const Value *LHS, const Value *RHS) {
1584 // Canonicalize immediates to the RHS first.
1585 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
1586 std::swap(LHS, RHS);
1587
1588 // Canonicalize mul by power-of-2 to the RHS.
1589 if (LHS->hasOneUse() && isValueAvailable(LHS))
1590 if (isMulPowOf2(LHS))
1591 std::swap(LHS, RHS);
1592
1593 // Canonicalize shift immediate to the RHS.
1594 if (LHS->hasOneUse() && isValueAvailable(LHS))
1595 if (const auto *SI = dyn_cast<ShlOperator>(LHS))
1596 if (isa<ConstantInt>(SI->getOperand(1)))
1597 std::swap(LHS, RHS);
1598
1599 Register LHSReg = getRegForValue(LHS);
1600 if (!LHSReg)
1601 return 0;
1602
1603 unsigned ResultReg = 0;
1604 if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
1605 uint64_t Imm = C->getZExtValue();
1606 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
1607 }
1608 if (ResultReg)
1609 return ResultReg;
1610
1611 // Check if the mul can be folded into the instruction.
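  // As a rough example, "%m = mul i32 %b, 8" feeding "and i32 %a, %m" can be
  // emitted as a single "and w0, w1, w2, lsl #3" (illustrative registers),
  // folding the power-of-two multiply into the shifted-register form.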
1612 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1613 if (isMulPowOf2(RHS)) {
1614 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
1615 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);
1616
1617 if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
1618 if (C->getValue().isPowerOf2())
1619 std::swap(MulLHS, MulRHS);
1620
1621 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
1622 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
1623
1624 Register RHSReg = getRegForValue(MulLHS);
1625 if (!RHSReg)
1626 return 0;
1627 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1628 if (ResultReg)
1629 return ResultReg;
1630 }
1631 }
1632
1633 // Check if the shift can be folded into the instruction.
1634 if (RHS->hasOneUse() && isValueAvailable(RHS)) {
1635 if (const auto *SI = dyn_cast<ShlOperator>(RHS))
1636 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
1637 uint64_t ShiftVal = C->getZExtValue();
1638 Register RHSReg = getRegForValue(SI->getOperand(0));
1639 if (!RHSReg)
1640 return 0;
1641 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
1642 if (ResultReg)
1643 return ResultReg;
1644 }
1645 }
1646
1647 Register RHSReg = getRegForValue(RHS);
1648 if (!RHSReg)
1649 return 0;
1650
1651 MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
1652 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
1653 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1654 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1655 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1656 }
1657 return ResultReg;
1658 }
1659
1660 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
1661 unsigned LHSReg, uint64_t Imm) {
1662 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1663 "ISD nodes are not consecutive!");
1664 static const unsigned OpcTable[3][2] = {
1665 { AArch64::ANDWri, AArch64::ANDXri },
1666 { AArch64::ORRWri, AArch64::ORRXri },
1667 { AArch64::EORWri, AArch64::EORXri }
1668 };
1669 const TargetRegisterClass *RC;
1670 unsigned Opc;
1671 unsigned RegSize;
1672 switch (RetVT.SimpleTy) {
1673 default:
1674 return 0;
1675 case MVT::i1:
1676 case MVT::i8:
1677 case MVT::i16:
1678 case MVT::i32: {
1679 unsigned Idx = ISDOpc - ISD::AND;
1680 Opc = OpcTable[Idx][0];
1681 RC = &AArch64::GPR32spRegClass;
1682 RegSize = 32;
1683 break;
1684 }
1685 case MVT::i64:
1686 Opc = OpcTable[ISDOpc - ISD::AND][1];
1687 RC = &AArch64::GPR64spRegClass;
1688 RegSize = 64;
1689 break;
1690 }
1691
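  // Logical (bitmask) immediates must be a replicated, rotated run of
  // contiguous ones; e.g. 0x00ff00ff is encodable for a 32-bit AND, while an
  // arbitrary value such as 0x1234 is not, and the caller then has to
  // materialize the constant in a register instead.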
1692 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
1693 return 0;
1694
1695 Register ResultReg =
1696 fastEmitInst_ri(Opc, RC, LHSReg,
1697 AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
1698 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
1699 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1700 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1701 }
1702 return ResultReg;
1703 }
1704
1705 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
1706 unsigned LHSReg, unsigned RHSReg,
1707 uint64_t ShiftImm) {
1708 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
1709 "ISD nodes are not consecutive!");
1710 static const unsigned OpcTable[3][2] = {
1711 { AArch64::ANDWrs, AArch64::ANDXrs },
1712 { AArch64::ORRWrs, AArch64::ORRXrs },
1713 { AArch64::EORWrs, AArch64::EORXrs }
1714 };
1715
1716 // Don't deal with undefined shifts.
1717 if (ShiftImm >= RetVT.getSizeInBits())
1718 return 0;
1719
1720 const TargetRegisterClass *RC;
1721 unsigned Opc;
1722 switch (RetVT.SimpleTy) {
1723 default:
1724 return 0;
1725 case MVT::i1:
1726 case MVT::i8:
1727 case MVT::i16:
1728 case MVT::i32:
1729 Opc = OpcTable[ISDOpc - ISD::AND][0];
1730 RC = &AArch64::GPR32RegClass;
1731 break;
1732 case MVT::i64:
1733 Opc = OpcTable[ISDOpc - ISD::AND][1];
1734 RC = &AArch64::GPR64RegClass;
1735 break;
1736 }
1737 Register ResultReg =
1738 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg,
1739 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm));
1740 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
1741 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
1742 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
1743 }
1744 return ResultReg;
1745 }
1746
1747 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg,
1748 uint64_t Imm) {
1749 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm);
1750 }
1751
1752 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr,
1753 bool WantZExt, MachineMemOperand *MMO) {
1754 if (!TLI.allowsMisalignedMemoryAccesses(VT))
1755 return 0;
1756
1757 // Simplify this down to something we can handle.
1758 if (!simplifyAddress(Addr, VT))
1759 return 0;
1760
1761 unsigned ScaleFactor = getImplicitScaleFactor(VT);
1762 if (!ScaleFactor)
1763 llvm_unreachable("Unexpected value type.");
1764
1765 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
1766 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
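  // Rough examples for an i64 load (ScaleFactor == 8, illustrative registers):
  //   offset 16  -> ldr  x0, [x1, #16]   (scaled, unsigned 12-bit immediate)
  //   offset -8  -> ldur x0, [x1, #-8]   (unscaled, signed 9-bit immediate)
  //   offset 12  -> ldur x0, [x1, #12]   (unaligned for the scaled form)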
1767 bool UseScaled = true;
1768 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
1769 UseScaled = false;
1770 ScaleFactor = 1;
1771 }
1772
1773 static const unsigned GPOpcTable[2][8][4] = {
1774 // Sign-extend.
1775 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi,
1776 AArch64::LDURXi },
1777 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi,
1778 AArch64::LDURXi },
1779 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui,
1780 AArch64::LDRXui },
1781 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui,
1782 AArch64::LDRXui },
1783 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX,
1784 AArch64::LDRXroX },
1785 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX,
1786 AArch64::LDRXroX },
1787 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW,
1788 AArch64::LDRXroW },
1789 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW,
1790 AArch64::LDRXroW }
1791 },
1792 // Zero-extend.
1793 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1794 AArch64::LDURXi },
1795 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi,
1796 AArch64::LDURXi },
1797 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1798 AArch64::LDRXui },
1799 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui,
1800 AArch64::LDRXui },
1801 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1802 AArch64::LDRXroX },
1803 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX,
1804 AArch64::LDRXroX },
1805 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1806 AArch64::LDRXroW },
1807 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW,
1808 AArch64::LDRXroW }
1809 }
1810 };
1811
1812 static const unsigned FPOpcTable[4][2] = {
1813 { AArch64::LDURSi, AArch64::LDURDi },
1814 { AArch64::LDRSui, AArch64::LDRDui },
1815 { AArch64::LDRSroX, AArch64::LDRDroX },
1816 { AArch64::LDRSroW, AArch64::LDRDroW }
1817 };
1818
1819 unsigned Opc;
1820 const TargetRegisterClass *RC;
1821 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
1822 Addr.getOffsetReg();
1823 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
1824 if (Addr.getExtendType() == AArch64_AM::UXTW ||
1825 Addr.getExtendType() == AArch64_AM::SXTW)
1826 Idx++;
1827
1828 bool IsRet64Bit = RetVT == MVT::i64;
1829 switch (VT.SimpleTy) {
1830 default:
1831 llvm_unreachable("Unexpected value type.");
1832 case MVT::i1: // Intentional fall-through.
1833 case MVT::i8:
1834 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0];
1835 RC = (IsRet64Bit && !WantZExt) ?
1836 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1837 break;
1838 case MVT::i16:
1839 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1];
1840 RC = (IsRet64Bit && !WantZExt) ?
1841 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1842 break;
1843 case MVT::i32:
1844 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2];
1845 RC = (IsRet64Bit && !WantZExt) ?
1846 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass;
1847 break;
1848 case MVT::i64:
1849 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3];
1850 RC = &AArch64::GPR64RegClass;
1851 break;
1852 case MVT::f32:
1853 Opc = FPOpcTable[Idx][0];
1854 RC = &AArch64::FPR32RegClass;
1855 break;
1856 case MVT::f64:
1857 Opc = FPOpcTable[Idx][1];
1858 RC = &AArch64::FPR64RegClass;
1859 break;
1860 }
1861
1862 // Create the base instruction, then add the operands.
1863 Register ResultReg = createResultReg(RC);
1864 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1865 TII.get(Opc), ResultReg);
1866 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO);
1867
1868 // Loading an i1 requires special handling.
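  // E.g. "ldrb w0, [x1]" followed by "and w0, w0, #0x1" (illustrative
  // registers), so the result register is guaranteed to hold 0 or 1.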
1869 if (VT == MVT::i1) {
1870 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1);
1871 assert(ANDReg && "Unexpected AND instruction emission failure.");
1872 ResultReg = ANDReg;
1873 }
1874
1875 // For zero-extending loads to 64-bit we emit a 32-bit load and then convert
1876 // the 32-bit register to a 64-bit register.
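  // E.g. a zero-extending i32 load stays a plain "ldr w0, [...]", which
  // already clears bits 63:32 of the X register; the SUBREG_TO_REG below only
  // retypes that W result as the sub_32 of a 64-bit virtual register.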
1877 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) {
1878 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
1879 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
1880 TII.get(AArch64::SUBREG_TO_REG), Reg64)
1881 .addImm(0)
1882 .addReg(ResultReg, getKillRegState(true))
1883 .addImm(AArch64::sub_32);
1884 ResultReg = Reg64;
1885 }
1886 return ResultReg;
1887 }
1888
1889 bool AArch64FastISel::selectAddSub(const Instruction *I) {
1890 MVT VT;
1891 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1892 return false;
1893
1894 if (VT.isVector())
1895 return selectOperator(I, I->getOpcode());
1896
1897 unsigned ResultReg;
1898 switch (I->getOpcode()) {
1899 default:
1900 llvm_unreachable("Unexpected instruction.");
1901 case Instruction::Add:
1902 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1));
1903 break;
1904 case Instruction::Sub:
1905 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1));
1906 break;
1907 }
1908 if (!ResultReg)
1909 return false;
1910
1911 updateValueMap(I, ResultReg);
1912 return true;
1913 }
1914
1915 bool AArch64FastISel::selectLogicalOp(const Instruction *I) {
1916 MVT VT;
1917 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
1918 return false;
1919
1920 if (VT.isVector())
1921 return selectOperator(I, I->getOpcode());
1922
1923 unsigned ResultReg;
1924 switch (I->getOpcode()) {
1925 default:
1926 llvm_unreachable("Unexpected instruction.");
1927 case Instruction::And:
1928 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1));
1929 break;
1930 case Instruction::Or:
1931 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1));
1932 break;
1933 case Instruction::Xor:
1934 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1));
1935 break;
1936 }
1937 if (!ResultReg)
1938 return false;
1939
1940 updateValueMap(I, ResultReg);
1941 return true;
1942 }
1943
1944 bool AArch64FastISel::selectLoad(const Instruction *I) {
1945 MVT VT;
1946 // Verify we have a legal type before going any further. Currently, we handle
1947 // simple types that will directly fit in a register (i32/f32/i64/f64) or
1948 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
1949 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) ||
1950 cast<LoadInst>(I)->isAtomic())
1951 return false;
1952
1953 const Value *SV = I->getOperand(0);
1954 if (TLI.supportSwiftError()) {
1955 // Swifterror values can come from either a function parameter with
1956 // swifterror attribute or an alloca with swifterror attribute.
1957 if (const Argument *Arg = dyn_cast<Argument>(SV)) {
1958 if (Arg->hasSwiftErrorAttr())
1959 return false;
1960 }
1961
1962 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
1963 if (Alloca->isSwiftError())
1964 return false;
1965 }
1966 }
1967
1968 // See if we can handle this address.
1969 Address Addr;
1970 if (!computeAddress(I->getOperand(0), Addr, I->getType()))
1971 return false;
1972
1973 // Fold the following sign-/zero-extend into the load instruction.
1974 bool WantZExt = true;
1975 MVT RetVT = VT;
1976 const Value *IntExtVal = nullptr;
1977 if (I->hasOneUse()) {
1978 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
1979 if (isTypeSupported(ZE->getType(), RetVT))
1980 IntExtVal = ZE;
1981 else
1982 RetVT = VT;
1983 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
1984 if (isTypeSupported(SE->getType(), RetVT))
1985 IntExtVal = SE;
1986 else
1987 RetVT = VT;
1988 WantZExt = false;
1989 }
1990 }
1991
1992 unsigned ResultReg =
1993 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
1994 if (!ResultReg)
1995 return false;
1996
1997 // There are a few different cases we have to handle, because the load or the
1998 // sign-/zero-extend might not be selected by FastISel if we fall back to
1999 // SelectionDAG. There is also an ordering issue when both instructions are in
2000 // different basic blocks.
2001 // 1.) The load instruction is selected by FastISel, but the integer extend
2002 //     is not. This usually happens when the integer extend is in a different
2003 // basic block and SelectionDAG took over for that basic block.
2004 // 2.) The load instruction is selected before the integer extend. This only
2005 // happens when the integer extend is in a different basic block.
2006 // 3.) The load instruction is selected by SelectionDAG and the integer extend
2007 // by FastISel. This happens if there are instructions between the load
2008 // and the integer extend that couldn't be selected by FastISel.
2009 if (IntExtVal) {
2010 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
2011 // could select it. Emit a copy to subreg if necessary. FastISel will remove
2012 // it when it selects the integer extend.
2013 Register Reg = lookUpRegForValue(IntExtVal);
2014 auto *MI = MRI.getUniqueVRegDef(Reg);
2015 if (!MI) {
2016 if (RetVT == MVT::i64 && VT <= MVT::i32) {
2017 if (WantZExt) {
2018 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
2019 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
2020 ResultReg = std::prev(I)->getOperand(0).getReg();
2021 removeDeadCode(I, std::next(I));
2022 } else
2023 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
2024 AArch64::sub_32);
2025 }
2026 updateValueMap(I, ResultReg);
2027 return true;
2028 }
2029
2030 // The integer extend has already been emitted - delete all the instructions
2031 // that have been emitted by the integer extend lowering code and use the
2032 // result from the load instruction directly.
2033 while (MI) {
2034 Reg = 0;
2035 for (auto &Opnd : MI->uses()) {
2036 if (Opnd.isReg()) {
2037 Reg = Opnd.getReg();
2038 break;
2039 }
2040 }
2041 MachineBasicBlock::iterator I(MI);
2042 removeDeadCode(I, std::next(I));
2043 MI = nullptr;
2044 if (Reg)
2045 MI = MRI.getUniqueVRegDef(Reg);
2046 }
2047 updateValueMap(IntExtVal, ResultReg);
2048 return true;
2049 }
2050
2051 updateValueMap(I, ResultReg);
2052 return true;
2053 }
2054
2055 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
2056 unsigned AddrReg,
2057 MachineMemOperand *MMO) {
2058 unsigned Opc;
2059 switch (VT.SimpleTy) {
2060 default: return false;
2061 case MVT::i8: Opc = AArch64::STLRB; break;
2062 case MVT::i16: Opc = AArch64::STLRH; break;
2063 case MVT::i32: Opc = AArch64::STLRW; break;
2064 case MVT::i64: Opc = AArch64::STLRX; break;
2065 }
2066
2067 const MCInstrDesc &II = TII.get(Opc);
2068 SrcReg = constrainOperandRegClass(II, SrcReg, 0);
2069 AddrReg = constrainOperandRegClass(II, AddrReg, 1);
2070 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2071 .addReg(SrcReg)
2072 .addReg(AddrReg)
2073 .addMemOperand(MMO);
2074 return true;
2075 }
2076
2077 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
2078 MachineMemOperand *MMO) {
2079 if (!TLI.allowsMisalignedMemoryAccesses(VT))
2080 return false;
2081
2082 // Simplify this down to something we can handle.
2083 if (!simplifyAddress(Addr, VT))
2084 return false;
2085
2086 unsigned ScaleFactor = getImplicitScaleFactor(VT);
2087 if (!ScaleFactor)
2088 llvm_unreachable("Unexpected value type.");
2089
2090 // Negative offsets require unscaled, 9-bit, signed immediate offsets.
2091 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets.
2092 bool UseScaled = true;
2093 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) {
2094 UseScaled = false;
2095 ScaleFactor = 1;
2096 }
2097
2098 static const unsigned OpcTable[4][6] = {
2099 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi,
2100 AArch64::STURSi, AArch64::STURDi },
2101 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui,
2102 AArch64::STRSui, AArch64::STRDui },
2103 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX,
2104 AArch64::STRSroX, AArch64::STRDroX },
2105 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW,
2106 AArch64::STRSroW, AArch64::STRDroW }
2107 };
2108
2109 unsigned Opc;
2110 bool VTIsi1 = false;
2111 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() &&
2112 Addr.getOffsetReg();
2113 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0;
2114 if (Addr.getExtendType() == AArch64_AM::UXTW ||
2115 Addr.getExtendType() == AArch64_AM::SXTW)
2116 Idx++;
2117
2118 switch (VT.SimpleTy) {
2119 default: llvm_unreachable("Unexpected value type.");
2120 case MVT::i1: VTIsi1 = true; [[fallthrough]];
2121 case MVT::i8: Opc = OpcTable[Idx][0]; break;
2122 case MVT::i16: Opc = OpcTable[Idx][1]; break;
2123 case MVT::i32: Opc = OpcTable[Idx][2]; break;
2124 case MVT::i64: Opc = OpcTable[Idx][3]; break;
2125 case MVT::f32: Opc = OpcTable[Idx][4]; break;
2126 case MVT::f64: Opc = OpcTable[Idx][5]; break;
2127 }
2128
2129 // Storing an i1 requires special handling.
2130 if (VTIsi1 && SrcReg != AArch64::WZR) {
2131 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1);
2132 assert(ANDReg && "Unexpected AND instruction emission failure.");
2133 SrcReg = ANDReg;
2134 }
2135 // Create the base instruction, then add the operands.
2136 const MCInstrDesc &II = TII.get(Opc);
2137 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2138 MachineInstrBuilder MIB =
2139 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg);
2140 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO);
2141
2142 return true;
2143 }
2144
2145 bool AArch64FastISel::selectStore(const Instruction *I) {
2146 MVT VT;
2147 const Value *Op0 = I->getOperand(0);
2148 // Verify we have a legal type before going any further. Currently, we handle
2149 // simple types that will directly fit in a register (i32/f32/i64/f64) or
2150 // those that can be sign or zero-extended to a basic operation (i1/i8/i16).
2151 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
2152 return false;
2153
2154 const Value *PtrV = I->getOperand(1);
2155 if (TLI.supportSwiftError()) {
2156 // Swifterror values can come from either a function parameter with
2157 // swifterror attribute or an alloca with swifterror attribute.
2158 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
2159 if (Arg->hasSwiftErrorAttr())
2160 return false;
2161 }
2162
2163 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
2164 if (Alloca->isSwiftError())
2165 return false;
2166 }
2167 }
2168
2169 // Get the value to be stored into a register. Use the zero register directly
2170 // when possible to avoid an unnecessary copy and a wasted register.
2171 unsigned SrcReg = 0;
2172 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) {
2173 if (CI->isZero())
2174 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2175 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) {
2176 if (CF->isZero() && !CF->isNegative()) {
2177 VT = MVT::getIntegerVT(VT.getSizeInBits());
2178 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
2179 }
2180 }
2181
2182 if (!SrcReg)
2183 SrcReg = getRegForValue(Op0);
2184
2185 if (!SrcReg)
2186 return false;
2187
2188 auto *SI = cast<StoreInst>(I);
2189
2190 // Try to emit a STLR for seq_cst/release.
2191 if (SI->isAtomic()) {
2192 AtomicOrdering Ord = SI->getOrdering();
2193 // The non-atomic instructions are sufficient for relaxed stores.
2194 if (isReleaseOrStronger(Ord)) {
2195 // The STLR addressing mode only supports a base reg; pass that directly.
2196 Register AddrReg = getRegForValue(PtrV);
2197 return emitStoreRelease(VT, SrcReg, AddrReg,
2198 createMachineMemOperandFor(I));
2199 }
2200 }
2201
2202 // See if we can handle this address.
2203 Address Addr;
2204 if (!computeAddress(PtrV, Addr, Op0->getType()))
2205 return false;
2206
2207 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
2208 return false;
2209 return true;
2210 }
2211
2212 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) {
2213 switch (Pred) {
2214 case CmpInst::FCMP_ONE:
2215 case CmpInst::FCMP_UEQ:
2216 default:
2217 // AL is our "false" for now. The other two need more compares.
2218 return AArch64CC::AL;
2219 case CmpInst::ICMP_EQ:
2220 case CmpInst::FCMP_OEQ:
2221 return AArch64CC::EQ;
2222 case CmpInst::ICMP_SGT:
2223 case CmpInst::FCMP_OGT:
2224 return AArch64CC::GT;
2225 case CmpInst::ICMP_SGE:
2226 case CmpInst::FCMP_OGE:
2227 return AArch64CC::GE;
2228 case CmpInst::ICMP_UGT:
2229 case CmpInst::FCMP_UGT:
2230 return AArch64CC::HI;
2231 case CmpInst::FCMP_OLT:
2232 return AArch64CC::MI;
2233 case CmpInst::ICMP_ULE:
2234 case CmpInst::FCMP_OLE:
2235 return AArch64CC::LS;
2236 case CmpInst::FCMP_ORD:
2237 return AArch64CC::VC;
2238 case CmpInst::FCMP_UNO:
2239 return AArch64CC::VS;
2240 case CmpInst::FCMP_UGE:
2241 return AArch64CC::PL;
2242 case CmpInst::ICMP_SLT:
2243 case CmpInst::FCMP_ULT:
2244 return AArch64CC::LT;
2245 case CmpInst::ICMP_SLE:
2246 case CmpInst::FCMP_ULE:
2247 return AArch64CC::LE;
2248 case CmpInst::FCMP_UNE:
2249 case CmpInst::ICMP_NE:
2250 return AArch64CC::NE;
2251 case CmpInst::ICMP_UGE:
2252 return AArch64CC::HS;
2253 case CmpInst::ICMP_ULT:
2254 return AArch64CC::LO;
2255 }
2256 }
2257
2258 /// Try to emit a combined compare-and-branch instruction.
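/// Rough examples (illustrative registers and block labels):
///   icmp eq i64 %x, 0   + br   ->  cbz  x0, <bb>
///   (and i32 %x, 4) != 0 + br  ->  tbnz w0, #2, <bb>
///   icmp slt i32 %x, 0  + br   ->  tbnz w0, #31, <bb>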
2259 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) {
2260 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
2261 // will not be produced, as they are conditional branch instructions that do
2262 // not set flags.
2263 if (FuncInfo.MF->getFunction().hasFnAttribute(
2264 Attribute::SpeculativeLoadHardening))
2265 return false;
2266
2267 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction");
2268 const CmpInst *CI = cast<CmpInst>(BI->getCondition());
2269 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2270
2271 const Value *LHS = CI->getOperand(0);
2272 const Value *RHS = CI->getOperand(1);
2273
2274 MVT VT;
2275 if (!isTypeSupported(LHS->getType(), VT))
2276 return false;
2277
2278 unsigned BW = VT.getSizeInBits();
2279 if (BW > 64)
2280 return false;
2281
2282 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2283 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2284
2285 // Try to take advantage of fallthrough opportunities.
2286 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2287 std::swap(TBB, FBB);
2288 Predicate = CmpInst::getInversePredicate(Predicate);
2289 }
2290
2291 int TestBit = -1;
2292 bool IsCmpNE;
2293 switch (Predicate) {
2294 default:
2295 return false;
2296 case CmpInst::ICMP_EQ:
2297 case CmpInst::ICMP_NE:
2298 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue())
2299 std::swap(LHS, RHS);
2300
2301 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2302 return false;
2303
2304 if (const auto *AI = dyn_cast<BinaryOperator>(LHS))
2305 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) {
2306 const Value *AndLHS = AI->getOperand(0);
2307 const Value *AndRHS = AI->getOperand(1);
2308
2309 if (const auto *C = dyn_cast<ConstantInt>(AndLHS))
2310 if (C->getValue().isPowerOf2())
2311 std::swap(AndLHS, AndRHS);
2312
2313 if (const auto *C = dyn_cast<ConstantInt>(AndRHS))
2314 if (C->getValue().isPowerOf2()) {
2315 TestBit = C->getValue().logBase2();
2316 LHS = AndLHS;
2317 }
2318 }
2319
2320 if (VT == MVT::i1)
2321 TestBit = 0;
2322
2323 IsCmpNE = Predicate == CmpInst::ICMP_NE;
2324 break;
2325 case CmpInst::ICMP_SLT:
2326 case CmpInst::ICMP_SGE:
2327 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue())
2328 return false;
2329
2330 TestBit = BW - 1;
2331 IsCmpNE = Predicate == CmpInst::ICMP_SLT;
2332 break;
2333 case CmpInst::ICMP_SGT:
2334 case CmpInst::ICMP_SLE:
2335 if (!isa<ConstantInt>(RHS))
2336 return false;
2337
2338 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true))
2339 return false;
2340
2341 TestBit = BW - 1;
2342 IsCmpNE = Predicate == CmpInst::ICMP_SLE;
2343 break;
2344 } // end switch
2345
2346 static const unsigned OpcTable[2][2][2] = {
2347 { {AArch64::CBZW, AArch64::CBZX },
2348 {AArch64::CBNZW, AArch64::CBNZX} },
2349 { {AArch64::TBZW, AArch64::TBZX },
2350 {AArch64::TBNZW, AArch64::TBNZX} }
2351 };
2352
2353 bool IsBitTest = TestBit != -1;
2354 bool Is64Bit = BW == 64;
2355 if (TestBit < 32 && TestBit >= 0)
2356 Is64Bit = false;
2357
2358 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit];
2359 const MCInstrDesc &II = TII.get(Opc);
2360
2361 Register SrcReg = getRegForValue(LHS);
2362 if (!SrcReg)
2363 return false;
2364
2365 if (BW == 64 && !Is64Bit)
2366 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32);
2367
2368 if ((BW < 32) && !IsBitTest)
2369 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true);
2370
2371 // Emit the combined compare and branch instruction.
2372 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs());
2373 MachineInstrBuilder MIB =
2374 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
2375 .addReg(SrcReg);
2376 if (IsBitTest)
2377 MIB.addImm(TestBit);
2378 MIB.addMBB(TBB);
2379
2380 finishCondBranch(BI->getParent(), TBB, FBB);
2381 return true;
2382 }
2383
2384 bool AArch64FastISel::selectBranch(const Instruction *I) {
2385 const BranchInst *BI = cast<BranchInst>(I);
2386 if (BI->isUnconditional()) {
2387 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)];
2388 fastEmitBranch(MSucc, BI->getDebugLoc());
2389 return true;
2390 }
2391
2392 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
2393 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
2394
2395 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
2396 if (CI->hasOneUse() && isValueAvailable(CI)) {
2397 // Try to optimize or fold the cmp.
2398 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2399 switch (Predicate) {
2400 default:
2401 break;
2402 case CmpInst::FCMP_FALSE:
2403 fastEmitBranch(FBB, MIMD.getDL());
2404 return true;
2405 case CmpInst::FCMP_TRUE:
2406 fastEmitBranch(TBB, MIMD.getDL());
2407 return true;
2408 }
2409
2410 // Try to emit a combined compare-and-branch first.
2411 if (emitCompareAndBranch(BI))
2412 return true;
2413
2414 // Try to take advantage of fallthrough opportunities.
2415 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2416 std::swap(TBB, FBB);
2417 Predicate = CmpInst::getInversePredicate(Predicate);
2418 }
2419
2420 // Emit the cmp.
2421 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2422 return false;
2423
2424 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch
2425 // instruction.
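      // E.g. "fcmp one" is emitted as "b.mi <bb>" followed by "b.gt <bb>"
      // (ordered less-than or greater-than), and "fcmp ueq" as "b.eq <bb>"
      // followed by "b.vs <bb>" (equal or unordered).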
2426 AArch64CC::CondCode CC = getCompareCC(Predicate);
2427 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2428 switch (Predicate) {
2429 default:
2430 break;
2431 case CmpInst::FCMP_UEQ:
2432 ExtraCC = AArch64CC::EQ;
2433 CC = AArch64CC::VS;
2434 break;
2435 case CmpInst::FCMP_ONE:
2436 ExtraCC = AArch64CC::MI;
2437 CC = AArch64CC::GT;
2438 break;
2439 }
2440 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2441
2442 // Emit the extra branch for FCMP_UEQ and FCMP_ONE.
2443 if (ExtraCC != AArch64CC::AL) {
2444 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2445 .addImm(ExtraCC)
2446 .addMBB(TBB);
2447 }
2448
2449 // Emit the branch.
2450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2451 .addImm(CC)
2452 .addMBB(TBB);
2453
2454 finishCondBranch(BI->getParent(), TBB, FBB);
2455 return true;
2456 }
2457 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
2458 uint64_t Imm = CI->getZExtValue();
2459 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
2460 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B))
2461 .addMBB(Target);
2462
2463 // Obtain the branch probability and add the target to the successor list.
2464 if (FuncInfo.BPI) {
2465 auto BranchProbability = FuncInfo.BPI->getEdgeProbability(
2466 BI->getParent(), Target->getBasicBlock());
2467 FuncInfo.MBB->addSuccessor(Target, BranchProbability);
2468 } else
2469 FuncInfo.MBB->addSuccessorWithoutProb(Target);
2470 return true;
2471 } else {
2472 AArch64CC::CondCode CC = AArch64CC::NE;
2473 if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
2474 // Request the condition register even though only the flags are used here;
2475 // otherwise the intrinsic might be optimized away completely.
2476 Register CondReg = getRegForValue(BI->getCondition());
2477 if (!CondReg)
2478 return false;
2479
2480 // Emit the branch.
2481 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc))
2482 .addImm(CC)
2483 .addMBB(TBB);
2484
2485 finishCondBranch(BI->getParent(), TBB, FBB);
2486 return true;
2487 }
2488 }
2489
2490 Register CondReg = getRegForValue(BI->getCondition());
2491 if (CondReg == 0)
2492 return false;
2493
2494 // i1 conditions come as i32 values, test the lowest bit with tb(n)z.
2495 unsigned Opcode = AArch64::TBNZW;
2496 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
2497 std::swap(TBB, FBB);
2498 Opcode = AArch64::TBZW;
2499 }
2500
2501 const MCInstrDesc &II = TII.get(Opcode);
2502 Register ConstrainedCondReg
2503 = constrainOperandRegClass(II, CondReg, II.getNumDefs());
2504 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
2505 .addReg(ConstrainedCondReg)
2506 .addImm(0)
2507 .addMBB(TBB);
2508
2509 finishCondBranch(BI->getParent(), TBB, FBB);
2510 return true;
2511 }
2512
2513 bool AArch64FastISel::selectIndirectBr(const Instruction *I) {
2514 const IndirectBrInst *BI = cast<IndirectBrInst>(I);
2515 Register AddrReg = getRegForValue(BI->getOperand(0));
2516 if (AddrReg == 0)
2517 return false;
2518
2519 // Authenticated indirectbr is not implemented yet.
2520 if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos"))
2521 return false;
2522
2523 // Emit the indirect branch.
2524 const MCInstrDesc &II = TII.get(AArch64::BR);
2525 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
2526 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg);
2527
2528 // Make sure the CFG is up-to-date.
2529 for (const auto *Succ : BI->successors())
2530 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]);
2531
2532 return true;
2533 }
2534
2535 bool AArch64FastISel::selectCmp(const Instruction *I) {
2536 const CmpInst *CI = cast<CmpInst>(I);
2537
2538 // Vectors of i1 are weird: bail out.
2539 if (CI->getType()->isVectorTy())
2540 return false;
2541
2542 // Try to optimize or fold the cmp.
2543 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
2544 unsigned ResultReg = 0;
2545 switch (Predicate) {
2546 default:
2547 break;
2548 case CmpInst::FCMP_FALSE:
2549 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2550 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
2551 TII.get(TargetOpcode::COPY), ResultReg)
2552 .addReg(AArch64::WZR, getKillRegState(true));
2553 break;
2554 case CmpInst::FCMP_TRUE:
2555 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1);
2556 break;
2557 }
2558
2559 if (ResultReg) {
2560 updateValueMap(I, ResultReg);
2561 return true;
2562 }
2563
2564 // Emit the cmp.
2565 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
2566 return false;
2567
2568 ResultReg = createResultReg(&AArch64::GPR32RegClass);
2569
2570 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These
2571 // condition codes are inverted, because they are used by CSINC.
2572 static unsigned CondCodeTable[2][2] = {
2573 { AArch64CC::NE, AArch64CC::VC },
2574 { AArch64CC::PL, AArch64CC::LE }
2575 };
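  // E.g. for FCMP_UEQ this emits roughly (illustrative registers):
  //   csinc w8, wzr, wzr, ne   // w8 = 1 if equal, else 0
  //   csinc w0, w8,  wzr, vc   // w0 = 1 if unordered, else w8
  // which is 1 exactly when the operands are unordered or equal.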
2576 unsigned *CondCodes = nullptr;
2577 switch (Predicate) {
2578 default:
2579 break;
2580 case CmpInst::FCMP_UEQ:
2581 CondCodes = &CondCodeTable[0][0];
2582 break;
2583 case CmpInst::FCMP_ONE:
2584 CondCodes = &CondCodeTable[1][0];
2585 break;
2586 }
2587
2588 if (CondCodes) {
2589 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass);
2590 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2591 TmpReg1)
2592 .addReg(AArch64::WZR, getKillRegState(true))
2593 .addReg(AArch64::WZR, getKillRegState(true))
2594 .addImm(CondCodes[0]);
2595 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2596 ResultReg)
2597 .addReg(TmpReg1, getKillRegState(true))
2598 .addReg(AArch64::WZR, getKillRegState(true))
2599 .addImm(CondCodes[1]);
2600
2601 updateValueMap(I, ResultReg);
2602 return true;
2603 }
2604
2605 // Now set a register based on the comparison.
2606 AArch64CC::CondCode CC = getCompareCC(Predicate);
2607 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2608 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC);
2609 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr),
2610 ResultReg)
2611 .addReg(AArch64::WZR, getKillRegState(true))
2612 .addReg(AArch64::WZR, getKillRegState(true))
2613 .addImm(invertedCC);
2614
2615 updateValueMap(I, ResultReg);
2616 return true;
2617 }
2618
2619 /// Optimize selects of i1 if one of the operands has a 'true' or 'false'
2620 /// value.
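///
/// Rough examples of the resulting code (schematic register names):
///   select i1 %c, i1 true,  i1 %b  ->  orr Wd, Wc, Wb
///   select i1 %c, i1 %b, i1 false  ->  and Wd, Wc, Wb
///   select i1 %c, i1 false, i1 %b  ->  bic Wd, Wb, Wc   (i.e. %b & ~%c)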
2621 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) {
2622 if (!SI->getType()->isIntegerTy(1))
2623 return false;
2624
2625 const Value *Src1Val, *Src2Val;
2626 unsigned Opc = 0;
2627 bool NeedExtraOp = false;
2628 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) {
2629 if (CI->isOne()) {
2630 Src1Val = SI->getCondition();
2631 Src2Val = SI->getFalseValue();
2632 Opc = AArch64::ORRWrr;
2633 } else {
2634 assert(CI->isZero());
2635 Src1Val = SI->getFalseValue();
2636 Src2Val = SI->getCondition();
2637 Opc = AArch64::BICWrr;
2638 }
2639 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) {
2640 if (CI->isOne()) {
2641 Src1Val = SI->getCondition();
2642 Src2Val = SI->getTrueValue();
2643 Opc = AArch64::ORRWrr;
2644 NeedExtraOp = true;
2645 } else {
2646 assert(CI->isZero());
2647 Src1Val = SI->getCondition();
2648 Src2Val = SI->getTrueValue();
2649 Opc = AArch64::ANDWrr;
2650 }
2651 }
2652
2653 if (!Opc)
2654 return false;
2655
2656 Register Src1Reg = getRegForValue(Src1Val);
2657 if (!Src1Reg)
2658 return false;
2659
2660 Register Src2Reg = getRegForValue(Src2Val);
2661 if (!Src2Reg)
2662 return false;
2663
2664 if (NeedExtraOp)
2665 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1);
2666
2667 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg,
2668 Src2Reg);
2669 updateValueMap(SI, ResultReg);
2670 return true;
2671 }
2672
2673 bool AArch64FastISel::selectSelect(const Instruction *I) {
2674 assert(isa<SelectInst>(I) && "Expected a select instruction.");
2675 MVT VT;
2676 if (!isTypeSupported(I->getType(), VT))
2677 return false;
2678
2679 unsigned Opc;
2680 const TargetRegisterClass *RC;
2681 switch (VT.SimpleTy) {
2682 default:
2683 return false;
2684 case MVT::i1:
2685 case MVT::i8:
2686 case MVT::i16:
2687 case MVT::i32:
2688 Opc = AArch64::CSELWr;
2689 RC = &AArch64::GPR32RegClass;
2690 break;
2691 case MVT::i64:
2692 Opc = AArch64::CSELXr;
2693 RC = &AArch64::GPR64RegClass;
2694 break;
2695 case MVT::f32:
2696 Opc = AArch64::FCSELSrrr;
2697 RC = &AArch64::FPR32RegClass;
2698 break;
2699 case MVT::f64:
2700 Opc = AArch64::FCSELDrrr;
2701 RC = &AArch64::FPR64RegClass;
2702 break;
2703 }
2704
2705 const SelectInst *SI = cast<SelectInst>(I);
2706 const Value *Cond = SI->getCondition();
2707 AArch64CC::CondCode CC = AArch64CC::NE;
2708 AArch64CC::CondCode ExtraCC = AArch64CC::AL;
2709
2710 if (optimizeSelect(SI))
2711 return true;
2712
2713 // Try to pick up the flags so we don't have to emit another compare.
2714 if (foldXALUIntrinsic(CC, I, Cond)) {
2715 // Request the condition register to force emission of the XALU intrinsic.
2716 Register CondReg = getRegForValue(Cond);
2717 if (!CondReg)
2718 return false;
2719 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() &&
2720 isValueAvailable(Cond)) {
2721 const auto *Cmp = cast<CmpInst>(Cond);
2722 // Try to optimize or fold the cmp.
2723 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp);
2724 const Value *FoldSelect = nullptr;
2725 switch (Predicate) {
2726 default:
2727 break;
2728 case CmpInst::FCMP_FALSE:
2729 FoldSelect = SI->getFalseValue();
2730 break;
2731 case CmpInst::FCMP_TRUE:
2732 FoldSelect = SI->getTrueValue();
2733 break;
2734 }
2735
2736 if (FoldSelect) {
2737 Register SrcReg = getRegForValue(FoldSelect);
2738 if (!SrcReg)
2739 return false;
2740
2741 updateValueMap(I, SrcReg);
2742 return true;
2743 }
2744
2745 // Emit the cmp.
2746 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned()))
2747 return false;
2748
2749 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction.
2750 CC = getCompareCC(Predicate);
2751 switch (Predicate) {
2752 default:
2753 break;
2754 case CmpInst::FCMP_UEQ:
2755 ExtraCC = AArch64CC::EQ;
2756 CC = AArch64CC::VS;
2757 break;
2758 case CmpInst::FCMP_ONE:
2759 ExtraCC = AArch64CC::MI;
2760 CC = AArch64CC::GT;
2761 break;
2762 }
2763 assert((CC != AArch64CC::AL) && "Unexpected condition code.");
2764 } else {
2765 Register CondReg = getRegForValue(Cond);
2766 if (!CondReg)
2767 return false;
2768
2769 const MCInstrDesc &II = TII.get(AArch64::ANDSWri);
2770 CondReg = constrainOperandRegClass(II, CondReg, 1);
2771
2772 // Emit a TST instruction (ANDS wzr, reg, #imm).
2773 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II,
2774 AArch64::WZR)
2775 .addReg(CondReg)
2776 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
2777 }
2778
2779 Register Src1Reg = getRegForValue(SI->getTrueValue());
2780 Register Src2Reg = getRegForValue(SI->getFalseValue());
2781
2782 if (!Src1Reg || !Src2Reg)
2783 return false;
2784
2785 if (ExtraCC != AArch64CC::AL)
2786 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC);
2787
2788 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC);
2789 updateValueMap(I, ResultReg);
2790 return true;
2791 }
2792
2793 bool AArch64FastISel::selectFPExt(const Instruction *I) {
2794 Value *V = I->getOperand(0);
2795 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy())
2796 return false;
2797
2798 Register Op = getRegForValue(V);
2799 if (Op == 0)
2800 return false;
2801
2802 Register ResultReg = createResultReg(&AArch64::FPR64RegClass);
2803 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr),
2804 ResultReg).addReg(Op);
2805 updateValueMap(I, ResultReg);
2806 return true;
2807 }
2808
2809 bool AArch64FastISel::selectFPTrunc(const Instruction *I) {
2810 Value *V = I->getOperand(0);
2811 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy())
2812 return false;
2813
2814 Register Op = getRegForValue(V);
2815 if (Op == 0)
2816 return false;
2817
2818 Register ResultReg = createResultReg(&AArch64::FPR32RegClass);
2819 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr),
2820 ResultReg).addReg(Op);
2821 updateValueMap(I, ResultReg);
2822 return true;
2823 }
2824
2825 // FPToUI and FPToSI
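// E.g. "fptosi double %d to i32" becomes "fcvtzs w0, d0" and
// "fptoui float %f to i64" becomes "fcvtzu x0, s0" (illustrative registers).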
2826 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) {
2827 MVT DestVT;
2828 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2829 return false;
2830
2831 Register SrcReg = getRegForValue(I->getOperand(0));
2832 if (SrcReg == 0)
2833 return false;
2834
2835 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2836 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16)
2837 return false;
2838
2839 unsigned Opc;
2840 if (SrcVT == MVT::f64) {
2841 if (Signed)
2842 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr;
2843 else
2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr;
2845 } else {
2846 if (Signed)
2847 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr;
2848 else
2849 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr;
2850 }
2851 Register ResultReg = createResultReg(
2852 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass);
2853 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
2854 .addReg(SrcReg);
2855 updateValueMap(I, ResultReg);
2856 return true;
2857 }
2858
2859 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) {
2860 MVT DestVT;
2861 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector())
2862 return false;
2863 // Let regular ISel handle FP16 and BF16.
2864 if (DestVT == MVT::f16 || DestVT == MVT::bf16)
2865 return false;
2866
2867 assert((DestVT == MVT::f32 || DestVT == MVT::f64) &&
2868 "Unexpected value type.");
2869
2870 Register SrcReg = getRegForValue(I->getOperand(0));
2871 if (!SrcReg)
2872 return false;
2873
2874 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true);
2875
2876 // Handle sign-extension.
2877 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) {
2878 SrcReg =
2879 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed);
2880 if (!SrcReg)
2881 return false;
2882 }
2883
2884 unsigned Opc;
2885 if (SrcVT == MVT::i64) {
2886 if (Signed)
2887 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri;
2888 else
2889 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri;
2890 } else {
2891 if (Signed)
2892 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri;
2893 else
2894 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri;
2895 }
2896
2897 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg);
2898 updateValueMap(I, ResultReg);
2899 return true;
2900 }
2901
2902 bool AArch64FastISel::fastLowerArguments() {
2903 if (!FuncInfo.CanLowerReturn)
2904 return false;
2905
2906 const Function *F = FuncInfo.Fn;
2907 if (F->isVarArg())
2908 return false;
2909
2910 CallingConv::ID CC = F->getCallingConv();
2911 if (CC != CallingConv::C && CC != CallingConv::Swift)
2912 return false;
2913
2914 if (Subtarget->hasCustomCallingConv())
2915 return false;
2916
2917 // Only handle simple cases of up to 8 GPR and FPR each.
2918 unsigned GPRCnt = 0;
2919 unsigned FPRCnt = 0;
2920 for (auto const &Arg : F->args()) {
2921 if (Arg.hasAttribute(Attribute::ByVal) ||
2922 Arg.hasAttribute(Attribute::InReg) ||
2923 Arg.hasAttribute(Attribute::StructRet) ||
2924 Arg.hasAttribute(Attribute::SwiftSelf) ||
2925 Arg.hasAttribute(Attribute::SwiftAsync) ||
2926 Arg.hasAttribute(Attribute::SwiftError) ||
2927 Arg.hasAttribute(Attribute::Nest))
2928 return false;
2929
2930 Type *ArgTy = Arg.getType();
2931 if (ArgTy->isStructTy() || ArgTy->isArrayTy())
2932 return false;
2933
2934 EVT ArgVT = TLI.getValueType(DL, ArgTy);
2935 if (!ArgVT.isSimple())
2936 return false;
2937
2938 MVT VT = ArgVT.getSimpleVT().SimpleTy;
2939 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8())
2940 return false;
2941
2942 if (VT.isVector() &&
2943 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian()))
2944 return false;
2945
2946 if (VT >= MVT::i1 && VT <= MVT::i64)
2947 ++GPRCnt;
2948 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() ||
2949 VT.is128BitVector())
2950 ++FPRCnt;
2951 else
2952 return false;
2953
2954 if (GPRCnt > 8 || FPRCnt > 8)
2955 return false;
2956 }
2957
2958 static const MCPhysReg Registers[6][8] = {
2959 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4,
2960 AArch64::W5, AArch64::W6, AArch64::W7 },
2961 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4,
2962 AArch64::X5, AArch64::X6, AArch64::X7 },
2963 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4,
2964 AArch64::H5, AArch64::H6, AArch64::H7 },
2965 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4,
2966 AArch64::S5, AArch64::S6, AArch64::S7 },
2967 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4,
2968 AArch64::D5, AArch64::D6, AArch64::D7 },
2969 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4,
2970 AArch64::Q5, AArch64::Q6, AArch64::Q7 }
2971 };
2972
2973 unsigned GPRIdx = 0;
2974 unsigned FPRIdx = 0;
2975 for (auto const &Arg : F->args()) {
2976 MVT VT = TLI.getSimpleValueType(DL, Arg.getType());
2977 unsigned SrcReg;
2978 const TargetRegisterClass *RC;
2979 if (VT >= MVT::i1 && VT <= MVT::i32) {
2980 SrcReg = Registers[0][GPRIdx++];
2981 RC = &AArch64::GPR32RegClass;
2982 VT = MVT::i32;
2983 } else if (VT == MVT::i64) {
2984 SrcReg = Registers[1][GPRIdx++];
2985 RC = &AArch64::GPR64RegClass;
2986 } else if (VT == MVT::f16 || VT == MVT::bf16) {
2987 SrcReg = Registers[2][FPRIdx++];
2988 RC = &AArch64::FPR16RegClass;
2989 } else if (VT == MVT::f32) {
2990 SrcReg = Registers[3][FPRIdx++];
2991 RC = &AArch64::FPR32RegClass;
2992 } else if ((VT == MVT::f64) || VT.is64BitVector()) {
2993 SrcReg = Registers[4][FPRIdx++];
2994 RC = &AArch64::FPR64RegClass;
2995 } else if (VT.is128BitVector()) {
2996 SrcReg = Registers[5][FPRIdx++];
2997 RC = &AArch64::FPR128RegClass;
2998 } else
2999 llvm_unreachable("Unexpected value type.");
3000
3001 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
3002 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
3003 // Without this, EmitLiveInCopies may eliminate the livein if its only
3004 // use is a bitcast (which isn't turned into an instruction).
3005 Register ResultReg = createResultReg(RC);
3006 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3007 TII.get(TargetOpcode::COPY), ResultReg)
3008 .addReg(DstReg, getKillRegState(true));
3009 updateValueMap(&Arg, ResultReg);
3010 }
3011 return true;
3012 }
3013
3014 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI,
3015 SmallVectorImpl<MVT> &OutVTs,
3016 unsigned &NumBytes) {
3017 CallingConv::ID CC = CLI.CallConv;
3018 SmallVector<CCValAssign, 16> ArgLocs;
3019 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context);
3020 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC));
3021
3022 // Get a count of how many bytes are to be pushed on the stack.
3023 NumBytes = CCInfo.getStackSize();
3024
3025 // Issue CALLSEQ_START
3026 unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
3027 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown))
3028 .addImm(NumBytes).addImm(0);
3029
3030 // Process the args.
3031 for (CCValAssign &VA : ArgLocs) {
3032 const Value *ArgVal = CLI.OutVals[VA.getValNo()];
3033 MVT ArgVT = OutVTs[VA.getValNo()];
3034
3035 Register ArgReg = getRegForValue(ArgVal);
3036 if (!ArgReg)
3037 return false;
3038
3039 // Handle arg promotion: SExt, ZExt, AExt.
3040 switch (VA.getLocInfo()) {
3041 case CCValAssign::Full:
3042 break;
3043 case CCValAssign::SExt: {
3044 MVT DestVT = VA.getLocVT();
3045 MVT SrcVT = ArgVT;
3046 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false);
3047 if (!ArgReg)
3048 return false;
3049 break;
3050 }
3051 case CCValAssign::AExt:
3052 // Intentional fall-through.
3053 case CCValAssign::ZExt: {
3054 MVT DestVT = VA.getLocVT();
3055 MVT SrcVT = ArgVT;
3056 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true);
3057 if (!ArgReg)
3058 return false;
3059 break;
3060 }
3061 default:
3062 llvm_unreachable("Unknown arg promotion!");
3063 }
3064
3065 // Now copy/store arg to correct locations.
3066 if (VA.isRegLoc() && !VA.needsCustom()) {
3067 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3068 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
3069 CLI.OutRegs.push_back(VA.getLocReg());
3070 } else if (VA.needsCustom()) {
3071 // FIXME: Handle custom args.
3072 return false;
3073 } else {
3074 assert(VA.isMemLoc() && "Assuming store on stack.");
3075
3076 // Don't emit stores for undef values.
3077 if (isa<UndefValue>(ArgVal))
3078 continue;
3079
3080 // Need to store on the stack.
3081 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8;
3082
3083 unsigned BEAlign = 0;
3084 if (ArgSize < 8 && !Subtarget->isLittleEndian())
3085 BEAlign = 8 - ArgSize;
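      // E.g. a 4-byte argument on a big-endian target is stored at slot
      // offset + 4, i.e. into the higher-addressed half of its 8-byte slot.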
3086
3087 Address Addr;
3088 Addr.setKind(Address::RegBase);
3089 Addr.setReg(AArch64::SP);
3090 Addr.setOffset(VA.getLocMemOffset() + BEAlign);
3091
3092 Align Alignment = DL.getABITypeAlign(ArgVal->getType());
3093 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
3094 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()),
3095 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment);
3096
3097 if (!emitStore(ArgVT, ArgReg, Addr, MMO))
3098 return false;
3099 }
3100 }
3101 return true;
3102 }
3103
3104 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) {
3105 CallingConv::ID CC = CLI.CallConv;
3106
3107 // Issue CALLSEQ_END
3108 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
3109 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp))
3110 .addImm(NumBytes).addImm(0);
3111
3112 // Now the return values.
3113 SmallVector<CCValAssign, 16> RVLocs;
3114 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
3115 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC));
3116
3117 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
3118 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3119 CCValAssign &VA = RVLocs[i];
3120 MVT CopyVT = VA.getValVT();
3121 unsigned CopyReg = ResultReg + i;
3122
3123 // TODO: Handle big-endian results
3124 if (CopyVT.isVector() && !Subtarget->isLittleEndian())
3125 return false;
3126
3127 // Copy result out of their specified physreg.
3128 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
3129 CopyReg)
3130 .addReg(VA.getLocReg());
3131 CLI.InRegs.push_back(VA.getLocReg());
3132 }
3133
3134 CLI.ResultReg = ResultReg;
3135 CLI.NumResultRegs = RVLocs.size();
3136
3137 return true;
3138 }
3139
3140 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) {
3141 CallingConv::ID CC = CLI.CallConv;
3142 bool IsTailCall = CLI.IsTailCall;
3143 bool IsVarArg = CLI.IsVarArg;
3144 const Value *Callee = CLI.Callee;
3145 MCSymbol *Symbol = CLI.Symbol;
3146
3147 if (!Callee && !Symbol)
3148 return false;
3149
3150 // Allow SelectionDAG isel to handle calls to functions like setjmp that need
3151 // a bti instruction following the call.
3152 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) &&
3153 !Subtarget->noBTIAtReturnTwice() &&
3154 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement())
3155 return false;
3156
3157 // Allow SelectionDAG isel to handle indirect calls with KCFI checks.
3158 if (CLI.CB && CLI.CB->isIndirectCall() &&
3159 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi))
3160 return false;
3161
3162 // Allow SelectionDAG isel to handle tail calls.
3163 if (IsTailCall)
3164 return false;
3165
3166 // FIXME: we could and should support this, but for now correctness at -O0 is
3167 // more important.
3168 if (Subtarget->isTargetILP32())
3169 return false;
3170
3171 CodeModel::Model CM = TM.getCodeModel();
3172 // Only support the small-addressing and large code models.
3173 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing())
3174 return false;
3175
3176 // FIXME: Add large code model support for ELF.
3177 if (CM == CodeModel::Large && !Subtarget->isTargetMachO())
3178 return false;
3179
3180 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind
3181 // attribute. Check "RtLibUseGOT" instead.
3182 if (MF->getFunction().getParent()->getRtLibUseGOT())
3183 return false;
3184
3185 // Let SDISel handle vararg functions.
3186 if (IsVarArg)
3187 return false;
3188
3189 if (Subtarget->isWindowsArm64EC())
3190 return false;
3191
3192 for (auto Flag : CLI.OutFlags)
3193 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() ||
3194 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError())
3195 return false;
3196
3197 // Set up the argument vectors.
3198 SmallVector<MVT, 16> OutVTs;
3199 OutVTs.reserve(CLI.OutVals.size());
3200
3201 for (auto *Val : CLI.OutVals) {
3202 MVT VT;
3203 if (!isTypeLegal(Val->getType(), VT) &&
3204 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16))
3205 return false;
3206
3207 // We don't handle vector parameters yet.
3208 if (VT.isVector() || VT.getSizeInBits() > 64)
3209 return false;
3210
3211 OutVTs.push_back(VT);
3212 }
3213
3214 Address Addr;
3215 if (Callee && !computeCallAddress(Callee, Addr))
3216 return false;
3217
3218 // The weak function target may be zero; in that case we must use indirect
3219 // addressing via a stub on Windows, as it may be out of range for a
3220 // PC-relative jump.
3221 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() &&
3222 Addr.getGlobalValue()->hasExternalWeakLinkage())
3223 return false;
3224
3225 // Handle the arguments now that we've gotten them.
3226 unsigned NumBytes;
3227 if (!processCallArgs(CLI, OutVTs, NumBytes))
3228 return false;
3229
3230 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3231 if (RegInfo->isAnyArgRegReserved(*MF))
3232 RegInfo->emitReservedArgRegCallError(*MF);
3233
3234 // Issue the call.
3235 MachineInstrBuilder MIB;
3236 if (Subtarget->useSmallAddressing()) {
3237 const MCInstrDesc &II =
3238 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL);
3239 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II);
3240 if (Symbol)
3241 MIB.addSym(Symbol, 0);
3242 else if (Addr.getGlobalValue())
3243 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0);
3244 else if (Addr.getReg()) {
3245 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0);
3246 MIB.addReg(Reg);
3247 } else
3248 return false;
3249 } else {
3250 unsigned CallReg = 0;
3251 if (Symbol) {
3252 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
3253 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
3254 ADRPReg)
3255 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE);
3256
3257 CallReg = createResultReg(&AArch64::GPR64RegClass);
3258 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3259 TII.get(AArch64::LDRXui), CallReg)
3260 .addReg(ADRPReg)
3261 .addSym(Symbol,
3262 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3263 } else if (Addr.getGlobalValue())
3264 CallReg = materializeGV(Addr.getGlobalValue());
3265 else if (Addr.getReg())
3266 CallReg = Addr.getReg();
3267
3268 if (!CallReg)
3269 return false;
3270
3271 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF));
3272 CallReg = constrainOperandRegClass(II, CallReg, 0);
3273 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg);
3274 }
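// In the non-small-addressing path above, a call to an external symbol is
// emitted as a GOT-based address computation followed by an indirect call,
// roughly (with an illustrative scratch register):
//   adrp x8, :got:sym
//   ldr  x8, [x8, :got_lo12:sym]
//   blr  x8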
3275
3276 // Add implicit physical register uses to the call.
3277 for (auto Reg : CLI.OutRegs)
3278 MIB.addReg(Reg, RegState::Implicit);
3279
3280 // Add a register mask with the call-preserved registers.
3281 // Proper defs for return values will be added by setPhysRegsDeadExcept().
3282 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
3283
3284 CLI.Call = MIB;
3285
3286 // Finish off the call including any return values.
3287 return finishCall(CLI, NumBytes);
3288 }
3289
3290 bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) {
3291 if (Alignment)
3292 return Len / Alignment->value() <= 4;
3293 else
3294 return Len < 32;
3295 }
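// A rough illustration of the heuristic above: with a known alignment of 8, a
// 32-byte copy still counts as "small" (at most four naturally aligned
// accesses), whereas with no alignment information anything of 32 bytes or
// more is left to the memcpy libcall.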
3296
3297 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src,
3298 uint64_t Len, MaybeAlign Alignment) {
3299 // Make sure we don't bloat code by inlining very large memcpys.
3300 if (!isMemCpySmall(Len, Alignment))
3301 return false;
3302
3303 int64_t UnscaledOffset = 0;
3304 Address OrigDest = Dest;
3305 Address OrigSrc = Src;
3306
3307 while (Len) {
3308 MVT VT;
3309 if (!Alignment || *Alignment >= 8) {
3310 if (Len >= 8)
3311 VT = MVT::i64;
3312 else if (Len >= 4)
3313 VT = MVT::i32;
3314 else if (Len >= 2)
3315 VT = MVT::i16;
3316 else {
3317 VT = MVT::i8;
3318 }
3319 } else {
3320 assert(Alignment && "Alignment is set in this branch");
3321 // Bound based on alignment.
3322 if (Len >= 4 && *Alignment == 4)
3323 VT = MVT::i32;
3324 else if (Len >= 2 && *Alignment == 2)
3325 VT = MVT::i16;
3326 else {
3327 VT = MVT::i8;
3328 }
3329 }
3330
3331 unsigned ResultReg = emitLoad(VT, VT, Src);
3332 if (!ResultReg)
3333 return false;
3334
3335 if (!emitStore(VT, ResultReg, Dest))
3336 return false;
3337
3338 int64_t Size = VT.getSizeInBits() / 8;
3339 Len -= Size;
3340 UnscaledOffset += Size;
3341
3342 // We need to recompute the unscaled offset for each iteration.
3343 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset);
3344 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset);
3345 }
3346
3347 return true;
3348 }
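// Sketch of the expansion above (register names are illustrative, not what
// the allocator will actually pick): a 12-byte copy with 8-byte alignment
// becomes one 64-bit and one 32-bit load/store pair, e.g.
//   ldr x8, [x1]        // bytes 0..7
//   str x8, [x0]
//   ldr w8, [x1, #8]    // bytes 8..11
//   str w8, [x0, #8]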
3349
3350 /// Check if it is possible to fold the condition from the XALU intrinsic
3351 /// into the user. The condition code will only be updated on success.
3352 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
3353 const Instruction *I,
3354 const Value *Cond) {
3355 if (!isa<ExtractValueInst>(Cond))
3356 return false;
3357
3358 const auto *EV = cast<ExtractValueInst>(Cond);
3359 if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
3360 return false;
3361
3362 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
3363 MVT RetVT;
3364 const Function *Callee = II->getCalledFunction();
3365 Type *RetTy =
3366 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
3367 if (!isTypeLegal(RetTy, RetVT))
3368 return false;
3369
3370 if (RetVT != MVT::i32 && RetVT != MVT::i64)
3371 return false;
3372
3373 const Value *LHS = II->getArgOperand(0);
3374 const Value *RHS = II->getArgOperand(1);
3375
3376 // Canonicalize immediate to the RHS.
3377 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3378 std::swap(LHS, RHS);
3379
3380 // Simplify multiplies.
3381 Intrinsic::ID IID = II->getIntrinsicID();
3382 switch (IID) {
3383 default:
3384 break;
3385 case Intrinsic::smul_with_overflow:
3386 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3387 if (C->getValue() == 2)
3388 IID = Intrinsic::sadd_with_overflow;
3389 break;
3390 case Intrinsic::umul_with_overflow:
3391 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3392 if (C->getValue() == 2)
3393 IID = Intrinsic::uadd_with_overflow;
3394 break;
3395 }
3396
3397 AArch64CC::CondCode TmpCC;
3398 switch (IID) {
3399 default:
3400 return false;
3401 case Intrinsic::sadd_with_overflow:
3402 case Intrinsic::ssub_with_overflow:
3403 TmpCC = AArch64CC::VS;
3404 break;
3405 case Intrinsic::uadd_with_overflow:
3406 TmpCC = AArch64CC::HS;
3407 break;
3408 case Intrinsic::usub_with_overflow:
3409 TmpCC = AArch64CC::LO;
3410 break;
3411 case Intrinsic::smul_with_overflow:
3412 case Intrinsic::umul_with_overflow:
3413 TmpCC = AArch64CC::NE;
3414 break;
3415 }
3416
3417 // Check if both instructions are in the same basic block.
3418 if (!isValueAvailable(II))
3419 return false;
3420
3421 // Make sure nothing is in the way between the intrinsic and its user.
3422 BasicBlock::const_iterator Start(I);
3423 BasicBlock::const_iterator End(II);
3424 for (auto Itr = std::prev(Start); Itr != End; --Itr) {
3425 // We only expect extractvalue instructions between the intrinsic and the
3426 // instruction to be selected.
3427 if (!isa<ExtractValueInst>(Itr))
3428 return false;
3429
3430 // Check that the extractvalue operand comes from the intrinsic.
3431 const auto *EVI = cast<ExtractValueInst>(Itr);
3432 if (EVI->getAggregateOperand() != II)
3433 return false;
3434 }
3435
3436 CC = TmpCC;
3437 return true;
3438 }
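// For illustration, the IR pattern this folding targets looks like:
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
//   %val = extractvalue { i32, i1 } %res, 0
//   %ovf = extractvalue { i32, i1 } %res, 1
//   br i1 %ovf, label %overflow, label %cont
// Instead of materializing %ovf in a register, the selected user can reuse the
// flags set by the ADDS/SUBS/multiply-check sequence and act on the condition
// code returned here (B.VS for the example above).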
3439
3440 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
3441 // FIXME: Handle more intrinsics.
3442 switch (II->getIntrinsicID()) {
3443 default: return false;
3444 case Intrinsic::frameaddress: {
3445 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3446 MFI.setFrameAddressIsTaken(true);
3447
3448 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
3449 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
3450 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
3451 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3452 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr);
3453 // Recursively load frame address
3454 // ldr x0, [fp]
3455 // ldr x0, [x0]
3456 // ldr x0, [x0]
3457 // ...
3458 unsigned DestReg;
3459 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
3460 while (Depth--) {
3461 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass,
3462 SrcReg, 0);
3463 assert(DestReg && "Unexpected LDR instruction emission failure.");
3464 SrcReg = DestReg;
3465 }
3466
3467 updateValueMap(II, SrcReg);
3468 return true;
3469 }
3470 case Intrinsic::sponentry: {
3471 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
3472
3473 // SP = FP + Fixed Object + 16
3474 int FI = MFI.CreateFixedObject(4, 0, false);
3475 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
3476 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3477 TII.get(AArch64::ADDXri), ResultReg)
3478 .addFrameIndex(FI)
3479 .addImm(0)
3480 .addImm(0);
3481
3482 updateValueMap(II, ResultReg);
3483 return true;
3484 }
3485 case Intrinsic::memcpy:
3486 case Intrinsic::memmove: {
3487 const auto *MTI = cast<MemTransferInst>(II);
3488 // Don't handle volatile.
3489 if (MTI->isVolatile())
3490 return false;
3491
3492 // Disable inlining for memmove before the calls to computeAddress. Otherwise,
3493 // we would emit dead code because we don't currently handle memmoves.
3494 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy);
3495 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) {
3496 // Small memcpys are common enough that we want to do them without a call
3497 // if possible.
3498 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue();
3499 MaybeAlign Alignment;
3500 if (MTI->getDestAlign() || MTI->getSourceAlign())
3501 Alignment = std::min(MTI->getDestAlign().valueOrOne(),
3502 MTI->getSourceAlign().valueOrOne());
3503 if (isMemCpySmall(Len, Alignment)) {
3504 Address Dest, Src;
3505 if (!computeAddress(MTI->getRawDest(), Dest) ||
3506 !computeAddress(MTI->getRawSource(), Src))
3507 return false;
3508 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment))
3509 return true;
3510 }
3511 }
3512
3513 if (!MTI->getLength()->getType()->isIntegerTy(64))
3514 return false;
3515
3516 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255)
3517 // Fast instruction selection doesn't support the special
3518 // address spaces.
3519 return false;
3520
3521 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove";
3522 return lowerCallTo(II, IntrMemName, II->arg_size() - 1);
3523 }
3524 case Intrinsic::memset: {
3525 const MemSetInst *MSI = cast<MemSetInst>(II);
3526 // Don't handle volatile.
3527 if (MSI->isVolatile())
3528 return false;
3529
3530 if (!MSI->getLength()->getType()->isIntegerTy(64))
3531 return false;
3532
3533 if (MSI->getDestAddressSpace() > 255)
3534 // Fast instruction selection doesn't support the special
3535 // address spaces.
3536 return false;
3537
3538 return lowerCallTo(II, "memset", II->arg_size() - 1);
3539 }
3540 case Intrinsic::sin:
3541 case Intrinsic::cos:
3542 case Intrinsic::tan:
3543 case Intrinsic::pow: {
3544 MVT RetVT;
3545 if (!isTypeLegal(II->getType(), RetVT))
3546 return false;
3547
3548 if (RetVT != MVT::f32 && RetVT != MVT::f64)
3549 return false;
3550
3551 static const RTLIB::Libcall LibCallTable[4][2] = {
3552 {RTLIB::SIN_F32, RTLIB::SIN_F64},
3553 {RTLIB::COS_F32, RTLIB::COS_F64},
3554 {RTLIB::TAN_F32, RTLIB::TAN_F64},
3555 {RTLIB::POW_F32, RTLIB::POW_F64}};
3556 RTLIB::Libcall LC;
3557 bool Is64Bit = RetVT == MVT::f64;
3558 switch (II->getIntrinsicID()) {
3559 default:
3560 llvm_unreachable("Unexpected intrinsic.");
3561 case Intrinsic::sin:
3562 LC = LibCallTable[0][Is64Bit];
3563 break;
3564 case Intrinsic::cos:
3565 LC = LibCallTable[1][Is64Bit];
3566 break;
3567 case Intrinsic::tan:
3568 LC = LibCallTable[2][Is64Bit];
3569 break;
3570 case Intrinsic::pow:
3571 LC = LibCallTable[3][Is64Bit];
3572 break;
3573 }
3574
3575 ArgListTy Args;
3576 Args.reserve(II->arg_size());
3577
3578 // Populate the argument list.
3579 for (auto &Arg : II->args()) {
3580 ArgListEntry Entry;
3581 Entry.Val = Arg;
3582 Entry.Ty = Arg->getType();
3583 Args.push_back(Entry);
3584 }
3585
3586 CallLoweringInfo CLI;
3587 MCContext &Ctx = MF->getContext();
3588 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(),
3589 TLI.getLibcallName(LC), std::move(Args));
3590 if (!lowerCallTo(CLI))
3591 return false;
3592 updateValueMap(II, CLI.ResultReg);
3593 return true;
3594 }
3595 case Intrinsic::fabs: {
3596 MVT VT;
3597 if (!isTypeLegal(II->getType(), VT))
3598 return false;
3599
3600 unsigned Opc;
3601 switch (VT.SimpleTy) {
3602 default:
3603 return false;
3604 case MVT::f32:
3605 Opc = AArch64::FABSSr;
3606 break;
3607 case MVT::f64:
3608 Opc = AArch64::FABSDr;
3609 break;
3610 }
3611 Register SrcReg = getRegForValue(II->getOperand(0));
3612 if (!SrcReg)
3613 return false;
3614 Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
3615 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
3616 .addReg(SrcReg);
3617 updateValueMap(II, ResultReg);
3618 return true;
3619 }
3620 case Intrinsic::trap:
3621 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3622 .addImm(1);
3623 return true;
3624 case Intrinsic::debugtrap:
3625 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK))
3626 .addImm(0xF000);
3627 return true;
3628
3629 case Intrinsic::sqrt: {
3630 Type *RetTy = II->getCalledFunction()->getReturnType();
3631
3632 MVT VT;
3633 if (!isTypeLegal(RetTy, VT))
3634 return false;
3635
3636 Register Op0Reg = getRegForValue(II->getOperand(0));
3637 if (!Op0Reg)
3638 return false;
3639
3640 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg);
3641 if (!ResultReg)
3642 return false;
3643
3644 updateValueMap(II, ResultReg);
3645 return true;
3646 }
3647 case Intrinsic::sadd_with_overflow:
3648 case Intrinsic::uadd_with_overflow:
3649 case Intrinsic::ssub_with_overflow:
3650 case Intrinsic::usub_with_overflow:
3651 case Intrinsic::smul_with_overflow:
3652 case Intrinsic::umul_with_overflow: {
3653 // This implements the basic lowering of the xalu with overflow intrinsics.
3654 const Function *Callee = II->getCalledFunction();
3655 auto *Ty = cast<StructType>(Callee->getReturnType());
3656 Type *RetTy = Ty->getTypeAtIndex(0U);
3657
3658 MVT VT;
3659 if (!isTypeLegal(RetTy, VT))
3660 return false;
3661
3662 if (VT != MVT::i32 && VT != MVT::i64)
3663 return false;
3664
3665 const Value *LHS = II->getArgOperand(0);
3666 const Value *RHS = II->getArgOperand(1);
3667 // Canonicalize immediate to the RHS.
3668 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative())
3669 std::swap(LHS, RHS);
3670
3671 // Simplify multiplies.
3672 Intrinsic::ID IID = II->getIntrinsicID();
3673 switch (IID) {
3674 default:
3675 break;
3676 case Intrinsic::smul_with_overflow:
3677 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3678 if (C->getValue() == 2) {
3679 IID = Intrinsic::sadd_with_overflow;
3680 RHS = LHS;
3681 }
3682 break;
3683 case Intrinsic::umul_with_overflow:
3684 if (const auto *C = dyn_cast<ConstantInt>(RHS))
3685 if (C->getValue() == 2) {
3686 IID = Intrinsic::uadd_with_overflow;
3687 RHS = LHS;
3688 }
3689 break;
3690 }
3691
3692 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0;
3693 AArch64CC::CondCode CC = AArch64CC::Invalid;
3694 switch (IID) {
3695 default: llvm_unreachable("Unexpected intrinsic!");
3696 case Intrinsic::sadd_with_overflow:
3697 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3698 CC = AArch64CC::VS;
3699 break;
3700 case Intrinsic::uadd_with_overflow:
3701 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true);
3702 CC = AArch64CC::HS;
3703 break;
3704 case Intrinsic::ssub_with_overflow:
3705 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3706 CC = AArch64CC::VS;
3707 break;
3708 case Intrinsic::usub_with_overflow:
3709 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true);
3710 CC = AArch64CC::LO;
3711 break;
3712 case Intrinsic::smul_with_overflow: {
3713 CC = AArch64CC::NE;
3714 Register LHSReg = getRegForValue(LHS);
3715 if (!LHSReg)
3716 return false;
3717
3718 Register RHSReg = getRegForValue(RHS);
3719 if (!RHSReg)
3720 return false;
3721
3722 if (VT == MVT::i32) {
3723 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg);
3724 Register MulSubReg =
3725 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3726 // cmp xreg, wreg, sxtw
3727 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg,
3728 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true,
3729 /*WantResult=*/false);
3730 MulReg = MulSubReg;
3731 } else {
3732 assert(VT == MVT::i64 && "Unexpected value type.");
3733 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3734 // reused in the next instruction.
3735 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3736 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg);
3737 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63,
3738 /*WantResult=*/false);
3739 }
3740 break;
3741 }
3742 case Intrinsic::umul_with_overflow: {
3743 CC = AArch64CC::NE;
3744 Register LHSReg = getRegForValue(LHS);
3745 if (!LHSReg)
3746 return false;
3747
3748 Register RHSReg = getRegForValue(RHS);
3749 if (!RHSReg)
3750 return false;
3751
3752 if (VT == MVT::i32) {
3753 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg);
3754 // tst xreg, #0xffffffff00000000
3755 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3756 TII.get(AArch64::ANDSXri), AArch64::XZR)
3757 .addReg(MulReg)
3758 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64));
3759 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32);
3760 } else {
3761 assert(VT == MVT::i64 && "Unexpected value type.");
3762 // LHSReg and RHSReg cannot be killed by this Mul, since they are
3763 // reused in the next instruction.
3764 MulReg = emitMul_rr(VT, LHSReg, RHSReg);
3765 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg);
3766 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false);
3767 }
3768 break;
3769 }
3770 }
3771
3772 if (MulReg) {
3773 ResultReg1 = createResultReg(TLI.getRegClassFor(VT));
3774 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3775 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg);
3776 }
3777
3778 if (!ResultReg1)
3779 return false;
3780
3781 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass,
3782 AArch64::WZR, AArch64::WZR,
3783 getInvertedCondCode(CC));
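// CSINC wD, wzr, wzr, !CC is the CSET wD, CC alias: it materializes the
// overflow bit (the intrinsic's second result) as 0 or 1 in the register
// consecutive to the value result.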
3784 (void)ResultReg2;
3785 assert((ResultReg1 + 1) == ResultReg2 &&
3786 "Nonconsecutive result registers.");
3787 updateValueMap(II, ResultReg1, 2);
3788 return true;
3789 }
3790 case Intrinsic::aarch64_crc32b:
3791 case Intrinsic::aarch64_crc32h:
3792 case Intrinsic::aarch64_crc32w:
3793 case Intrinsic::aarch64_crc32x:
3794 case Intrinsic::aarch64_crc32cb:
3795 case Intrinsic::aarch64_crc32ch:
3796 case Intrinsic::aarch64_crc32cw:
3797 case Intrinsic::aarch64_crc32cx: {
3798 if (!Subtarget->hasCRC())
3799 return false;
3800
3801 unsigned Opc;
3802 switch (II->getIntrinsicID()) {
3803 default:
3804 llvm_unreachable("Unexpected intrinsic!");
3805 case Intrinsic::aarch64_crc32b:
3806 Opc = AArch64::CRC32Brr;
3807 break;
3808 case Intrinsic::aarch64_crc32h:
3809 Opc = AArch64::CRC32Hrr;
3810 break;
3811 case Intrinsic::aarch64_crc32w:
3812 Opc = AArch64::CRC32Wrr;
3813 break;
3814 case Intrinsic::aarch64_crc32x:
3815 Opc = AArch64::CRC32Xrr;
3816 break;
3817 case Intrinsic::aarch64_crc32cb:
3818 Opc = AArch64::CRC32CBrr;
3819 break;
3820 case Intrinsic::aarch64_crc32ch:
3821 Opc = AArch64::CRC32CHrr;
3822 break;
3823 case Intrinsic::aarch64_crc32cw:
3824 Opc = AArch64::CRC32CWrr;
3825 break;
3826 case Intrinsic::aarch64_crc32cx:
3827 Opc = AArch64::CRC32CXrr;
3828 break;
3829 }
3830
3831 Register LHSReg = getRegForValue(II->getArgOperand(0));
3832 Register RHSReg = getRegForValue(II->getArgOperand(1));
3833 if (!LHSReg || !RHSReg)
3834 return false;
3835
3836 Register ResultReg =
3837 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg);
3838 updateValueMap(II, ResultReg);
3839 return true;
3840 }
3841 }
3842 return false;
3843 }
3844
3845 bool AArch64FastISel::selectRet(const Instruction *I) {
3846 const ReturnInst *Ret = cast<ReturnInst>(I);
3847 const Function &F = *I->getParent()->getParent();
3848
3849 if (!FuncInfo.CanLowerReturn)
3850 return false;
3851
3852 if (F.isVarArg())
3853 return false;
3854
3855 if (TLI.supportSwiftError() &&
3856 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError))
3857 return false;
3858
3859 if (TLI.supportSplitCSR(FuncInfo.MF))
3860 return false;
3861
3862 // Build a list of return value registers.
3863 SmallVector<unsigned, 4> RetRegs;
3864
3865 if (Ret->getNumOperands() > 0) {
3866 CallingConv::ID CC = F.getCallingConv();
3867 SmallVector<ISD::OutputArg, 4> Outs;
3868 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL);
3869
3870 // Analyze operands of the call, assigning locations to each operand.
3871 SmallVector<CCValAssign, 16> ValLocs;
3872 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
3873 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS);
3874
3875 // Only handle a single return value for now.
3876 if (ValLocs.size() != 1)
3877 return false;
3878
3879 CCValAssign &VA = ValLocs[0];
3880 const Value *RV = Ret->getOperand(0);
3881
3882 // Don't bother handling odd stuff for now.
3883 if ((VA.getLocInfo() != CCValAssign::Full) &&
3884 (VA.getLocInfo() != CCValAssign::BCvt))
3885 return false;
3886
3887 // Only handle register returns for now.
3888 if (!VA.isRegLoc())
3889 return false;
3890
3891 Register Reg = getRegForValue(RV);
3892 if (Reg == 0)
3893 return false;
3894
3895 unsigned SrcReg = Reg + VA.getValNo();
3896 Register DestReg = VA.getLocReg();
3897 // Avoid a cross-class copy. This is very unlikely.
3898 if (!MRI.getRegClass(SrcReg)->contains(DestReg))
3899 return false;
3900
3901 EVT RVEVT = TLI.getValueType(DL, RV->getType());
3902 if (!RVEVT.isSimple())
3903 return false;
3904
3905 // Vectors (of more than 1 lane) on big-endian targets need tricky handling.
3906 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() &&
3907 !Subtarget->isLittleEndian())
3908 return false;
3909
3910 MVT RVVT = RVEVT.getSimpleVT();
3911 if (RVVT == MVT::f128)
3912 return false;
3913
3914 MVT DestVT = VA.getValVT();
3915 // Special handling for extended integers.
3916 if (RVVT != DestVT) {
3917 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
3918 return false;
3919
3920 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
3921 return false;
3922
3923 bool IsZExt = Outs[0].Flags.isZExt();
3924 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
3925 if (SrcReg == 0)
3926 return false;
3927 }
3928
3929 // "Callee" (i.e. value producer) zero extends pointers at function
3930 // boundary.
3931 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy())
3932 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff);
3933
3934 // Make the copy.
3935 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3936 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);
3937
3938 // Add register to return instruction.
3939 RetRegs.push_back(VA.getLocReg());
3940 }
3941
3942 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3943 TII.get(AArch64::RET_ReallyLR));
3944 for (unsigned RetReg : RetRegs)
3945 MIB.addReg(RetReg, RegState::Implicit);
3946 return true;
3947 }
3948
3949 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3950 Type *DestTy = I->getType();
3951 Value *Op = I->getOperand(0);
3952 Type *SrcTy = Op->getType();
3953
3954 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3955 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3956 if (!SrcEVT.isSimple())
3957 return false;
3958 if (!DestEVT.isSimple())
3959 return false;
3960
3961 MVT SrcVT = SrcEVT.getSimpleVT();
3962 MVT DestVT = DestEVT.getSimpleVT();
3963
3964 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3965 SrcVT != MVT::i8)
3966 return false;
3967 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3968 DestVT != MVT::i1)
3969 return false;
3970
3971 Register SrcReg = getRegForValue(Op);
3972 if (!SrcReg)
3973 return false;
3974
3975 // If we're truncating from i64 to a smaller non-legal type then generate an
3976 // AND. Otherwise, we know the high bits are undefined and a truncate only
3977 // generates a COPY. We cannot mark the source register also as the result
3978 // register, because this can incorrectly transfer the kill flag onto the
3979 // source register.
3980 unsigned ResultReg;
3981 if (SrcVT == MVT::i64) {
3982 uint64_t Mask = 0;
3983 switch (DestVT.SimpleTy) {
3984 default:
3985 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3986 return false;
3987 case MVT::i1:
3988 Mask = 0x1;
3989 break;
3990 case MVT::i8:
3991 Mask = 0xff;
3992 break;
3993 case MVT::i16:
3994 Mask = 0xffff;
3995 break;
3996 }
3997 // Issue an extract_subreg to get the lower 32-bits.
3998 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3999 AArch64::sub_32);
4000 // Create the AND instruction which performs the actual truncation.
4001 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
4002 assert(ResultReg && "Unexpected AND instruction emission failure.");
4003 } else {
4004 ResultReg = createResultReg(&AArch64::GPR32RegClass);
4005 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4006 TII.get(TargetOpcode::COPY), ResultReg)
4007 .addReg(SrcReg);
4008 }
4009
4010 updateValueMap(I, ResultReg);
4011 return true;
4012 }
4013
4014 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4015 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4016 DestVT == MVT::i64) &&
4017 "Unexpected value type.");
4018 // Handle i8 and i16 as i32.
4019 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4020 DestVT = MVT::i32;
4021
4022 if (IsZExt) {
4023 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4024 assert(ResultReg && "Unexpected AND instruction emission failure.");
4025 if (DestVT == MVT::i64) {
4026 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4027 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4028 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4029 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4030 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4031 .addImm(0)
4032 .addReg(ResultReg)
4033 .addImm(AArch64::sub_32);
4034 ResultReg = Reg64;
4035 }
4036 return ResultReg;
4037 } else {
4038 if (DestVT == MVT::i64) {
4039 // FIXME: We're SExt i1 to i64.
4040 return 0;
4041 }
4042 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg,
4043 0, 0);
4044 }
4045 }
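// For example (a sketch of what the code above emits): a zero-extended i1
// becomes "and wN, wM, #1"; an i1 sign-extended to i32 becomes
// "sbfm wN, wM, #0, #0" (the sbfx wN, wM, #0, #1 alias), which replicates
// bit 0 into the upper bits. The i1 -> i64 sign-extend case is still
// unhandled and returns 0.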
4046
4047 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4048 unsigned Opc, ZReg;
4049 switch (RetVT.SimpleTy) {
4050 default: return 0;
4051 case MVT::i8:
4052 case MVT::i16:
4053 case MVT::i32:
4054 RetVT = MVT::i32;
4055 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break;
4056 case MVT::i64:
4057 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break;
4058 }
4059
4060 const TargetRegisterClass *RC =
4061 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4062 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg);
4063 }
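// Note that there is no standalone MUL machine instruction here: the multiply
// is emitted as a multiply-add with the zero register, e.g. for i32
//   madd wD, wN, wM, wzr
// which is exactly what the MUL assembly alias expands to.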
4064
4065 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4066 if (RetVT != MVT::i64)
4067 return 0;
4068
4069 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
4070 Op0, Op1, AArch64::XZR);
4071 }
4072
4073 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) {
4074 if (RetVT != MVT::i64)
4075 return 0;
4076
4077 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
4078 Op0, Op1, AArch64::XZR);
4079 }
4080
4081 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg,
4082 unsigned Op1Reg) {
4083 unsigned Opc = 0;
4084 bool NeedTrunc = false;
4085 uint64_t Mask = 0;
4086 switch (RetVT.SimpleTy) {
4087 default: return 0;
4088 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
4089 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
4090 case MVT::i32: Opc = AArch64::LSLVWr; break;
4091 case MVT::i64: Opc = AArch64::LSLVXr; break;
4092 }
4093
4094 const TargetRegisterClass *RC =
4095 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4096 if (NeedTrunc)
4097 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4098
4099 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4100 if (NeedTrunc)
4101 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4102 return ResultReg;
4103 }
4104
4105 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4106 uint64_t Shift, bool IsZExt) {
4107 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4108 "Unexpected source/return type pair.");
4109 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4110 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4111 "Unexpected source value type.");
4112 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4113 RetVT == MVT::i64) && "Unexpected return value type.");
4114
4115 bool Is64Bit = (RetVT == MVT::i64);
4116 unsigned RegSize = Is64Bit ? 64 : 32;
4117 unsigned DstBits = RetVT.getSizeInBits();
4118 unsigned SrcBits = SrcVT.getSizeInBits();
4119 const TargetRegisterClass *RC =
4120 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4121
4122 // Just emit a copy for "zero" shifts.
4123 if (Shift == 0) {
4124 if (RetVT == SrcVT) {
4125 Register ResultReg = createResultReg(RC);
4126 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4127 TII.get(TargetOpcode::COPY), ResultReg)
4128 .addReg(Op0);
4129 return ResultReg;
4130 } else
4131 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4132 }
4133
4134 // Don't deal with undefined shifts.
4135 if (Shift >= DstBits)
4136 return 0;
4137
4138 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4139 // {S|U}BFM Wd, Wn, #r, #s
4140 // Wd<32+s-r,32-r> = Wn<s:0> when r > s
4141
4142 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4143 // %2 = shl i16 %1, 4
4144 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7
4145 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext
4146 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext
4147 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext
4148
4149 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4150 // %2 = shl i16 %1, 8
4151 // Wd<32+7-24,32-24> = Wn<7:0>
4152 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext
4153 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext
4154 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext
4155
4156 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4157 // %2 = shl i16 %1, 12
4158 // Wd<32+3-20,32-20> = Wn<3:0>
4159 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext
4160 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext
4161 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext
4162
4163 unsigned ImmR = RegSize - Shift;
4164 // Limit the width to the length of the source type.
4165 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift);
4166 static const unsigned OpcTable[2][2] = {
4167 {AArch64::SBFMWri, AArch64::SBFMXri},
4168 {AArch64::UBFMWri, AArch64::UBFMXri}
4169 };
4170 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4171 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4172 Register TmpReg = MRI.createVirtualRegister(RC);
4173 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4174 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4175 .addImm(0)
4176 .addReg(Op0)
4177 .addImm(AArch64::sub_32);
4178 Op0 = TmpReg;
4179 }
4180 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4181 }
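// Concrete instance of the folding above (illustrative): for
//   %1 = zext i8 %x to i32
//   %2 = shl i32 %1, 4
// we get ImmR = 32 - 4 = 28 and ImmS = min(7, 27) = 7, i.e. a single
//   ubfm wD, wN, #28, #7    (the ubfiz wD, wN, #4, #8 alias)
// that performs both the zero-extend and the shift.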
4182
4183 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg,
4184 unsigned Op1Reg) {
4185 unsigned Opc = 0;
4186 bool NeedTrunc = false;
4187 uint64_t Mask = 0;
4188 switch (RetVT.SimpleTy) {
4189 default: return 0;
4190 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break;
4191 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break;
4192 case MVT::i32: Opc = AArch64::LSRVWr; break;
4193 case MVT::i64: Opc = AArch64::LSRVXr; break;
4194 }
4195
4196 const TargetRegisterClass *RC =
4197 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4198 if (NeedTrunc) {
4199 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask);
4200 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4201 }
4202 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4203 if (NeedTrunc)
4204 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4205 return ResultReg;
4206 }
4207
4208 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4209 uint64_t Shift, bool IsZExt) {
4210 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4211 "Unexpected source/return type pair.");
4212 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4213 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4214 "Unexpected source value type.");
4215 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4216 RetVT == MVT::i64) && "Unexpected return value type.");
4217
4218 bool Is64Bit = (RetVT == MVT::i64);
4219 unsigned RegSize = Is64Bit ? 64 : 32;
4220 unsigned DstBits = RetVT.getSizeInBits();
4221 unsigned SrcBits = SrcVT.getSizeInBits();
4222 const TargetRegisterClass *RC =
4223 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4224
4225 // Just emit a copy for "zero" shifts.
4226 if (Shift == 0) {
4227 if (RetVT == SrcVT) {
4228 Register ResultReg = createResultReg(RC);
4229 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4230 TII.get(TargetOpcode::COPY), ResultReg)
4231 .addReg(Op0);
4232 return ResultReg;
4233 } else
4234 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4235 }
4236
4237 // Don't deal with undefined shifts.
4238 if (Shift >= DstBits)
4239 return 0;
4240
4241 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4242 // {S|U}BFM Wd, Wn, #r, #s
4243 // Wd<s-r:0> = Wn<s:r> when r <= s
4244
4245 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4246 // %2 = lshr i16 %1, 4
4247 // Wd<7-4:0> = Wn<7:4>
4248 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext
4249 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4250 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4251
4252 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4253 // %2 = lshr i16 %1, 8
4254 // Wd<7-7,0> = Wn<7:7>
4255 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext
4256 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4257 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4258
4259 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4260 // %2 = lshr i16 %1, 12
4261 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4262 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext
4263 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4264 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4265
4266 if (Shift >= SrcBits && IsZExt)
4267 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4268
4269 // It is not possible to fold a sign-extend into the LShr instruction. In this
4270 // case emit a sign-extend.
4271 if (!IsZExt) {
4272 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4273 if (!Op0)
4274 return 0;
4275 SrcVT = RetVT;
4276 SrcBits = SrcVT.getSizeInBits();
4277 IsZExt = true;
4278 }
4279
4280 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4281 unsigned ImmS = SrcBits - 1;
4282 static const unsigned OpcTable[2][2] = {
4283 {AArch64::SBFMWri, AArch64::SBFMXri},
4284 {AArch64::UBFMWri, AArch64::UBFMXri}
4285 };
4286 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4287 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4288 Register TmpReg = MRI.createVirtualRegister(RC);
4289 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4290 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4291 .addImm(0)
4292 .addReg(Op0)
4293 .addImm(AArch64::sub_32);
4294 Op0 = TmpReg;
4295 }
4296 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4297 }
4298
4299 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg,
4300 unsigned Op1Reg) {
4301 unsigned Opc = 0;
4302 bool NeedTrunc = false;
4303 uint64_t Mask = 0;
4304 switch (RetVT.SimpleTy) {
4305 default: return 0;
4306 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break;
4307 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break;
4308 case MVT::i32: Opc = AArch64::ASRVWr; break;
4309 case MVT::i64: Opc = AArch64::ASRVXr; break;
4310 }
4311
4312 const TargetRegisterClass *RC =
4313 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4314 if (NeedTrunc) {
4315 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
4316 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask);
4317 }
4318 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg);
4319 if (NeedTrunc)
4320 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
4321 return ResultReg;
4322 }
4323
4324 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
4325 uint64_t Shift, bool IsZExt) {
4326 assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
4327 "Unexpected source/return type pair.");
4328 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
4329 SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
4330 "Unexpected source value type.");
4331 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
4332 RetVT == MVT::i64) && "Unexpected return value type.");
4333
4334 bool Is64Bit = (RetVT == MVT::i64);
4335 unsigned RegSize = Is64Bit ? 64 : 32;
4336 unsigned DstBits = RetVT.getSizeInBits();
4337 unsigned SrcBits = SrcVT.getSizeInBits();
4338 const TargetRegisterClass *RC =
4339 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4340
4341 // Just emit a copy for "zero" shifts.
4342 if (Shift == 0) {
4343 if (RetVT == SrcVT) {
4344 Register ResultReg = createResultReg(RC);
4345 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4346 TII.get(TargetOpcode::COPY), ResultReg)
4347 .addReg(Op0);
4348 return ResultReg;
4349 } else
4350 return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
4351 }
4352
4353 // Don't deal with undefined shifts.
4354 if (Shift >= DstBits)
4355 return 0;
4356
4357 // For immediate shifts we can fold the zero-/sign-extension into the shift.
4358 // {S|U}BFM Wd, Wn, #r, #s
4359 // Wd<s-r:0> = Wn<s:r> when r <= s
4360
4361 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4362 // %2 = ashr i16 %1, 4
4363 // Wd<7-4:0> = Wn<7:4>
4364 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext
4365 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext
4366 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext
4367
4368 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4369 // %2 = ashr i16 %1, 8
4370 // Wd<7-7,0> = Wn<7:7>
4371 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4372 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4373 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4374
4375 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16
4376 // %2 = ashr i16 %1, 12
4377 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7
4378 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext
4379 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext
4380 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext
4381
4382 if (Shift >= SrcBits && IsZExt)
4383 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT);
4384
4385 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift);
4386 unsigned ImmS = SrcBits - 1;
4387 static const unsigned OpcTable[2][2] = {
4388 {AArch64::SBFMWri, AArch64::SBFMXri},
4389 {AArch64::UBFMWri, AArch64::UBFMXri}
4390 };
4391 unsigned Opc = OpcTable[IsZExt][Is64Bit];
4392 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) {
4393 Register TmpReg = MRI.createVirtualRegister(RC);
4394 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4395 TII.get(AArch64::SUBREG_TO_REG), TmpReg)
4396 .addImm(0)
4397 .addReg(Op0)
4398 .addImm(AArch64::sub_32);
4399 Op0 = TmpReg;
4400 }
4401 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS);
4402 }
4403
4404 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
4405 bool IsZExt) {
4406 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?");
4407
4408 // FastISel does not have plumbing to deal with extensions where the SrcVT or
4409 // DestVT are odd things, so test to make sure that they are both types we can
4410 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise
4411 // bail out to SelectionDAG.
4412 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) &&
4413 (DestVT != MVT::i32) && (DestVT != MVT::i64)) ||
4414 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) &&
4415 (SrcVT != MVT::i16) && (SrcVT != MVT::i32)))
4416 return 0;
4417
4418 unsigned Opc;
4419 unsigned Imm = 0;
4420
4421 switch (SrcVT.SimpleTy) {
4422 default:
4423 return 0;
4424 case MVT::i1:
4425 return emiti1Ext(SrcReg, DestVT, IsZExt);
4426 case MVT::i8:
4427 if (DestVT == MVT::i64)
4428 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4429 else
4430 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4431 Imm = 7;
4432 break;
4433 case MVT::i16:
4434 if (DestVT == MVT::i64)
4435 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4436 else
4437 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri;
4438 Imm = 15;
4439 break;
4440 case MVT::i32:
4441 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?");
4442 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri;
4443 Imm = 31;
4444 break;
4445 }
4446
4447 // Handle i8 and i16 as i32.
4448 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4449 DestVT = MVT::i32;
4450 else if (DestVT == MVT::i64) {
4451 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4452 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4453 TII.get(AArch64::SUBREG_TO_REG), Src64)
4454 .addImm(0)
4455 .addReg(SrcReg)
4456 .addImm(AArch64::sub_32);
4457 SrcReg = Src64;
4458 }
4459
4460 const TargetRegisterClass *RC =
4461 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4462 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm);
4463 }
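// A few examples of what the mapping above produces (sketch, not exhaustive):
//   zext i8  -> i32 : ubfm wD, wN, #0, #7    (uxtb)
//   sext i16 -> i32 : sbfm wD, wN, #0, #15   (sxth)
//   sext i32 -> i64 : subreg_to_reg followed by sbfm xD, xN, #0, #31 (sxtw)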
4464
4465 static bool isZExtLoad(const MachineInstr *LI) {
4466 switch (LI->getOpcode()) {
4467 default:
4468 return false;
4469 case AArch64::LDURBBi:
4470 case AArch64::LDURHHi:
4471 case AArch64::LDURWi:
4472 case AArch64::LDRBBui:
4473 case AArch64::LDRHHui:
4474 case AArch64::LDRWui:
4475 case AArch64::LDRBBroX:
4476 case AArch64::LDRHHroX:
4477 case AArch64::LDRWroX:
4478 case AArch64::LDRBBroW:
4479 case AArch64::LDRHHroW:
4480 case AArch64::LDRWroW:
4481 return true;
4482 }
4483 }
4484
4485 static bool isSExtLoad(const MachineInstr *LI) {
4486 switch (LI->getOpcode()) {
4487 default:
4488 return false;
4489 case AArch64::LDURSBWi:
4490 case AArch64::LDURSHWi:
4491 case AArch64::LDURSBXi:
4492 case AArch64::LDURSHXi:
4493 case AArch64::LDURSWi:
4494 case AArch64::LDRSBWui:
4495 case AArch64::LDRSHWui:
4496 case AArch64::LDRSBXui:
4497 case AArch64::LDRSHXui:
4498 case AArch64::LDRSWui:
4499 case AArch64::LDRSBWroX:
4500 case AArch64::LDRSHWroX:
4501 case AArch64::LDRSBXroX:
4502 case AArch64::LDRSHXroX:
4503 case AArch64::LDRSWroX:
4504 case AArch64::LDRSBWroW:
4505 case AArch64::LDRSHWroW:
4506 case AArch64::LDRSBXroW:
4507 case AArch64::LDRSHXroW:
4508 case AArch64::LDRSWroW:
4509 return true;
4510 }
4511 }
4512
4513 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT,
4514 MVT SrcVT) {
4515 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0));
4516 if (!LI || !LI->hasOneUse())
4517 return false;
4518
4519 // Check if the load instruction has already been selected.
4520 Register Reg = lookUpRegForValue(LI);
4521 if (!Reg)
4522 return false;
4523
4524 MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
4525 if (!MI)
4526 return false;
4527
4528 // Check if the correct load instruction has been emitted - SelectionDAG might
4529 // have emitted a zero-extending load, but we need a sign-extending load.
4530 bool IsZExt = isa<ZExtInst>(I);
4531 const auto *LoadMI = MI;
4532 if (LoadMI->getOpcode() == TargetOpcode::COPY &&
4533 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) {
4534 Register LoadReg = MI->getOperand(1).getReg();
4535 LoadMI = MRI.getUniqueVRegDef(LoadReg);
4536 assert(LoadMI && "Expected valid instruction");
4537 }
4538 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI)))
4539 return false;
4540
4541 // Nothing to be done.
4542 if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
4543 updateValueMap(I, Reg);
4544 return true;
4545 }
4546
4547 if (IsZExt) {
4548 Register Reg64 = createResultReg(&AArch64::GPR64RegClass);
4549 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4550 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4551 .addImm(0)
4552 .addReg(Reg, getKillRegState(true))
4553 .addImm(AArch64::sub_32);
4554 Reg = Reg64;
4555 } else {
4556 assert((MI->getOpcode() == TargetOpcode::COPY &&
4557 MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
4558 "Expected copy instruction");
4559 Reg = MI->getOperand(1).getReg();
4560 MachineBasicBlock::iterator I(MI);
4561 removeDeadCode(I, std::next(I));
4562 }
4563 updateValueMap(I, Reg);
4564 return true;
4565 }
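// Example of the optimization above (illustrative): for
//   %v = load i32, ptr %p
//   %e = zext i32 %v to i64
// where the load has already been selected as a zero-extending 32-bit ldr,
// the zext only needs a SUBREG_TO_REG to place the value in an x-register;
// no extra ubfm/and is emitted.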
4566
4567 bool AArch64FastISel::selectIntExt(const Instruction *I) {
4568 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
4569 "Unexpected integer extend instruction.");
4570 MVT RetVT;
4571 MVT SrcVT;
4572 if (!isTypeSupported(I->getType(), RetVT))
4573 return false;
4574
4575 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
4576 return false;
4577
4578 // Try to optimize already sign-/zero-extended values from load instructions.
4579 if (optimizeIntExtLoad(I, RetVT, SrcVT))
4580 return true;
4581
4582 Register SrcReg = getRegForValue(I->getOperand(0));
4583 if (!SrcReg)
4584 return false;
4585
4586 // Try to optimize already sign-/zero-extended values from function arguments.
4587 bool IsZExt = isa<ZExtInst>(I);
4588 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
4589 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
4590 if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
4591 Register ResultReg = createResultReg(&AArch64::GPR64RegClass);
4592 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4593 TII.get(AArch64::SUBREG_TO_REG), ResultReg)
4594 .addImm(0)
4595 .addReg(SrcReg)
4596 .addImm(AArch64::sub_32);
4597 SrcReg = ResultReg;
4598 }
4599
4600 updateValueMap(I, SrcReg);
4601 return true;
4602 }
4603 }
4604
4605 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
4606 if (!ResultReg)
4607 return false;
4608
4609 updateValueMap(I, ResultReg);
4610 return true;
4611 }
4612
4613 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
4614 EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
4615 if (!DestEVT.isSimple())
4616 return false;
4617
4618 MVT DestVT = DestEVT.getSimpleVT();
4619 if (DestVT != MVT::i64 && DestVT != MVT::i32)
4620 return false;
4621
4622 unsigned DivOpc;
4623 bool Is64bit = (DestVT == MVT::i64);
4624 switch (ISDOpcode) {
4625 default:
4626 return false;
4627 case ISD::SREM:
4628 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
4629 break;
4630 case ISD::UREM:
4631 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
4632 break;
4633 }
4634 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr;
4635 Register Src0Reg = getRegForValue(I->getOperand(0));
4636 if (!Src0Reg)
4637 return false;
4638
4639 Register Src1Reg = getRegForValue(I->getOperand(1));
4640 if (!Src1Reg)
4641 return false;
4642
4643 const TargetRegisterClass *RC =
4644 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
4645 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg);
4646 assert(QuotReg && "Unexpected DIV instruction emission failure.");
4647 // The remainder is computed as numerator - (quotient * denominator) using the
4648 // MSUB instruction.
4649 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg);
4650 updateValueMap(I, ResultReg);
4651 return true;
4652 }
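// Sketch of the sequence this produces for a 32-bit srem (register names are
// illustrative):
//   sdiv w8, w0, w1
//   msub w0, w8, w1, w0    // w0 - (w8 * w1)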
4653
4654 bool AArch64FastISel::selectMul(const Instruction *I) {
4655 MVT VT;
4656 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true))
4657 return false;
4658
4659 if (VT.isVector())
4660 return selectBinaryOp(I, ISD::MUL);
4661
4662 const Value *Src0 = I->getOperand(0);
4663 const Value *Src1 = I->getOperand(1);
4664 if (const auto *C = dyn_cast<ConstantInt>(Src0))
4665 if (C->getValue().isPowerOf2())
4666 std::swap(Src0, Src1);
4667
4668 // Try to simplify to a shift instruction.
4669 if (const auto *C = dyn_cast<ConstantInt>(Src1))
4670 if (C->getValue().isPowerOf2()) {
4671 uint64_t ShiftVal = C->getValue().logBase2();
4672 MVT SrcVT = VT;
4673 bool IsZExt = true;
4674 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) {
4675 if (!isIntExtFree(ZExt)) {
4676 MVT VT;
4677 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) {
4678 SrcVT = VT;
4679 IsZExt = true;
4680 Src0 = ZExt->getOperand(0);
4681 }
4682 }
4683 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) {
4684 if (!isIntExtFree(SExt)) {
4685 MVT VT;
4686 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) {
4687 SrcVT = VT;
4688 IsZExt = false;
4689 Src0 = SExt->getOperand(0);
4690 }
4691 }
4692 }
4693
4694 Register Src0Reg = getRegForValue(Src0);
4695 if (!Src0Reg)
4696 return false;
4697
4698 unsigned ResultReg =
4699 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt);
4700
4701 if (ResultReg) {
4702 updateValueMap(I, ResultReg);
4703 return true;
4704 }
4705 }
4706
4707 Register Src0Reg = getRegForValue(I->getOperand(0));
4708 if (!Src0Reg)
4709 return false;
4710
4711 Register Src1Reg = getRegForValue(I->getOperand(1));
4712 if (!Src1Reg)
4713 return false;
4714
4715 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg);
4716
4717 if (!ResultReg)
4718 return false;
4719
4720 updateValueMap(I, ResultReg);
4721 return true;
4722 }
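// For example (a sketch): "mul i32 %x, 8" is selected as a shift,
// lsl wD, wN, #3, and if %x is itself a zero- or sign-extended value the
// extension is folded into the ubfm/sbfm emitted by emitLSL_ri above. Only
// when no power-of-two operand is found do we fall back to the madd-based
// emitMul_rr.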
4723
4724 bool AArch64FastISel::selectShift(const Instruction *I) {
4725 MVT RetVT;
4726 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true))
4727 return false;
4728
4729 if (RetVT.isVector())
4730 return selectOperator(I, I->getOpcode());
4731
4732 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) {
4733 unsigned ResultReg = 0;
4734 uint64_t ShiftVal = C->getZExtValue();
4735 MVT SrcVT = RetVT;
4736 bool IsZExt = I->getOpcode() != Instruction::AShr;
4737 const Value *Op0 = I->getOperand(0);
4738 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) {
4739 if (!isIntExtFree(ZExt)) {
4740 MVT TmpVT;
4741 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) {
4742 SrcVT = TmpVT;
4743 IsZExt = true;
4744 Op0 = ZExt->getOperand(0);
4745 }
4746 }
4747 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) {
4748 if (!isIntExtFree(SExt)) {
4749 MVT TmpVT;
4750 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) {
4751 SrcVT = TmpVT;
4752 IsZExt = false;
4753 Op0 = SExt->getOperand(0);
4754 }
4755 }
4756 }
4757
4758 Register Op0Reg = getRegForValue(Op0);
4759 if (!Op0Reg)
4760 return false;
4761
4762 switch (I->getOpcode()) {
4763 default: llvm_unreachable("Unexpected instruction.");
4764 case Instruction::Shl:
4765 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4766 break;
4767 case Instruction::AShr:
4768 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4769 break;
4770 case Instruction::LShr:
4771 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt);
4772 break;
4773 }
4774 if (!ResultReg)
4775 return false;
4776
4777 updateValueMap(I, ResultReg);
4778 return true;
4779 }
4780
4781 Register Op0Reg = getRegForValue(I->getOperand(0));
4782 if (!Op0Reg)
4783 return false;
4784
4785 Register Op1Reg = getRegForValue(I->getOperand(1));
4786 if (!Op1Reg)
4787 return false;
4788
4789 unsigned ResultReg = 0;
4790 switch (I->getOpcode()) {
4791 default: llvm_unreachable("Unexpected instruction.");
4792 case Instruction::Shl:
4793 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg);
4794 break;
4795 case Instruction::AShr:
4796 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg);
4797 break;
4798 case Instruction::LShr:
4799 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg);
4800 break;
4801 }
4802
4803 if (!ResultReg)
4804 return false;
4805
4806 updateValueMap(I, ResultReg);
4807 return true;
4808 }
4809
4810 bool AArch64FastISel::selectBitCast(const Instruction *I) {
4811 MVT RetVT, SrcVT;
4812
4813 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT))
4814 return false;
4815 if (!isTypeLegal(I->getType(), RetVT))
4816 return false;
4817
4818 unsigned Opc;
4819 if (RetVT == MVT::f32 && SrcVT == MVT::i32)
4820 Opc = AArch64::FMOVWSr;
4821 else if (RetVT == MVT::f64 && SrcVT == MVT::i64)
4822 Opc = AArch64::FMOVXDr;
4823 else if (RetVT == MVT::i32 && SrcVT == MVT::f32)
4824 Opc = AArch64::FMOVSWr;
4825 else if (RetVT == MVT::i64 && SrcVT == MVT::f64)
4826 Opc = AArch64::FMOVDXr;
4827 else
4828 return false;
4829
4830 const TargetRegisterClass *RC = nullptr;
4831 switch (RetVT.SimpleTy) {
4832 default: llvm_unreachable("Unexpected value type.");
4833 case MVT::i32: RC = &AArch64::GPR32RegClass; break;
4834 case MVT::i64: RC = &AArch64::GPR64RegClass; break;
4835 case MVT::f32: RC = &AArch64::FPR32RegClass; break;
4836 case MVT::f64: RC = &AArch64::FPR64RegClass; break;
4837 }
4838 Register Op0Reg = getRegForValue(I->getOperand(0));
4839 if (!Op0Reg)
4840 return false;
4841
4842 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg);
4843 if (!ResultReg)
4844 return false;
4845
4846 updateValueMap(I, ResultReg);
4847 return true;
4848 }
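// E.g. "bitcast i32 %x to float" becomes a single fmov sD, wN (FMOVWSr); the
// other three opcodes cover the remaining GPR <-> FPR directions.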
4849
4850 bool AArch64FastISel::selectFRem(const Instruction *I) {
4851 MVT RetVT;
4852 if (!isTypeLegal(I->getType(), RetVT))
4853 return false;
4854
4855 RTLIB::Libcall LC;
4856 switch (RetVT.SimpleTy) {
4857 default:
4858 return false;
4859 case MVT::f32:
4860 LC = RTLIB::REM_F32;
4861 break;
4862 case MVT::f64:
4863 LC = RTLIB::REM_F64;
4864 break;
4865 }
4866
4867 ArgListTy Args;
4868 Args.reserve(I->getNumOperands());
4869
4870 // Populate the argument list.
4871 for (auto &Arg : I->operands()) {
4872 ArgListEntry Entry;
4873 Entry.Val = Arg;
4874 Entry.Ty = Arg->getType();
4875 Args.push_back(Entry);
4876 }
4877
4878 CallLoweringInfo CLI;
4879 MCContext &Ctx = MF->getContext();
4880 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(),
4881 TLI.getLibcallName(LC), std::move(Args));
4882 if (!lowerCallTo(CLI))
4883 return false;
4884 updateValueMap(I, CLI.ResultReg);
4885 return true;
4886 }
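// frem has no AArch64 instruction, so it is lowered to a call to the
// corresponding libm routine (fmodf for f32, fmod for f64) through the RTLIB
// entries selected above.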
4887
4888 bool AArch64FastISel::selectSDiv(const Instruction *I) {
4889 MVT VT;
4890 if (!isTypeLegal(I->getType(), VT))
4891 return false;
4892
4893 if (!isa<ConstantInt>(I->getOperand(1)))
4894 return selectBinaryOp(I, ISD::SDIV);
4895
4896 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
4897 if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
4898 !(C.isPowerOf2() || C.isNegatedPowerOf2()))
4899 return selectBinaryOp(I, ISD::SDIV);
4900
4901 unsigned Lg2 = C.countr_zero();
4902 Register Src0Reg = getRegForValue(I->getOperand(0));
4903 if (!Src0Reg)
4904 return false;
4905
4906 if (cast<BinaryOperator>(I)->isExact()) {
4907 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2);
4908 if (!ResultReg)
4909 return false;
4910 updateValueMap(I, ResultReg);
4911 return true;
4912 }
4913
4914 int64_t Pow2MinusOne = (1ULL << Lg2) - 1;
4915 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne);
4916 if (!AddReg)
4917 return false;
4918
4919 // (Src0 < 0) ? Pow2 - 1 : 0;
4920 if (!emitICmp_ri(VT, Src0Reg, 0))
4921 return false;
4922
4923 unsigned SelectOpc;
4924 const TargetRegisterClass *RC;
4925 if (VT == MVT::i64) {
4926 SelectOpc = AArch64::CSELXr;
4927 RC = &AArch64::GPR64RegClass;
4928 } else {
4929 SelectOpc = AArch64::CSELWr;
4930 RC = &AArch64::GPR32RegClass;
4931 }
4932 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg,
4933 AArch64CC::LT);
4934 if (!SelectReg)
4935 return false;
4936
4937 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
4938 // negate the result.
4939 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
4940 unsigned ResultReg;
4941 if (C.isNegative())
4942 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg,
4943 AArch64_AM::ASR, Lg2);
4944 else
4945 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2);
4946
4947 if (!ResultReg)
4948 return false;
4949
4950 updateValueMap(I, ResultReg);
4951 return true;
4952 }
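// Sketch of the non-exact power-of-two path for "sdiv i32 %x, 4" (register
// names are illustrative):
//   add  w8, w0, #3          // x + (4 - 1)
//   cmp  w0, #0
//   csel w8, w8, w0, lt      // use the biased value only when x < 0
//   asr  w0, w8, #2
// For a negative divisor the final shift is instead folded into a negation,
// roughly sub wD, wzr, w8, asr #2.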
4953
4954 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We
4955 /// have to duplicate it for AArch64, because otherwise we would fail during the
4956 /// sign-extend emission.
4957 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) {
4958 Register IdxN = getRegForValue(Idx);
4959 if (IdxN == 0)
4960 // Unhandled operand. Halt "fast" selection and bail.
4961 return 0;
4962
4963 // If the index is smaller or larger than intptr_t, truncate or extend it.
4964 MVT PtrVT = TLI.getPointerTy(DL);
4965 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
4966 if (IdxVT.bitsLT(PtrVT)) {
4967 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
4968 } else if (IdxVT.bitsGT(PtrVT))
4969 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
4970 return IdxN;
4971 }
4972
/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  if (Subtarget->isTargetILP32())
    return false;

  Register N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
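  // For example (hypothetical IR), "getelementptr {i32, i64}, ptr %p, i64 1,
  // i32 1" folds the array step (16 bytes) and the field offset (8 bytes)
  // into a single TotalOffs of 24, so only one add is emitted.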
  uint64_t TotalOffs = 0;
  MVT VT = TLI.getPointerTy(DL);
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (auto *StTy = GTI.getStructTypeOrNull()) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    } else {
      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        TotalOffs += GTI.getSequentialElementStride(DL) *
                     cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, TotalOffs);
        if (!N)
          return false;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = GTI.getSequentialElementStride(DL);
      unsigned IdxN = getRegForGEPIndex(Idx);
      if (!IdxN)
        return false;

      if (ElementSize != 1) {
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, C);
        if (!IdxN)
          return false;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
      if (!N)
        return false;
    }
  }
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}

bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
  assert(TM.getOptLevel() == CodeGenOptLevel::None &&
         "cmpxchg survived AtomicExpand at optlevel > -O0");

  auto *RetPairTy = cast<StructType>(I->getType());
  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
         "cmpxchg has a non-i1 status result");

  MVT VT;
  if (!isTypeLegal(RetTy, VT))
    return false;

  const TargetRegisterClass *ResRC;
  unsigned Opc, CmpOpc;
  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
  // extractvalue selection doesn't support that.
  if (VT == MVT::i32) {
    Opc = AArch64::CMP_SWAP_32;
    CmpOpc = AArch64::SUBSWrs;
    ResRC = &AArch64::GPR32RegClass;
  } else if (VT == MVT::i64) {
    Opc = AArch64::CMP_SWAP_64;
    CmpOpc = AArch64::SUBSXrs;
    ResRC = &AArch64::GPR64RegClass;
  } else {
    return false;
  }

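  // CMP_SWAP_32/64 is a pseudo instruction; it is expanded later by the
  // AArch64 pseudo-instruction expansion into an exclusive load/store retry
  // loop that produces the previous memory value.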
  const MCInstrDesc &II = TII.get(Opc);

  const Register AddrReg = constrainOperandRegClass(
      II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
  const Register DesiredReg = constrainOperandRegClass(
      II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
  const Register NewReg = constrainOperandRegClass(
      II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);

  const Register ResultReg1 = createResultReg(ResRC);
  const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
  const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass);

  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II)
      .addDef(ResultReg1)
      .addDef(ScratchReg)
      .addUse(AddrReg)
      .addUse(DesiredReg)
      .addUse(NewReg);

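  // The pseudo defines only the loaded (old) value and a scratch register; the
  // success condition is not directly available, so recompute it by comparing
  // the old value against the expected one.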
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc))
      .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
      .addUse(ResultReg1)
      .addUse(DesiredReg)
      .addImm(0);

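  // Materialize the i1 success result: CSINC with NE yields WZR (0) when the
  // values differed and WZR + 1 (1) when the compare above set EQ.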
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr))
      .addDef(ResultReg2)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::NE);

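  // The cmpxchg result is a {value, success} pair held in two consecutive
  // virtual registers; updateValueMap records both so that the generic
  // extractvalue selection can pick them up.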
  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
  updateValueMap(I, ResultReg1, 2);
  return true;
}

bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  if (TLI.fallBackToDAGISel(*I))
    return false;
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Add:
  case Instruction::Sub:
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    if (!selectBinaryOp(I, ISD::SREM))
      return selectRem(I, ISD::SREM);
    return true;
  case Instruction::URem:
    if (!selectBinaryOp(I, ISD::UREM))
      return selectRem(I, ISD::UREM);
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return selectShift(I);
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return selectLogicalOp(I);
  case Instruction::Br:
    return selectBranch(I);
  case Instruction::IndirectBr:
    return selectIndirectBr(I);
  case Instruction::BitCast:
    if (!FastISel::selectBitCast(I))
      return selectBitCast(I);
    return true;
  case Instruction::FPToSI:
    if (!selectCast(I, ISD::FP_TO_SINT))
      return selectFPToInt(I, /*Signed=*/true);
    return true;
  case Instruction::FPToUI:
    return selectFPToInt(I, /*Signed=*/false);
  case Instruction::ZExt:
  case Instruction::SExt:
    return selectIntExt(I);
  case Instruction::Trunc:
    if (!selectCast(I, ISD::TRUNCATE))
      return selectTrunc(I);
    return true;
  case Instruction::FPExt:
    return selectFPExt(I);
  case Instruction::FPTrunc:
    return selectFPTrunc(I);
  case Instruction::SIToFP:
    if (!selectCast(I, ISD::SINT_TO_FP))
      return selectIntToFP(I, /*Signed=*/true);
    return true;
  case Instruction::UIToFP:
    return selectIntToFP(I, /*Signed=*/false);
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  case Instruction::AtomicCmpXchg:
    return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
  }

  // Fall back to target-independent instruction selection.
  return selectOperator(I, I->getOpcode());
}

FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) {

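  // FastISel does not handle functions that use ZA/ZT0 state or that have a
  // streaming or streaming-compatible interface or body; return nullptr so
  // that SelectionDAG performs the lowering for them instead.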
  SMEAttrs CallerAttrs(*FuncInfo.Fn);
  if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
      CallerAttrs.hasStreamingInterfaceOrBody() ||
      CallerAttrs.hasStreamingCompatibleInterface())
    return nullptr;
  return new AArch64FastISel(FuncInfo, LibInfo);
}