//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg = 0;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) {
ExtType = E; } 105 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; } 106 bool isRegBase() const { return Kind == RegBase; } 107 bool isFIBase() const { return Kind == FrameIndexBase; } 108 109 void setReg(unsigned Reg) { 110 assert(isRegBase() && "Invalid base register access!"); 111 Base.Reg = Reg; 112 } 113 114 unsigned getReg() const { 115 assert(isRegBase() && "Invalid base register access!"); 116 return Base.Reg; 117 } 118 119 void setOffsetReg(unsigned Reg) { 120 OffsetReg = Reg; 121 } 122 123 unsigned getOffsetReg() const { 124 return OffsetReg; 125 } 126 127 void setFI(unsigned FI) { 128 assert(isFIBase() && "Invalid base frame index access!"); 129 Base.FI = FI; 130 } 131 132 unsigned getFI() const { 133 assert(isFIBase() && "Invalid base frame index access!"); 134 return Base.FI; 135 } 136 137 void setOffset(int64_t O) { Offset = O; } 138 int64_t getOffset() { return Offset; } 139 void setShift(unsigned S) { Shift = S; } 140 unsigned getShift() { return Shift; } 141 142 void setGlobalValue(const GlobalValue *G) { GV = G; } 143 const GlobalValue *getGlobalValue() { return GV; } 144 }; 145 146 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 147 /// make the right decision when generating code for different targets. 148 const AArch64Subtarget *Subtarget; 149 LLVMContext *Context; 150 151 bool fastLowerArguments() override; 152 bool fastLowerCall(CallLoweringInfo &CLI) override; 153 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; 154 155 private: 156 // Selection routines. 157 bool selectAddSub(const Instruction *I); 158 bool selectLogicalOp(const Instruction *I); 159 bool selectLoad(const Instruction *I); 160 bool selectStore(const Instruction *I); 161 bool selectBranch(const Instruction *I); 162 bool selectIndirectBr(const Instruction *I); 163 bool selectCmp(const Instruction *I); 164 bool selectSelect(const Instruction *I); 165 bool selectFPExt(const Instruction *I); 166 bool selectFPTrunc(const Instruction *I); 167 bool selectFPToInt(const Instruction *I, bool Signed); 168 bool selectIntToFP(const Instruction *I, bool Signed); 169 bool selectRem(const Instruction *I, unsigned ISDOpcode); 170 bool selectRet(const Instruction *I); 171 bool selectTrunc(const Instruction *I); 172 bool selectIntExt(const Instruction *I); 173 bool selectMul(const Instruction *I); 174 bool selectShift(const Instruction *I); 175 bool selectBitCast(const Instruction *I); 176 bool selectFRem(const Instruction *I); 177 bool selectSDiv(const Instruction *I); 178 bool selectGetElementPtr(const Instruction *I); 179 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I); 180 181 // Utility helper routines. 
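  // These answer questions about types and addressing modes (legality,
  // address computation, small-memcpy folding) and are shared by the
  // selection routines above.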
182 bool isTypeLegal(Type *Ty, MVT &VT); 183 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false); 184 bool isValueAvailable(const Value *V) const; 185 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr); 186 bool computeCallAddress(const Value *V, Address &Addr); 187 bool simplifyAddress(Address &Addr, MVT VT); 188 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB, 189 MachineMemOperand::Flags Flags, 190 unsigned ScaleFactor, MachineMemOperand *MMO); 191 bool isMemCpySmall(uint64_t Len, unsigned Alignment); 192 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, 193 unsigned Alignment); 194 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I, 195 const Value *Cond); 196 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT); 197 bool optimizeSelect(const SelectInst *SI); 198 std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx); 199 200 // Emit helper routines. 201 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, 202 const Value *RHS, bool SetFlags = false, 203 bool WantResult = true, bool IsZExt = false); 204 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, 205 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, 206 bool SetFlags = false, bool WantResult = true); 207 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, 208 bool LHSIsKill, uint64_t Imm, bool SetFlags = false, 209 bool WantResult = true); 210 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, 211 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, 212 AArch64_AM::ShiftExtendType ShiftType, 213 uint64_t ShiftImm, bool SetFlags = false, 214 bool WantResult = true); 215 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, 216 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, 217 AArch64_AM::ShiftExtendType ExtType, 218 uint64_t ShiftImm, bool SetFlags = false, 219 bool WantResult = true); 220 221 // Emit functions. 
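  // The emit* helpers build the actual machine instructions. The variants
  // returning unsigned return the result register, or 0 if the operation
  // could not be emitted.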
222 bool emitCompareAndBranch(const BranchInst *BI); 223 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt); 224 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); 225 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); 226 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); 227 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true, 228 MachineMemOperand *MMO = nullptr); 229 bool emitStore(MVT VT, unsigned SrcReg, Address Addr, 230 MachineMemOperand *MMO = nullptr); 231 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg, 232 MachineMemOperand *MMO = nullptr); 233 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); 234 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); 235 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, 236 bool SetFlags = false, bool WantResult = true, 237 bool IsZExt = false); 238 unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm); 239 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS, 240 bool SetFlags = false, bool WantResult = true, 241 bool IsZExt = false); 242 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 243 unsigned RHSReg, bool RHSIsKill, bool WantResult = true); 244 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 245 unsigned RHSReg, bool RHSIsKill, 246 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm, 247 bool WantResult = true); 248 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, 249 const Value *RHS); 250 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, 251 bool LHSIsKill, uint64_t Imm); 252 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, 253 bool LHSIsKill, unsigned RHSReg, bool RHSIsKill, 254 uint64_t ShiftImm); 255 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm); 256 unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 257 unsigned Op1, bool Op1IsKill); 258 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 259 unsigned Op1, bool Op1IsKill); 260 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 261 unsigned Op1, bool Op1IsKill); 262 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 263 unsigned Op1Reg, bool Op1IsKill); 264 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, 265 uint64_t Imm, bool IsZExt = true); 266 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 267 unsigned Op1Reg, bool Op1IsKill); 268 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, 269 uint64_t Imm, bool IsZExt = true); 270 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 271 unsigned Op1Reg, bool Op1IsKill); 272 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill, 273 uint64_t Imm, bool IsZExt = false); 274 275 unsigned materializeInt(const ConstantInt *CI, MVT VT); 276 unsigned materializeFP(const ConstantFP *CFP, MVT VT); 277 unsigned materializeGV(const GlobalValue *GV); 278 279 // Call handling routines. 280 private: 281 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; 282 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs, 283 unsigned &NumBytes); 284 bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes); 285 286 public: 287 // Backend specific FastISel code. 
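  // These override the FastISel hooks used to materialize allocas, constants
  // and floating-point zeros directly into registers.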
288 unsigned fastMaterializeAlloca(const AllocaInst *AI) override; 289 unsigned fastMaterializeConstant(const Constant *C) override; 290 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override; 291 292 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, 293 const TargetLibraryInfo *LibInfo) 294 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { 295 Subtarget = 296 &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget()); 297 Context = &FuncInfo.Fn->getContext(); 298 } 299 300 bool fastSelectInstruction(const Instruction *I) override; 301 302 #include "AArch64GenFastISel.inc" 303 }; 304 305 } // end anonymous namespace 306 307 /// Check if the sign-/zero-extend will be a noop. 308 static bool isIntExtFree(const Instruction *I) { 309 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 310 "Unexpected integer extend instruction."); 311 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && 312 "Unexpected value type."); 313 bool IsZExt = isa<ZExtInst>(I); 314 315 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0))) 316 if (LI->hasOneUse()) 317 return true; 318 319 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) 320 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) 321 return true; 322 323 return false; 324 } 325 326 /// Determine the implicit scale factor that is applied by a memory 327 /// operation for a given value type. 328 static unsigned getImplicitScaleFactor(MVT VT) { 329 switch (VT.SimpleTy) { 330 default: 331 return 0; // invalid 332 case MVT::i1: // fall-through 333 case MVT::i8: 334 return 1; 335 case MVT::i16: 336 return 2; 337 case MVT::i32: // fall-through 338 case MVT::f32: 339 return 4; 340 case MVT::i64: // fall-through 341 case MVT::f64: 342 return 8; 343 } 344 } 345 346 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { 347 if (CC == CallingConv::WebKit_JS) 348 return CC_AArch64_WebKit_JS; 349 if (CC == CallingConv::GHC) 350 return CC_AArch64_GHC; 351 if (CC == CallingConv::CFGuard_Check) 352 return CC_AArch64_Win64_CFGuard_Check; 353 return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; 354 } 355 356 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { 357 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && 358 "Alloca should always return a pointer."); 359 360 // Don't handle dynamic allocas. 361 if (!FuncInfo.StaticAllocaMap.count(AI)) 362 return 0; 363 364 DenseMap<const AllocaInst *, int>::iterator SI = 365 FuncInfo.StaticAllocaMap.find(AI); 366 367 if (SI != FuncInfo.StaticAllocaMap.end()) { 368 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); 369 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 370 ResultReg) 371 .addFrameIndex(SI->second) 372 .addImm(0) 373 .addImm(0); 374 return ResultReg; 375 } 376 377 return 0; 378 } 379 380 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { 381 if (VT > MVT::i64) 382 return 0; 383 384 if (!CI->isZero()) 385 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); 386 387 // Create a copy from the zero register to materialize a "0" value. 388 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass 389 : &AArch64::GPR32RegClass; 390 unsigned ZeroReg = (VT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR; 391 unsigned ResultReg = createResultReg(RC); 392 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), 393 ResultReg).addReg(ZeroReg, getKillRegState(true)); 394 return ResultReg; 395 } 396 397 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { 398 // Positive zero (+0.0) has to be materialized with a fmov from the zero 399 // register, because the immediate version of fmov cannot encode zero. 400 if (CFP->isNullValue()) 401 return fastMaterializeFloatZero(CFP); 402 403 if (VT != MVT::f32 && VT != MVT::f64) 404 return 0; 405 406 const APFloat Val = CFP->getValueAPF(); 407 bool Is64Bit = (VT == MVT::f64); 408 // This checks to see if we can use FMOV instructions to materialize 409 // a constant, otherwise we have to materialize via the constant pool. 410 int Imm = 411 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val); 412 if (Imm != -1) { 413 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi; 414 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); 415 } 416 417 // For the MachO large code model materialize the FP constant in code. 418 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) { 419 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm; 420 const TargetRegisterClass *RC = Is64Bit ? 421 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 422 423 unsigned TmpReg = createResultReg(RC); 424 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg) 425 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); 426 427 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 428 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 429 TII.get(TargetOpcode::COPY), ResultReg) 430 .addReg(TmpReg, getKillRegState(true)); 431 432 return ResultReg; 433 } 434 435 // Materialize via constant pool. MachineConstantPool wants an explicit 436 // alignment. 437 Align Alignment = DL.getPrefTypeAlign(CFP->getType()); 438 439 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment); 440 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 441 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 442 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE); 443 444 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui; 445 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 446 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 447 .addReg(ADRPReg) 448 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 449 return ResultReg; 450 } 451 452 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { 453 // We can't handle thread-local variables quickly yet. 454 if (GV->isThreadLocal()) 455 return 0; 456 457 // MachO still uses GOT for large code-model accesses, but ELF requires 458 // movz/movk sequences, which FastISel doesn't handle yet. 
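  // The sequences emitted below are ADRP-based, roughly either
  //   adrp x0, :got:var ; ldr x0, [x0, :got_lo12:var]   (GOT access)
  //   adrp x0, var      ; add x0, x0, :lo12:var         (direct access)
  // (ELF operand syntax shown for illustration; MachO spells these
  // @GOTPAGE/@GOTPAGEOFF and @PAGE/@PAGEOFF.)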
459 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO()) 460 return 0; 461 462 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); 463 464 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); 465 if (!DestEVT.isSimple()) 466 return 0; 467 468 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 469 unsigned ResultReg; 470 471 if (OpFlags & AArch64II::MO_GOT) { 472 // ADRP + LDRX 473 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 474 ADRPReg) 475 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); 476 477 unsigned LdrOpc; 478 if (Subtarget->isTargetILP32()) { 479 ResultReg = createResultReg(&AArch64::GPR32RegClass); 480 LdrOpc = AArch64::LDRWui; 481 } else { 482 ResultReg = createResultReg(&AArch64::GPR64RegClass); 483 LdrOpc = AArch64::LDRXui; 484 } 485 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc), 486 ResultReg) 487 .addReg(ADRPReg) 488 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | 489 AArch64II::MO_NC | OpFlags); 490 if (!Subtarget->isTargetILP32()) 491 return ResultReg; 492 493 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits 494 // so we must extend the result on ILP32. 495 unsigned Result64 = createResultReg(&AArch64::GPR64RegClass); 496 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 497 TII.get(TargetOpcode::SUBREG_TO_REG)) 498 .addDef(Result64) 499 .addImm(0) 500 .addReg(ResultReg, RegState::Kill) 501 .addImm(AArch64::sub_32); 502 return Result64; 503 } else { 504 // ADRP + ADDX 505 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 506 ADRPReg) 507 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); 508 509 ResultReg = createResultReg(&AArch64::GPR64spRegClass); 510 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 511 ResultReg) 512 .addReg(ADRPReg) 513 .addGlobalAddress(GV, 0, 514 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags) 515 .addImm(0); 516 } 517 return ResultReg; 518 } 519 520 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { 521 EVT CEVT = TLI.getValueType(DL, C->getType(), true); 522 523 // Only handle simple types. 524 if (!CEVT.isSimple()) 525 return 0; 526 MVT VT = CEVT.getSimpleVT(); 527 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that, 528 // 'null' pointers need to have a somewhat special treatment. 529 if (const auto *CPN = dyn_cast<ConstantPointerNull>(C)) { 530 (void)CPN; 531 assert(CPN->getType()->getPointerAddressSpace() == 0 && 532 "Unexpected address space"); 533 assert(VT == MVT::i64 && "Expected 64-bit pointers"); 534 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT); 535 } 536 537 if (const auto *CI = dyn_cast<ConstantInt>(C)) 538 return materializeInt(CI, VT); 539 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 540 return materializeFP(CFP, VT); 541 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 542 return materializeGV(GV); 543 544 return 0; 545 } 546 547 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { 548 assert(CFP->isNullValue() && 549 "Floating-point constant is not a positive zero."); 550 MVT VT; 551 if (!isTypeLegal(CFP->getType(), VT)) 552 return 0; 553 554 if (VT != MVT::f32 && VT != MVT::f64) 555 return 0; 556 557 bool Is64Bit = (VT == MVT::f64); 558 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 559 unsigned Opc = Is64Bit ? 
AArch64::FMOVXDr : AArch64::FMOVWSr; 560 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true); 561 } 562 563 /// Check if the multiply is by a power-of-2 constant. 564 static bool isMulPowOf2(const Value *I) { 565 if (const auto *MI = dyn_cast<MulOperator>(I)) { 566 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0))) 567 if (C->getValue().isPowerOf2()) 568 return true; 569 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1))) 570 if (C->getValue().isPowerOf2()) 571 return true; 572 } 573 return false; 574 } 575 576 // Computes the address to get to an object. 577 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) 578 { 579 const User *U = nullptr; 580 unsigned Opcode = Instruction::UserOp1; 581 if (const Instruction *I = dyn_cast<Instruction>(Obj)) { 582 // Don't walk into other basic blocks unless the object is an alloca from 583 // another block, otherwise it may not have a virtual register assigned. 584 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || 585 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 586 Opcode = I->getOpcode(); 587 U = I; 588 } 589 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { 590 Opcode = C->getOpcode(); 591 U = C; 592 } 593 594 if (auto *Ty = dyn_cast<PointerType>(Obj->getType())) 595 if (Ty->getAddressSpace() > 255) 596 // Fast instruction selection doesn't support the special 597 // address spaces. 598 return false; 599 600 switch (Opcode) { 601 default: 602 break; 603 case Instruction::BitCast: 604 // Look through bitcasts. 605 return computeAddress(U->getOperand(0), Addr, Ty); 606 607 case Instruction::IntToPtr: 608 // Look past no-op inttoptrs. 609 if (TLI.getValueType(DL, U->getOperand(0)->getType()) == 610 TLI.getPointerTy(DL)) 611 return computeAddress(U->getOperand(0), Addr, Ty); 612 break; 613 614 case Instruction::PtrToInt: 615 // Look past no-op ptrtoints. 616 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) 617 return computeAddress(U->getOperand(0), Addr, Ty); 618 break; 619 620 case Instruction::GetElementPtr: { 621 Address SavedAddr = Addr; 622 uint64_t TmpOffset = Addr.getOffset(); 623 624 // Iterate through the GEP folding the constants into offsets where 625 // we can. 626 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U); 627 GTI != E; ++GTI) { 628 const Value *Op = GTI.getOperand(); 629 if (StructType *STy = GTI.getStructTypeOrNull()) { 630 const StructLayout *SL = DL.getStructLayout(STy); 631 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); 632 TmpOffset += SL->getElementOffset(Idx); 633 } else { 634 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); 635 while (true) { 636 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { 637 // Constant-offset addressing. 638 TmpOffset += CI->getSExtValue() * S; 639 break; 640 } 641 if (canFoldAddIntoGEP(U, Op)) { 642 // A compatible add with a constant operand. Fold the constant. 643 ConstantInt *CI = 644 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); 645 TmpOffset += CI->getSExtValue() * S; 646 // Iterate on the other operand. 647 Op = cast<AddOperator>(Op)->getOperand(0); 648 continue; 649 } 650 // Unsupported 651 goto unsupported_gep; 652 } 653 } 654 } 655 656 // Try to grab the base operand now. 657 Addr.setOffset(TmpOffset); 658 if (computeAddress(U->getOperand(0), Addr, Ty)) 659 return true; 660 661 // We failed, restore everything and try the other options. 
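  // The break below falls through to the generic code at the end of
  // computeAddress, which materializes the whole GEP into a single register.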
662 Addr = SavedAddr; 663 664 unsupported_gep: 665 break; 666 } 667 case Instruction::Alloca: { 668 const AllocaInst *AI = cast<AllocaInst>(Obj); 669 DenseMap<const AllocaInst *, int>::iterator SI = 670 FuncInfo.StaticAllocaMap.find(AI); 671 if (SI != FuncInfo.StaticAllocaMap.end()) { 672 Addr.setKind(Address::FrameIndexBase); 673 Addr.setFI(SI->second); 674 return true; 675 } 676 break; 677 } 678 case Instruction::Add: { 679 // Adds of constants are common and easy enough. 680 const Value *LHS = U->getOperand(0); 681 const Value *RHS = U->getOperand(1); 682 683 if (isa<ConstantInt>(LHS)) 684 std::swap(LHS, RHS); 685 686 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 687 Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); 688 return computeAddress(LHS, Addr, Ty); 689 } 690 691 Address Backup = Addr; 692 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty)) 693 return true; 694 Addr = Backup; 695 696 break; 697 } 698 case Instruction::Sub: { 699 // Subs of constants are common and easy enough. 700 const Value *LHS = U->getOperand(0); 701 const Value *RHS = U->getOperand(1); 702 703 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 704 Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); 705 return computeAddress(LHS, Addr, Ty); 706 } 707 break; 708 } 709 case Instruction::Shl: { 710 if (Addr.getOffsetReg()) 711 break; 712 713 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1)); 714 if (!CI) 715 break; 716 717 unsigned Val = CI->getZExtValue(); 718 if (Val < 1 || Val > 3) 719 break; 720 721 uint64_t NumBytes = 0; 722 if (Ty && Ty->isSized()) { 723 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 724 NumBytes = NumBits / 8; 725 if (!isPowerOf2_64(NumBits)) 726 NumBytes = 0; 727 } 728 729 if (NumBytes != (1ULL << Val)) 730 break; 731 732 Addr.setShift(Val); 733 Addr.setExtendType(AArch64_AM::LSL); 734 735 const Value *Src = U->getOperand(0); 736 if (const auto *I = dyn_cast<Instruction>(Src)) { 737 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 738 // Fold the zext or sext when it won't become a noop. 
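  // For example, a GEP index of the form (shl (zext i32 %i to i64), 3) can be
  // selected as a [Xbase, Wi, uxtw #3] register-offset operand instead of
  // materializing the extend separately (illustrative).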
739 if (const auto *ZE = dyn_cast<ZExtInst>(I)) { 740 if (!isIntExtFree(ZE) && 741 ZE->getOperand(0)->getType()->isIntegerTy(32)) { 742 Addr.setExtendType(AArch64_AM::UXTW); 743 Src = ZE->getOperand(0); 744 } 745 } else if (const auto *SE = dyn_cast<SExtInst>(I)) { 746 if (!isIntExtFree(SE) && 747 SE->getOperand(0)->getType()->isIntegerTy(32)) { 748 Addr.setExtendType(AArch64_AM::SXTW); 749 Src = SE->getOperand(0); 750 } 751 } 752 } 753 } 754 755 if (const auto *AI = dyn_cast<BinaryOperator>(Src)) 756 if (AI->getOpcode() == Instruction::And) { 757 const Value *LHS = AI->getOperand(0); 758 const Value *RHS = AI->getOperand(1); 759 760 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 761 if (C->getValue() == 0xffffffff) 762 std::swap(LHS, RHS); 763 764 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 765 if (C->getValue() == 0xffffffff) { 766 Addr.setExtendType(AArch64_AM::UXTW); 767 unsigned Reg = getRegForValue(LHS); 768 if (!Reg) 769 return false; 770 bool RegIsKill = hasTrivialKill(LHS); 771 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, 772 AArch64::sub_32); 773 Addr.setOffsetReg(Reg); 774 return true; 775 } 776 } 777 778 unsigned Reg = getRegForValue(Src); 779 if (!Reg) 780 return false; 781 Addr.setOffsetReg(Reg); 782 return true; 783 } 784 case Instruction::Mul: { 785 if (Addr.getOffsetReg()) 786 break; 787 788 if (!isMulPowOf2(U)) 789 break; 790 791 const Value *LHS = U->getOperand(0); 792 const Value *RHS = U->getOperand(1); 793 794 // Canonicalize power-of-2 value to the RHS. 795 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 796 if (C->getValue().isPowerOf2()) 797 std::swap(LHS, RHS); 798 799 assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt."); 800 const auto *C = cast<ConstantInt>(RHS); 801 unsigned Val = C->getValue().logBase2(); 802 if (Val < 1 || Val > 3) 803 break; 804 805 uint64_t NumBytes = 0; 806 if (Ty && Ty->isSized()) { 807 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 808 NumBytes = NumBits / 8; 809 if (!isPowerOf2_64(NumBits)) 810 NumBytes = 0; 811 } 812 813 if (NumBytes != (1ULL << Val)) 814 break; 815 816 Addr.setShift(Val); 817 Addr.setExtendType(AArch64_AM::LSL); 818 819 const Value *Src = LHS; 820 if (const auto *I = dyn_cast<Instruction>(Src)) { 821 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 822 // Fold the zext or sext when it won't become a noop. 
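  // Same idea as the Shl case above: e.g. (mul (zext i32 %i to i64), 8) folds
  // to a [Xbase, Wi, uxtw #3] operand, with the power-of-2 multiplier
  // supplying the shift amount (illustrative).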
823 if (const auto *ZE = dyn_cast<ZExtInst>(I)) { 824 if (!isIntExtFree(ZE) && 825 ZE->getOperand(0)->getType()->isIntegerTy(32)) { 826 Addr.setExtendType(AArch64_AM::UXTW); 827 Src = ZE->getOperand(0); 828 } 829 } else if (const auto *SE = dyn_cast<SExtInst>(I)) { 830 if (!isIntExtFree(SE) && 831 SE->getOperand(0)->getType()->isIntegerTy(32)) { 832 Addr.setExtendType(AArch64_AM::SXTW); 833 Src = SE->getOperand(0); 834 } 835 } 836 } 837 } 838 839 unsigned Reg = getRegForValue(Src); 840 if (!Reg) 841 return false; 842 Addr.setOffsetReg(Reg); 843 return true; 844 } 845 case Instruction::And: { 846 if (Addr.getOffsetReg()) 847 break; 848 849 if (!Ty || DL.getTypeSizeInBits(Ty) != 8) 850 break; 851 852 const Value *LHS = U->getOperand(0); 853 const Value *RHS = U->getOperand(1); 854 855 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 856 if (C->getValue() == 0xffffffff) 857 std::swap(LHS, RHS); 858 859 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 860 if (C->getValue() == 0xffffffff) { 861 Addr.setShift(0); 862 Addr.setExtendType(AArch64_AM::LSL); 863 Addr.setExtendType(AArch64_AM::UXTW); 864 865 unsigned Reg = getRegForValue(LHS); 866 if (!Reg) 867 return false; 868 bool RegIsKill = hasTrivialKill(LHS); 869 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, 870 AArch64::sub_32); 871 Addr.setOffsetReg(Reg); 872 return true; 873 } 874 break; 875 } 876 case Instruction::SExt: 877 case Instruction::ZExt: { 878 if (!Addr.getReg() || Addr.getOffsetReg()) 879 break; 880 881 const Value *Src = nullptr; 882 // Fold the zext or sext when it won't become a noop. 883 if (const auto *ZE = dyn_cast<ZExtInst>(U)) { 884 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { 885 Addr.setExtendType(AArch64_AM::UXTW); 886 Src = ZE->getOperand(0); 887 } 888 } else if (const auto *SE = dyn_cast<SExtInst>(U)) { 889 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { 890 Addr.setExtendType(AArch64_AM::SXTW); 891 Src = SE->getOperand(0); 892 } 893 } 894 895 if (!Src) 896 break; 897 898 Addr.setShift(0); 899 unsigned Reg = getRegForValue(Src); 900 if (!Reg) 901 return false; 902 Addr.setOffsetReg(Reg); 903 return true; 904 } 905 } // end switch 906 907 if (Addr.isRegBase() && !Addr.getReg()) { 908 unsigned Reg = getRegForValue(Obj); 909 if (!Reg) 910 return false; 911 Addr.setReg(Reg); 912 return true; 913 } 914 915 if (!Addr.getOffsetReg()) { 916 unsigned Reg = getRegForValue(Obj); 917 if (!Reg) 918 return false; 919 Addr.setOffsetReg(Reg); 920 return true; 921 } 922 923 return false; 924 } 925 926 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { 927 const User *U = nullptr; 928 unsigned Opcode = Instruction::UserOp1; 929 bool InMBB = true; 930 931 if (const auto *I = dyn_cast<Instruction>(V)) { 932 Opcode = I->getOpcode(); 933 U = I; 934 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); 935 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) { 936 Opcode = C->getOpcode(); 937 U = C; 938 } 939 940 switch (Opcode) { 941 default: break; 942 case Instruction::BitCast: 943 // Look past bitcasts if its operand is in the same BB. 944 if (InMBB) 945 return computeCallAddress(U->getOperand(0), Addr); 946 break; 947 case Instruction::IntToPtr: 948 // Look past no-op inttoptrs if its operand is in the same BB. 
949 if (InMBB && 950 TLI.getValueType(DL, U->getOperand(0)->getType()) == 951 TLI.getPointerTy(DL)) 952 return computeCallAddress(U->getOperand(0), Addr); 953 break; 954 case Instruction::PtrToInt: 955 // Look past no-op ptrtoints if its operand is in the same BB. 956 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) 957 return computeCallAddress(U->getOperand(0), Addr); 958 break; 959 } 960 961 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 962 Addr.setGlobalValue(GV); 963 return true; 964 } 965 966 // If all else fails, try to materialize the value in a register. 967 if (!Addr.getGlobalValue()) { 968 Addr.setReg(getRegForValue(V)); 969 return Addr.getReg() != 0; 970 } 971 972 return false; 973 } 974 975 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { 976 EVT evt = TLI.getValueType(DL, Ty, true); 977 978 if (Subtarget->isTargetILP32() && Ty->isPointerTy()) 979 return false; 980 981 // Only handle simple types. 982 if (evt == MVT::Other || !evt.isSimple()) 983 return false; 984 VT = evt.getSimpleVT(); 985 986 // This is a legal type, but it's not something we handle in fast-isel. 987 if (VT == MVT::f128) 988 return false; 989 990 // Handle all other legal types, i.e. a register that will directly hold this 991 // value. 992 return TLI.isTypeLegal(VT); 993 } 994 995 /// Determine if the value type is supported by FastISel. 996 /// 997 /// FastISel for AArch64 can handle more value types than are legal. This adds 998 /// simple value type such as i1, i8, and i16. 999 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) { 1000 if (Ty->isVectorTy() && !IsVectorAllowed) 1001 return false; 1002 1003 if (isTypeLegal(Ty, VT)) 1004 return true; 1005 1006 // If this is a type than can be sign or zero-extended to a basic operation 1007 // go ahead and accept it now. 1008 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) 1009 return true; 1010 1011 return false; 1012 } 1013 1014 bool AArch64FastISel::isValueAvailable(const Value *V) const { 1015 if (!isa<Instruction>(V)) 1016 return true; 1017 1018 const auto *I = cast<Instruction>(V); 1019 return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB; 1020 } 1021 1022 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { 1023 if (Subtarget->isTargetILP32()) 1024 return false; 1025 1026 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1027 if (!ScaleFactor) 1028 return false; 1029 1030 bool ImmediateOffsetNeedsLowering = false; 1031 bool RegisterOffsetNeedsLowering = false; 1032 int64_t Offset = Addr.getOffset(); 1033 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset)) 1034 ImmediateOffsetNeedsLowering = true; 1035 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) && 1036 !isUInt<12>(Offset / ScaleFactor)) 1037 ImmediateOffsetNeedsLowering = true; 1038 1039 // Cannot encode an offset register and an immediate offset in the same 1040 // instruction. Fold the immediate offset into the load/store instruction and 1041 // emit an additional add to take care of the offset register. 1042 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg()) 1043 RegisterOffsetNeedsLowering = true; 1044 1045 // Cannot encode zero register as base. 1046 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg()) 1047 RegisterOffsetNeedsLowering = true; 1048 1049 // If this is a stack pointer and the offset needs to be simplified then put 1050 // the alloca address into a register, set the base type back to register and 1051 // continue. 
This should almost never happen. 1052 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase()) 1053 { 1054 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); 1055 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 1056 ResultReg) 1057 .addFrameIndex(Addr.getFI()) 1058 .addImm(0) 1059 .addImm(0); 1060 Addr.setKind(Address::RegBase); 1061 Addr.setReg(ResultReg); 1062 } 1063 1064 if (RegisterOffsetNeedsLowering) { 1065 unsigned ResultReg = 0; 1066 if (Addr.getReg()) { 1067 if (Addr.getExtendType() == AArch64_AM::SXTW || 1068 Addr.getExtendType() == AArch64_AM::UXTW ) 1069 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 1070 /*TODO:IsKill=*/false, Addr.getOffsetReg(), 1071 /*TODO:IsKill=*/false, Addr.getExtendType(), 1072 Addr.getShift()); 1073 else 1074 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 1075 /*TODO:IsKill=*/false, Addr.getOffsetReg(), 1076 /*TODO:IsKill=*/false, AArch64_AM::LSL, 1077 Addr.getShift()); 1078 } else { 1079 if (Addr.getExtendType() == AArch64_AM::UXTW) 1080 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), 1081 /*Op0IsKill=*/false, Addr.getShift(), 1082 /*IsZExt=*/true); 1083 else if (Addr.getExtendType() == AArch64_AM::SXTW) 1084 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), 1085 /*Op0IsKill=*/false, Addr.getShift(), 1086 /*IsZExt=*/false); 1087 else 1088 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(), 1089 /*Op0IsKill=*/false, Addr.getShift()); 1090 } 1091 if (!ResultReg) 1092 return false; 1093 1094 Addr.setReg(ResultReg); 1095 Addr.setOffsetReg(0); 1096 Addr.setShift(0); 1097 Addr.setExtendType(AArch64_AM::InvalidShiftExtend); 1098 } 1099 1100 // Since the offset is too large for the load/store instruction get the 1101 // reg+offset into a register. 1102 if (ImmediateOffsetNeedsLowering) { 1103 unsigned ResultReg; 1104 if (Addr.getReg()) 1105 // Try to fold the immediate into the add instruction. 1106 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset); 1107 else 1108 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); 1109 1110 if (!ResultReg) 1111 return false; 1112 Addr.setReg(ResultReg); 1113 Addr.setOffset(0); 1114 } 1115 return true; 1116 } 1117 1118 void AArch64FastISel::addLoadStoreOperands(Address &Addr, 1119 const MachineInstrBuilder &MIB, 1120 MachineMemOperand::Flags Flags, 1121 unsigned ScaleFactor, 1122 MachineMemOperand *MMO) { 1123 int64_t Offset = Addr.getOffset() / ScaleFactor; 1124 // Frame base works a bit differently. Handle it separately. 1125 if (Addr.isFIBase()) { 1126 int FI = Addr.getFI(); 1127 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size 1128 // and alignment should be based on the VT. 1129 MMO = FuncInfo.MF->getMachineMemOperand( 1130 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags, 1131 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 1132 // Now add the rest of the operands. 1133 MIB.addFrameIndex(FI).addImm(Offset); 1134 } else { 1135 assert(Addr.isRegBase() && "Unexpected address kind."); 1136 const MCInstrDesc &II = MIB->getDesc(); 1137 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 
1 : 0; 1138 Addr.setReg( 1139 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx)); 1140 Addr.setOffsetReg( 1141 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1)); 1142 if (Addr.getOffsetReg()) { 1143 assert(Addr.getOffset() == 0 && "Unexpected offset"); 1144 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW || 1145 Addr.getExtendType() == AArch64_AM::SXTX; 1146 MIB.addReg(Addr.getReg()); 1147 MIB.addReg(Addr.getOffsetReg()); 1148 MIB.addImm(IsSigned); 1149 MIB.addImm(Addr.getShift() != 0); 1150 } else 1151 MIB.addReg(Addr.getReg()).addImm(Offset); 1152 } 1153 1154 if (MMO) 1155 MIB.addMemOperand(MMO); 1156 } 1157 1158 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, 1159 const Value *RHS, bool SetFlags, 1160 bool WantResult, bool IsZExt) { 1161 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend; 1162 bool NeedExtend = false; 1163 switch (RetVT.SimpleTy) { 1164 default: 1165 return 0; 1166 case MVT::i1: 1167 NeedExtend = true; 1168 break; 1169 case MVT::i8: 1170 NeedExtend = true; 1171 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB; 1172 break; 1173 case MVT::i16: 1174 NeedExtend = true; 1175 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH; 1176 break; 1177 case MVT::i32: // fall-through 1178 case MVT::i64: 1179 break; 1180 } 1181 MVT SrcVT = RetVT; 1182 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32); 1183 1184 // Canonicalize immediates to the RHS first. 1185 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS)) 1186 std::swap(LHS, RHS); 1187 1188 // Canonicalize mul by power of 2 to the RHS. 1189 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) 1190 if (isMulPowOf2(LHS)) 1191 std::swap(LHS, RHS); 1192 1193 // Canonicalize shift immediate to the RHS. 1194 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) 1195 if (const auto *SI = dyn_cast<BinaryOperator>(LHS)) 1196 if (isa<ConstantInt>(SI->getOperand(1))) 1197 if (SI->getOpcode() == Instruction::Shl || 1198 SI->getOpcode() == Instruction::LShr || 1199 SI->getOpcode() == Instruction::AShr ) 1200 std::swap(LHS, RHS); 1201 1202 unsigned LHSReg = getRegForValue(LHS); 1203 if (!LHSReg) 1204 return 0; 1205 bool LHSIsKill = hasTrivialKill(LHS); 1206 1207 if (NeedExtend) 1208 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt); 1209 1210 unsigned ResultReg = 0; 1211 if (const auto *C = dyn_cast<ConstantInt>(RHS)) { 1212 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue(); 1213 if (C->isNegative()) 1214 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm, 1215 SetFlags, WantResult); 1216 else 1217 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags, 1218 WantResult); 1219 } else if (const auto *C = dyn_cast<Constant>(RHS)) 1220 if (C->isNullValue()) 1221 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags, 1222 WantResult); 1223 1224 if (ResultReg) 1225 return ResultReg; 1226 1227 // Only extend the RHS within the instruction if there is a valid extend type. 
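  // This uses the extended-register form of ADD/SUB, e.g. roughly
  // "add w0, w1, w2, sxth" or, with a small left shift folded in,
  // "add w0, w1, w2, sxth #2" (illustrative).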
1228 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && 1229 isValueAvailable(RHS)) { 1230 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) 1231 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) 1232 if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { 1233 unsigned RHSReg = getRegForValue(SI->getOperand(0)); 1234 if (!RHSReg) 1235 return 0; 1236 bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); 1237 return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, 1238 RHSIsKill, ExtendType, C->getZExtValue(), 1239 SetFlags, WantResult); 1240 } 1241 unsigned RHSReg = getRegForValue(RHS); 1242 if (!RHSReg) 1243 return 0; 1244 bool RHSIsKill = hasTrivialKill(RHS); 1245 return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 1246 ExtendType, 0, SetFlags, WantResult); 1247 } 1248 1249 // Check if the mul can be folded into the instruction. 1250 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1251 if (isMulPowOf2(RHS)) { 1252 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); 1253 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); 1254 1255 if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) 1256 if (C->getValue().isPowerOf2()) 1257 std::swap(MulLHS, MulRHS); 1258 1259 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); 1260 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); 1261 unsigned RHSReg = getRegForValue(MulLHS); 1262 if (!RHSReg) 1263 return 0; 1264 bool RHSIsKill = hasTrivialKill(MulLHS); 1265 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, 1266 RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags, 1267 WantResult); 1268 if (ResultReg) 1269 return ResultReg; 1270 } 1271 } 1272 1273 // Check if the shift can be folded into the instruction. 
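  // For example "%s = shl i64 %b, 4" followed by "add i64 %a, %s" can be
  // emitted as a single "add x0, x1, x2, lsl #4" when the shift amount is
  // valid (illustrative).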
1274 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1275 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) { 1276 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { 1277 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; 1278 switch (SI->getOpcode()) { 1279 default: break; 1280 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; 1281 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; 1282 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; 1283 } 1284 uint64_t ShiftVal = C->getZExtValue(); 1285 if (ShiftType != AArch64_AM::InvalidShiftExtend) { 1286 unsigned RHSReg = getRegForValue(SI->getOperand(0)); 1287 if (!RHSReg) 1288 return 0; 1289 bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); 1290 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, 1291 RHSIsKill, ShiftType, ShiftVal, SetFlags, 1292 WantResult); 1293 if (ResultReg) 1294 return ResultReg; 1295 } 1296 } 1297 } 1298 } 1299 1300 unsigned RHSReg = getRegForValue(RHS); 1301 if (!RHSReg) 1302 return 0; 1303 bool RHSIsKill = hasTrivialKill(RHS); 1304 1305 if (NeedExtend) 1306 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt); 1307 1308 return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 1309 SetFlags, WantResult); 1310 } 1311 1312 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, 1313 bool LHSIsKill, unsigned RHSReg, 1314 bool RHSIsKill, bool SetFlags, 1315 bool WantResult) { 1316 assert(LHSReg && RHSReg && "Invalid register number."); 1317 1318 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP || 1319 RHSReg == AArch64::SP || RHSReg == AArch64::WSP) 1320 return 0; 1321 1322 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1323 return 0; 1324 1325 static const unsigned OpcTable[2][2][2] = { 1326 { { AArch64::SUBWrr, AArch64::SUBXrr }, 1327 { AArch64::ADDWrr, AArch64::ADDXrr } }, 1328 { { AArch64::SUBSWrr, AArch64::SUBSXrr }, 1329 { AArch64::ADDSWrr, AArch64::ADDSXrr } } 1330 }; 1331 bool Is64Bit = RetVT == MVT::i64; 1332 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1333 const TargetRegisterClass *RC = 1334 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1335 unsigned ResultReg; 1336 if (WantResult) 1337 ResultReg = createResultReg(RC); 1338 else 1339 ResultReg = Is64Bit ? 
AArch64::XZR : AArch64::WZR; 1340 1341 const MCInstrDesc &II = TII.get(Opc); 1342 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1343 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1344 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1345 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1346 .addReg(RHSReg, getKillRegState(RHSIsKill)); 1347 return ResultReg; 1348 } 1349 1350 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, 1351 bool LHSIsKill, uint64_t Imm, 1352 bool SetFlags, bool WantResult) { 1353 assert(LHSReg && "Invalid register number."); 1354 1355 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1356 return 0; 1357 1358 unsigned ShiftImm; 1359 if (isUInt<12>(Imm)) 1360 ShiftImm = 0; 1361 else if ((Imm & 0xfff000) == Imm) { 1362 ShiftImm = 12; 1363 Imm >>= 12; 1364 } else 1365 return 0; 1366 1367 static const unsigned OpcTable[2][2][2] = { 1368 { { AArch64::SUBWri, AArch64::SUBXri }, 1369 { AArch64::ADDWri, AArch64::ADDXri } }, 1370 { { AArch64::SUBSWri, AArch64::SUBSXri }, 1371 { AArch64::ADDSWri, AArch64::ADDSXri } } 1372 }; 1373 bool Is64Bit = RetVT == MVT::i64; 1374 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1375 const TargetRegisterClass *RC; 1376 if (SetFlags) 1377 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1378 else 1379 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; 1380 unsigned ResultReg; 1381 if (WantResult) 1382 ResultReg = createResultReg(RC); 1383 else 1384 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1385 1386 const MCInstrDesc &II = TII.get(Opc); 1387 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1388 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1389 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1390 .addImm(Imm) 1391 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm)); 1392 return ResultReg; 1393 } 1394 1395 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, 1396 bool LHSIsKill, unsigned RHSReg, 1397 bool RHSIsKill, 1398 AArch64_AM::ShiftExtendType ShiftType, 1399 uint64_t ShiftImm, bool SetFlags, 1400 bool WantResult) { 1401 assert(LHSReg && RHSReg && "Invalid register number."); 1402 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && 1403 RHSReg != AArch64::SP && RHSReg != AArch64::WSP); 1404 1405 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1406 return 0; 1407 1408 // Don't deal with undefined shifts. 1409 if (ShiftImm >= RetVT.getSizeInBits()) 1410 return 0; 1411 1412 static const unsigned OpcTable[2][2][2] = { 1413 { { AArch64::SUBWrs, AArch64::SUBXrs }, 1414 { AArch64::ADDWrs, AArch64::ADDXrs } }, 1415 { { AArch64::SUBSWrs, AArch64::SUBSXrs }, 1416 { AArch64::ADDSWrs, AArch64::ADDSXrs } } 1417 }; 1418 bool Is64Bit = RetVT == MVT::i64; 1419 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1420 const TargetRegisterClass *RC = 1421 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1422 unsigned ResultReg; 1423 if (WantResult) 1424 ResultReg = createResultReg(RC); 1425 else 1426 ResultReg = Is64Bit ? 
AArch64::XZR : AArch64::WZR; 1427 1428 const MCInstrDesc &II = TII.get(Opc); 1429 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1430 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1431 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1432 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1433 .addReg(RHSReg, getKillRegState(RHSIsKill)) 1434 .addImm(getShifterImm(ShiftType, ShiftImm)); 1435 return ResultReg; 1436 } 1437 1438 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, 1439 bool LHSIsKill, unsigned RHSReg, 1440 bool RHSIsKill, 1441 AArch64_AM::ShiftExtendType ExtType, 1442 uint64_t ShiftImm, bool SetFlags, 1443 bool WantResult) { 1444 assert(LHSReg && RHSReg && "Invalid register number."); 1445 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && 1446 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); 1447 1448 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1449 return 0; 1450 1451 if (ShiftImm >= 4) 1452 return 0; 1453 1454 static const unsigned OpcTable[2][2][2] = { 1455 { { AArch64::SUBWrx, AArch64::SUBXrx }, 1456 { AArch64::ADDWrx, AArch64::ADDXrx } }, 1457 { { AArch64::SUBSWrx, AArch64::SUBSXrx }, 1458 { AArch64::ADDSWrx, AArch64::ADDSXrx } } 1459 }; 1460 bool Is64Bit = RetVT == MVT::i64; 1461 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1462 const TargetRegisterClass *RC = nullptr; 1463 if (SetFlags) 1464 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1465 else 1466 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; 1467 unsigned ResultReg; 1468 if (WantResult) 1469 ResultReg = createResultReg(RC); 1470 else 1471 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1472 1473 const MCInstrDesc &II = TII.get(Opc); 1474 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1475 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1476 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1477 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1478 .addReg(RHSReg, getKillRegState(RHSIsKill)) 1479 .addImm(getArithExtendImm(ExtType, ShiftImm)); 1480 return ResultReg; 1481 } 1482 1483 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { 1484 Type *Ty = LHS->getType(); 1485 EVT EVT = TLI.getValueType(DL, Ty, true); 1486 if (!EVT.isSimple()) 1487 return false; 1488 MVT VT = EVT.getSimpleVT(); 1489 1490 switch (VT.SimpleTy) { 1491 default: 1492 return false; 1493 case MVT::i1: 1494 case MVT::i8: 1495 case MVT::i16: 1496 case MVT::i32: 1497 case MVT::i64: 1498 return emitICmp(VT, LHS, RHS, IsZExt); 1499 case MVT::f32: 1500 case MVT::f64: 1501 return emitFCmp(VT, LHS, RHS); 1502 } 1503 } 1504 1505 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, 1506 bool IsZExt) { 1507 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false, 1508 IsZExt) != 0; 1509 } 1510 1511 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 1512 uint64_t Imm) { 1513 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm, 1514 /*SetFlags=*/true, /*WantResult=*/false) != 0; 1515 } 1516 1517 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { 1518 if (RetVT != MVT::f32 && RetVT != MVT::f64) 1519 return false; 1520 1521 // Check to see if the 2nd operand is a constant that we can encode directly 1522 // in the compare. 
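  // Only +0.0 has an immediate form (fcmp s0, #0.0 / fcmp d0, #0.0); any
  // other constant, including -0.0, is compared via a register below.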
1523 bool UseImm = false; 1524 if (const auto *CFP = dyn_cast<ConstantFP>(RHS)) 1525 if (CFP->isZero() && !CFP->isNegative()) 1526 UseImm = true; 1527 1528 unsigned LHSReg = getRegForValue(LHS); 1529 if (!LHSReg) 1530 return false; 1531 bool LHSIsKill = hasTrivialKill(LHS); 1532 1533 if (UseImm) { 1534 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri; 1535 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 1536 .addReg(LHSReg, getKillRegState(LHSIsKill)); 1537 return true; 1538 } 1539 1540 unsigned RHSReg = getRegForValue(RHS); 1541 if (!RHSReg) 1542 return false; 1543 bool RHSIsKill = hasTrivialKill(RHS); 1544 1545 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr; 1546 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 1547 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1548 .addReg(RHSReg, getKillRegState(RHSIsKill)); 1549 return true; 1550 } 1551 1552 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, 1553 bool SetFlags, bool WantResult, bool IsZExt) { 1554 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult, 1555 IsZExt); 1556 } 1557 1558 /// This method is a wrapper to simplify add emission. 1559 /// 1560 /// First try to emit an add with an immediate operand using emitAddSub_ri. If 1561 /// that fails, then try to materialize the immediate into a register and use 1562 /// emitAddSub_rr instead. 1563 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, 1564 int64_t Imm) { 1565 unsigned ResultReg; 1566 if (Imm < 0) 1567 ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm); 1568 else 1569 ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm); 1570 1571 if (ResultReg) 1572 return ResultReg; 1573 1574 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm); 1575 if (!CReg) 1576 return 0; 1577 1578 ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true); 1579 return ResultReg; 1580 } 1581 1582 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, 1583 bool SetFlags, bool WantResult, bool IsZExt) { 1584 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, 1585 IsZExt); 1586 } 1587 1588 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg, 1589 bool LHSIsKill, unsigned RHSReg, 1590 bool RHSIsKill, bool WantResult) { 1591 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg, 1592 RHSIsKill, /*SetFlags=*/true, WantResult); 1593 } 1594 1595 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg, 1596 bool LHSIsKill, unsigned RHSReg, 1597 bool RHSIsKill, 1598 AArch64_AM::ShiftExtendType ShiftType, 1599 uint64_t ShiftImm, bool WantResult) { 1600 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg, 1601 RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true, 1602 WantResult); 1603 } 1604 1605 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, 1606 const Value *LHS, const Value *RHS) { 1607 // Canonicalize immediates to the RHS first. 1608 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS)) 1609 std::swap(LHS, RHS); 1610 1611 // Canonicalize mul by power-of-2 to the RHS. 1612 if (LHS->hasOneUse() && isValueAvailable(LHS)) 1613 if (isMulPowOf2(LHS)) 1614 std::swap(LHS, RHS); 1615 1616 // Canonicalize shift immediate to the RHS. 
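  // E.g. "and (shl %x, 2), %y" is rewritten as "and %y, (shl %x, 2)" so the
  // shift can later be folded into the shifted-register form (ANDWrs etc.)
  // handled further down.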
1617 if (LHS->hasOneUse() && isValueAvailable(LHS)) 1618 if (const auto *SI = dyn_cast<ShlOperator>(LHS)) 1619 if (isa<ConstantInt>(SI->getOperand(1))) 1620 std::swap(LHS, RHS); 1621 1622 unsigned LHSReg = getRegForValue(LHS); 1623 if (!LHSReg) 1624 return 0; 1625 bool LHSIsKill = hasTrivialKill(LHS); 1626 1627 unsigned ResultReg = 0; 1628 if (const auto *C = dyn_cast<ConstantInt>(RHS)) { 1629 uint64_t Imm = C->getZExtValue(); 1630 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm); 1631 } 1632 if (ResultReg) 1633 return ResultReg; 1634 1635 // Check if the mul can be folded into the instruction. 1636 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1637 if (isMulPowOf2(RHS)) { 1638 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); 1639 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); 1640 1641 if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) 1642 if (C->getValue().isPowerOf2()) 1643 std::swap(MulLHS, MulRHS); 1644 1645 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); 1646 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); 1647 1648 unsigned RHSReg = getRegForValue(MulLHS); 1649 if (!RHSReg) 1650 return 0; 1651 bool RHSIsKill = hasTrivialKill(MulLHS); 1652 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, 1653 RHSIsKill, ShiftVal); 1654 if (ResultReg) 1655 return ResultReg; 1656 } 1657 } 1658 1659 // Check if the shift can be folded into the instruction. 1660 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1661 if (const auto *SI = dyn_cast<ShlOperator>(RHS)) 1662 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { 1663 uint64_t ShiftVal = C->getZExtValue(); 1664 unsigned RHSReg = getRegForValue(SI->getOperand(0)); 1665 if (!RHSReg) 1666 return 0; 1667 bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); 1668 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, 1669 RHSIsKill, ShiftVal); 1670 if (ResultReg) 1671 return ResultReg; 1672 } 1673 } 1674 1675 unsigned RHSReg = getRegForValue(RHS); 1676 if (!RHSReg) 1677 return 0; 1678 bool RHSIsKill = hasTrivialKill(RHS); 1679 1680 MVT VT = std::max(MVT::i32, RetVT.SimpleTy); 1681 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 1682 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1683 uint64_t Mask = (RetVT == MVT::i8) ? 
0xff : 0xffff; 1684 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1685 } 1686 return ResultReg; 1687 } 1688 1689 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, 1690 unsigned LHSReg, bool LHSIsKill, 1691 uint64_t Imm) { 1692 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), 1693 "ISD nodes are not consecutive!"); 1694 static const unsigned OpcTable[3][2] = { 1695 { AArch64::ANDWri, AArch64::ANDXri }, 1696 { AArch64::ORRWri, AArch64::ORRXri }, 1697 { AArch64::EORWri, AArch64::EORXri } 1698 }; 1699 const TargetRegisterClass *RC; 1700 unsigned Opc; 1701 unsigned RegSize; 1702 switch (RetVT.SimpleTy) { 1703 default: 1704 return 0; 1705 case MVT::i1: 1706 case MVT::i8: 1707 case MVT::i16: 1708 case MVT::i32: { 1709 unsigned Idx = ISDOpc - ISD::AND; 1710 Opc = OpcTable[Idx][0]; 1711 RC = &AArch64::GPR32spRegClass; 1712 RegSize = 32; 1713 break; 1714 } 1715 case MVT::i64: 1716 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1717 RC = &AArch64::GPR64spRegClass; 1718 RegSize = 64; 1719 break; 1720 } 1721 1722 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize)) 1723 return 0; 1724 1725 unsigned ResultReg = 1726 fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill, 1727 AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); 1728 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) { 1729 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1730 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1731 } 1732 return ResultReg; 1733 } 1734 1735 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, 1736 unsigned LHSReg, bool LHSIsKill, 1737 unsigned RHSReg, bool RHSIsKill, 1738 uint64_t ShiftImm) { 1739 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), 1740 "ISD nodes are not consecutive!"); 1741 static const unsigned OpcTable[3][2] = { 1742 { AArch64::ANDWrs, AArch64::ANDXrs }, 1743 { AArch64::ORRWrs, AArch64::ORRXrs }, 1744 { AArch64::EORWrs, AArch64::EORXrs } 1745 }; 1746 1747 // Don't deal with undefined shifts. 1748 if (ShiftImm >= RetVT.getSizeInBits()) 1749 return 0; 1750 1751 const TargetRegisterClass *RC; 1752 unsigned Opc; 1753 switch (RetVT.SimpleTy) { 1754 default: 1755 return 0; 1756 case MVT::i1: 1757 case MVT::i8: 1758 case MVT::i16: 1759 case MVT::i32: 1760 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1761 RC = &AArch64::GPR32RegClass; 1762 break; 1763 case MVT::i64: 1764 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1765 RC = &AArch64::GPR64RegClass; 1766 break; 1767 } 1768 unsigned ResultReg = 1769 fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 1770 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1771 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1772 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1773 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1774 } 1775 return ResultReg; 1776 } 1777 1778 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 1779 uint64_t Imm) { 1780 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm); 1781 } 1782 1783 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1784 bool WantZExt, MachineMemOperand *MMO) { 1785 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1786 return 0; 1787 1788 // Simplify this down to something we can handle. 
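  // Roughly, simplifyAddress folds the computed Address into a form the
  // load/store encodings can actually express: a base register (or frame
  // index) plus either an immediate offset or a shifted/extended register
  // offset; anything more complex is materialized into a single base
  // register first.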
1789 if (!simplifyAddress(Addr, VT)) 1790 return 0; 1791 1792 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1793 if (!ScaleFactor) 1794 llvm_unreachable("Unexpected value type."); 1795 1796 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1797 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1798 bool UseScaled = true; 1799 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1800 UseScaled = false; 1801 ScaleFactor = 1; 1802 } 1803 1804 static const unsigned GPOpcTable[2][8][4] = { 1805 // Sign-extend. 1806 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1807 AArch64::LDURXi }, 1808 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1809 AArch64::LDURXi }, 1810 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1811 AArch64::LDRXui }, 1812 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1813 AArch64::LDRXui }, 1814 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1815 AArch64::LDRXroX }, 1816 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1817 AArch64::LDRXroX }, 1818 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1819 AArch64::LDRXroW }, 1820 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1821 AArch64::LDRXroW } 1822 }, 1823 // Zero-extend. 1824 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1825 AArch64::LDURXi }, 1826 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1827 AArch64::LDURXi }, 1828 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1829 AArch64::LDRXui }, 1830 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1831 AArch64::LDRXui }, 1832 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1833 AArch64::LDRXroX }, 1834 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1835 AArch64::LDRXroX }, 1836 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1837 AArch64::LDRXroW }, 1838 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1839 AArch64::LDRXroW } 1840 } 1841 }; 1842 1843 static const unsigned FPOpcTable[4][2] = { 1844 { AArch64::LDURSi, AArch64::LDURDi }, 1845 { AArch64::LDRSui, AArch64::LDRDui }, 1846 { AArch64::LDRSroX, AArch64::LDRDroX }, 1847 { AArch64::LDRSroW, AArch64::LDRDroW } 1848 }; 1849 1850 unsigned Opc; 1851 const TargetRegisterClass *RC; 1852 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1853 Addr.getOffsetReg(); 1854 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1855 if (Addr.getExtendType() == AArch64_AM::UXTW || 1856 Addr.getExtendType() == AArch64_AM::SXTW) 1857 Idx++; 1858 1859 bool IsRet64Bit = RetVT == MVT::i64; 1860 switch (VT.SimpleTy) { 1861 default: 1862 llvm_unreachable("Unexpected value type."); 1863 case MVT::i1: // Intentional fall-through. 1864 case MVT::i8: 1865 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1866 RC = (IsRet64Bit && !WantZExt) ? 1867 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1868 break; 1869 case MVT::i16: 1870 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1871 RC = (IsRet64Bit && !WantZExt) ? 1872 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1873 break; 1874 case MVT::i32: 1875 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1876 RC = (IsRet64Bit && !WantZExt) ? 
1877 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1878 break; 1879 case MVT::i64: 1880 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1881 RC = &AArch64::GPR64RegClass; 1882 break; 1883 case MVT::f32: 1884 Opc = FPOpcTable[Idx][0]; 1885 RC = &AArch64::FPR32RegClass; 1886 break; 1887 case MVT::f64: 1888 Opc = FPOpcTable[Idx][1]; 1889 RC = &AArch64::FPR64RegClass; 1890 break; 1891 } 1892 1893 // Create the base instruction, then add the operands. 1894 unsigned ResultReg = createResultReg(RC); 1895 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1896 TII.get(Opc), ResultReg); 1897 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1898 1899 // Loading an i1 requires special handling. 1900 if (VT == MVT::i1) { 1901 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1); 1902 assert(ANDReg && "Unexpected AND instruction emission failure."); 1903 ResultReg = ANDReg; 1904 } 1905 1906 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1907 // the 32bit reg to a 64bit reg. 1908 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1909 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); 1910 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1911 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1912 .addImm(0) 1913 .addReg(ResultReg, getKillRegState(true)) 1914 .addImm(AArch64::sub_32); 1915 ResultReg = Reg64; 1916 } 1917 return ResultReg; 1918 } 1919 1920 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1921 MVT VT; 1922 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1923 return false; 1924 1925 if (VT.isVector()) 1926 return selectOperator(I, I->getOpcode()); 1927 1928 unsigned ResultReg; 1929 switch (I->getOpcode()) { 1930 default: 1931 llvm_unreachable("Unexpected instruction."); 1932 case Instruction::Add: 1933 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1934 break; 1935 case Instruction::Sub: 1936 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1937 break; 1938 } 1939 if (!ResultReg) 1940 return false; 1941 1942 updateValueMap(I, ResultReg); 1943 return true; 1944 } 1945 1946 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1947 MVT VT; 1948 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1949 return false; 1950 1951 if (VT.isVector()) 1952 return selectOperator(I, I->getOpcode()); 1953 1954 unsigned ResultReg; 1955 switch (I->getOpcode()) { 1956 default: 1957 llvm_unreachable("Unexpected instruction."); 1958 case Instruction::And: 1959 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1960 break; 1961 case Instruction::Or: 1962 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1963 break; 1964 case Instruction::Xor: 1965 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1966 break; 1967 } 1968 if (!ResultReg) 1969 return false; 1970 1971 updateValueMap(I, ResultReg); 1972 return true; 1973 } 1974 1975 bool AArch64FastISel::selectLoad(const Instruction *I) { 1976 MVT VT; 1977 // Verify we have a legal type before going any further. Currently, we handle 1978 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1979 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 
1980 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1981 cast<LoadInst>(I)->isAtomic()) 1982 return false; 1983 1984 const Value *SV = I->getOperand(0); 1985 if (TLI.supportSwiftError()) { 1986 // Swifterror values can come from either a function parameter with 1987 // swifterror attribute or an alloca with swifterror attribute. 1988 if (const Argument *Arg = dyn_cast<Argument>(SV)) { 1989 if (Arg->hasSwiftErrorAttr()) 1990 return false; 1991 } 1992 1993 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { 1994 if (Alloca->isSwiftError()) 1995 return false; 1996 } 1997 } 1998 1999 // See if we can handle this address. 2000 Address Addr; 2001 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 2002 return false; 2003 2004 // Fold the following sign-/zero-extend into the load instruction. 2005 bool WantZExt = true; 2006 MVT RetVT = VT; 2007 const Value *IntExtVal = nullptr; 2008 if (I->hasOneUse()) { 2009 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { 2010 if (isTypeSupported(ZE->getType(), RetVT)) 2011 IntExtVal = ZE; 2012 else 2013 RetVT = VT; 2014 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { 2015 if (isTypeSupported(SE->getType(), RetVT)) 2016 IntExtVal = SE; 2017 else 2018 RetVT = VT; 2019 WantZExt = false; 2020 } 2021 } 2022 2023 unsigned ResultReg = 2024 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); 2025 if (!ResultReg) 2026 return false; 2027 2028 // There are a few different cases we have to handle, because the load or the 2029 // sign-/zero-extend might not be selected by FastISel if we fall-back to 2030 // SelectionDAG. There is also an ordering issue when both instructions are in 2031 // different basic blocks. 2032 // 1.) The load instruction is selected by FastISel, but the integer extend 2033 // not. This usually happens when the integer extend is in a different 2034 // basic block and SelectionDAG took over for that basic block. 2035 // 2.) The load instruction is selected before the integer extend. This only 2036 // happens when the integer extend is in a different basic block. 2037 // 3.) The load instruction is selected by SelectionDAG and the integer extend 2038 // by FastISel. This happens if there are instructions between the load 2039 // and the integer extend that couldn't be selected by FastISel. 2040 if (IntExtVal) { 2041 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG 2042 // could select it. Emit a copy to subreg if necessary. FastISel will remove 2043 // it when it selects the integer extend. 2044 unsigned Reg = lookUpRegForValue(IntExtVal); 2045 auto *MI = MRI.getUniqueVRegDef(Reg); 2046 if (!MI) { 2047 if (RetVT == MVT::i64 && VT <= MVT::i32) { 2048 if (WantZExt) { 2049 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). 2050 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt)); 2051 ResultReg = std::prev(I)->getOperand(0).getReg(); 2052 removeDeadCode(I, std::next(I)); 2053 } else 2054 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, 2055 /*IsKill=*/true, 2056 AArch64::sub_32); 2057 } 2058 updateValueMap(I, ResultReg); 2059 return true; 2060 } 2061 2062 // The integer extend has already been emitted - delete all the instructions 2063 // that have been emitted by the integer extend lowering code and use the 2064 // result from the load instruction directly. 
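  // The extend was already lowered, so delete the instructions it produced:
  // starting from the def of its result register, remove each instruction and
  // follow its first register use operand to the previous def, stopping once
  // a register with no unique definition (typically the placeholder vreg of
  // the not-yet-selected load) is reached. The freshly emitted load result
  // then replaces the extend's value below.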
2065 while (MI) { 2066 Reg = 0; 2067 for (auto &Opnd : MI->uses()) { 2068 if (Opnd.isReg()) { 2069 Reg = Opnd.getReg(); 2070 break; 2071 } 2072 } 2073 MachineBasicBlock::iterator I(MI); 2074 removeDeadCode(I, std::next(I)); 2075 MI = nullptr; 2076 if (Reg) 2077 MI = MRI.getUniqueVRegDef(Reg); 2078 } 2079 updateValueMap(IntExtVal, ResultReg); 2080 return true; 2081 } 2082 2083 updateValueMap(I, ResultReg); 2084 return true; 2085 } 2086 2087 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, 2088 unsigned AddrReg, 2089 MachineMemOperand *MMO) { 2090 unsigned Opc; 2091 switch (VT.SimpleTy) { 2092 default: return false; 2093 case MVT::i8: Opc = AArch64::STLRB; break; 2094 case MVT::i16: Opc = AArch64::STLRH; break; 2095 case MVT::i32: Opc = AArch64::STLRW; break; 2096 case MVT::i64: Opc = AArch64::STLRX; break; 2097 } 2098 2099 const MCInstrDesc &II = TII.get(Opc); 2100 SrcReg = constrainOperandRegClass(II, SrcReg, 0); 2101 AddrReg = constrainOperandRegClass(II, AddrReg, 1); 2102 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2103 .addReg(SrcReg) 2104 .addReg(AddrReg) 2105 .addMemOperand(MMO); 2106 return true; 2107 } 2108 2109 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 2110 MachineMemOperand *MMO) { 2111 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 2112 return false; 2113 2114 // Simplify this down to something we can handle. 2115 if (!simplifyAddress(Addr, VT)) 2116 return false; 2117 2118 unsigned ScaleFactor = getImplicitScaleFactor(VT); 2119 if (!ScaleFactor) 2120 llvm_unreachable("Unexpected value type."); 2121 2122 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 2123 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 2124 bool UseScaled = true; 2125 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 2126 UseScaled = false; 2127 ScaleFactor = 1; 2128 } 2129 2130 static const unsigned OpcTable[4][6] = { 2131 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 2132 AArch64::STURSi, AArch64::STURDi }, 2133 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 2134 AArch64::STRSui, AArch64::STRDui }, 2135 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 2136 AArch64::STRSroX, AArch64::STRDroX }, 2137 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 2138 AArch64::STRSroW, AArch64::STRDroW } 2139 }; 2140 2141 unsigned Opc; 2142 bool VTIsi1 = false; 2143 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 2144 Addr.getOffsetReg(); 2145 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 2146 if (Addr.getExtendType() == AArch64_AM::UXTW || 2147 Addr.getExtendType() == AArch64_AM::SXTW) 2148 Idx++; 2149 2150 switch (VT.SimpleTy) { 2151 default: llvm_unreachable("Unexpected value type."); 2152 case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH; 2153 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2154 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2155 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2156 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2157 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2158 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2159 } 2160 2161 // Storing an i1 requires special handling. 
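  // An i1 lives in a W register whose upper 31 bits are not guaranteed to be
  // zero, so it is masked down to bit 0 with an AND before the byte store.
  // WZR is already all zeroes and can be stored as-is.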
2162 if (VTIsi1 && SrcReg != AArch64::WZR) { 2163 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); 2164 assert(ANDReg && "Unexpected AND instruction emission failure."); 2165 SrcReg = ANDReg; 2166 } 2167 // Create the base instruction, then add the operands. 2168 const MCInstrDesc &II = TII.get(Opc); 2169 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2170 MachineInstrBuilder MIB = 2171 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg); 2172 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2173 2174 return true; 2175 } 2176 2177 bool AArch64FastISel::selectStore(const Instruction *I) { 2178 MVT VT; 2179 const Value *Op0 = I->getOperand(0); 2180 // Verify we have a legal type before going any further. Currently, we handle 2181 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2182 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2183 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) 2184 return false; 2185 2186 const Value *PtrV = I->getOperand(1); 2187 if (TLI.supportSwiftError()) { 2188 // Swifterror values can come from either a function parameter with 2189 // swifterror attribute or an alloca with swifterror attribute. 2190 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { 2191 if (Arg->hasSwiftErrorAttr()) 2192 return false; 2193 } 2194 2195 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { 2196 if (Alloca->isSwiftError()) 2197 return false; 2198 } 2199 } 2200 2201 // Get the value to be stored into a register. Use the zero register directly 2202 // when possible to avoid an unnecessary copy and a wasted register. 2203 unsigned SrcReg = 0; 2204 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2205 if (CI->isZero()) 2206 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2207 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2208 if (CF->isZero() && !CF->isNegative()) { 2209 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2210 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2211 } 2212 } 2213 2214 if (!SrcReg) 2215 SrcReg = getRegForValue(Op0); 2216 2217 if (!SrcReg) 2218 return false; 2219 2220 auto *SI = cast<StoreInst>(I); 2221 2222 // Try to emit a STLR for seq_cst/release. 2223 if (SI->isAtomic()) { 2224 AtomicOrdering Ord = SI->getOrdering(); 2225 // The non-atomic instructions are sufficient for relaxed stores. 2226 if (isReleaseOrStronger(Ord)) { 2227 // The STLR addressing mode only supports a base reg; pass that directly. 2228 unsigned AddrReg = getRegForValue(PtrV); 2229 return emitStoreRelease(VT, SrcReg, AddrReg, 2230 createMachineMemOperandFor(I)); 2231 } 2232 } 2233 2234 // See if we can handle this address. 2235 Address Addr; 2236 if (!computeAddress(PtrV, Addr, Op0->getType())) 2237 return false; 2238 2239 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2240 return false; 2241 return true; 2242 } 2243 2244 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2245 switch (Pred) { 2246 case CmpInst::FCMP_ONE: 2247 case CmpInst::FCMP_UEQ: 2248 default: 2249 // AL is our "false" for now. The other two need more compares. 
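  // FCMP_ONE ("ordered and not equal") and FCMP_UEQ ("unordered or equal")
  // cannot be expressed as a single AArch64 condition code; the callers
  // (selectCmp, selectSelect, selectBranch) handle them by checking two
  // condition codes and combining the results.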
2250 return AArch64CC::AL; 2251 case CmpInst::ICMP_EQ: 2252 case CmpInst::FCMP_OEQ: 2253 return AArch64CC::EQ; 2254 case CmpInst::ICMP_SGT: 2255 case CmpInst::FCMP_OGT: 2256 return AArch64CC::GT; 2257 case CmpInst::ICMP_SGE: 2258 case CmpInst::FCMP_OGE: 2259 return AArch64CC::GE; 2260 case CmpInst::ICMP_UGT: 2261 case CmpInst::FCMP_UGT: 2262 return AArch64CC::HI; 2263 case CmpInst::FCMP_OLT: 2264 return AArch64CC::MI; 2265 case CmpInst::ICMP_ULE: 2266 case CmpInst::FCMP_OLE: 2267 return AArch64CC::LS; 2268 case CmpInst::FCMP_ORD: 2269 return AArch64CC::VC; 2270 case CmpInst::FCMP_UNO: 2271 return AArch64CC::VS; 2272 case CmpInst::FCMP_UGE: 2273 return AArch64CC::PL; 2274 case CmpInst::ICMP_SLT: 2275 case CmpInst::FCMP_ULT: 2276 return AArch64CC::LT; 2277 case CmpInst::ICMP_SLE: 2278 case CmpInst::FCMP_ULE: 2279 return AArch64CC::LE; 2280 case CmpInst::FCMP_UNE: 2281 case CmpInst::ICMP_NE: 2282 return AArch64CC::NE; 2283 case CmpInst::ICMP_UGE: 2284 return AArch64CC::HS; 2285 case CmpInst::ICMP_ULT: 2286 return AArch64CC::LO; 2287 } 2288 } 2289 2290 /// Try to emit a combined compare-and-branch instruction. 2291 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2292 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions 2293 // will not be produced, as they are conditional branch instructions that do 2294 // not set flags. 2295 if (FuncInfo.MF->getFunction().hasFnAttribute( 2296 Attribute::SpeculativeLoadHardening)) 2297 return false; 2298 2299 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2300 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2301 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2302 2303 const Value *LHS = CI->getOperand(0); 2304 const Value *RHS = CI->getOperand(1); 2305 2306 MVT VT; 2307 if (!isTypeSupported(LHS->getType(), VT)) 2308 return false; 2309 2310 unsigned BW = VT.getSizeInBits(); 2311 if (BW > 64) 2312 return false; 2313 2314 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2315 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2316 2317 // Try to take advantage of fallthrough opportunities. 
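  // If the true block is the layout successor we would branch over it, so
  // swap the successors and invert the predicate; the conditional branch then
  // jumps to the (former) false block and the true block is reached by
  // fallthrough.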
2318 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2319 std::swap(TBB, FBB); 2320 Predicate = CmpInst::getInversePredicate(Predicate); 2321 } 2322 2323 int TestBit = -1; 2324 bool IsCmpNE; 2325 switch (Predicate) { 2326 default: 2327 return false; 2328 case CmpInst::ICMP_EQ: 2329 case CmpInst::ICMP_NE: 2330 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2331 std::swap(LHS, RHS); 2332 2333 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2334 return false; 2335 2336 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2337 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2338 const Value *AndLHS = AI->getOperand(0); 2339 const Value *AndRHS = AI->getOperand(1); 2340 2341 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2342 if (C->getValue().isPowerOf2()) 2343 std::swap(AndLHS, AndRHS); 2344 2345 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2346 if (C->getValue().isPowerOf2()) { 2347 TestBit = C->getValue().logBase2(); 2348 LHS = AndLHS; 2349 } 2350 } 2351 2352 if (VT == MVT::i1) 2353 TestBit = 0; 2354 2355 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2356 break; 2357 case CmpInst::ICMP_SLT: 2358 case CmpInst::ICMP_SGE: 2359 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2360 return false; 2361 2362 TestBit = BW - 1; 2363 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2364 break; 2365 case CmpInst::ICMP_SGT: 2366 case CmpInst::ICMP_SLE: 2367 if (!isa<ConstantInt>(RHS)) 2368 return false; 2369 2370 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2371 return false; 2372 2373 TestBit = BW - 1; 2374 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2375 break; 2376 } // end switch 2377 2378 static const unsigned OpcTable[2][2][2] = { 2379 { {AArch64::CBZW, AArch64::CBZX }, 2380 {AArch64::CBNZW, AArch64::CBNZX} }, 2381 { {AArch64::TBZW, AArch64::TBZX }, 2382 {AArch64::TBNZW, AArch64::TBNZX} } 2383 }; 2384 2385 bool IsBitTest = TestBit != -1; 2386 bool Is64Bit = BW == 64; 2387 if (TestBit < 32 && TestBit >= 0) 2388 Is64Bit = false; 2389 2390 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2391 const MCInstrDesc &II = TII.get(Opc); 2392 2393 unsigned SrcReg = getRegForValue(LHS); 2394 if (!SrcReg) 2395 return false; 2396 bool SrcIsKill = hasTrivialKill(LHS); 2397 2398 if (BW == 64 && !Is64Bit) 2399 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, 2400 AArch64::sub_32); 2401 2402 if ((BW < 32) && !IsBitTest) 2403 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); 2404 2405 // Emit the combined compare and branch instruction. 2406 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2407 MachineInstrBuilder MIB = 2408 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 2409 .addReg(SrcReg, getKillRegState(SrcIsKill)); 2410 if (IsBitTest) 2411 MIB.addImm(TestBit); 2412 MIB.addMBB(TBB); 2413 2414 finishCondBranch(BI->getParent(), TBB, FBB); 2415 return true; 2416 } 2417 2418 bool AArch64FastISel::selectBranch(const Instruction *I) { 2419 const BranchInst *BI = cast<BranchInst>(I); 2420 if (BI->isUnconditional()) { 2421 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2422 fastEmitBranch(MSucc, BI->getDebugLoc()); 2423 return true; 2424 } 2425 2426 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2427 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2428 2429 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2430 if (CI->hasOneUse() && isValueAvailable(CI)) { 2431 // Try to optimize or fold the cmp. 
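  // optimizeCmpPredicate may constant-fold the compare: the FCMP_FALSE and
  // FCMP_TRUE cases below turn the conditional branch into an unconditional
  // branch to the false or true successor, with no compare emitted at all.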
2432 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2433 switch (Predicate) { 2434 default: 2435 break; 2436 case CmpInst::FCMP_FALSE: 2437 fastEmitBranch(FBB, DbgLoc); 2438 return true; 2439 case CmpInst::FCMP_TRUE: 2440 fastEmitBranch(TBB, DbgLoc); 2441 return true; 2442 } 2443 2444 // Try to emit a combined compare-and-branch first. 2445 if (emitCompareAndBranch(BI)) 2446 return true; 2447 2448 // Try to take advantage of fallthrough opportunities. 2449 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2450 std::swap(TBB, FBB); 2451 Predicate = CmpInst::getInversePredicate(Predicate); 2452 } 2453 2454 // Emit the cmp. 2455 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2456 return false; 2457 2458 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2459 // instruction. 2460 AArch64CC::CondCode CC = getCompareCC(Predicate); 2461 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2462 switch (Predicate) { 2463 default: 2464 break; 2465 case CmpInst::FCMP_UEQ: 2466 ExtraCC = AArch64CC::EQ; 2467 CC = AArch64CC::VS; 2468 break; 2469 case CmpInst::FCMP_ONE: 2470 ExtraCC = AArch64CC::MI; 2471 CC = AArch64CC::GT; 2472 break; 2473 } 2474 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2475 2476 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2477 if (ExtraCC != AArch64CC::AL) { 2478 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2479 .addImm(ExtraCC) 2480 .addMBB(TBB); 2481 } 2482 2483 // Emit the branch. 2484 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2485 .addImm(CC) 2486 .addMBB(TBB); 2487 2488 finishCondBranch(BI->getParent(), TBB, FBB); 2489 return true; 2490 } 2491 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2492 uint64_t Imm = CI->getZExtValue(); 2493 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2494 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) 2495 .addMBB(Target); 2496 2497 // Obtain the branch probability and add the target to the successor list. 2498 if (FuncInfo.BPI) { 2499 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2500 BI->getParent(), Target->getBasicBlock()); 2501 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2502 } else 2503 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2504 return true; 2505 } else { 2506 AArch64CC::CondCode CC = AArch64CC::NE; 2507 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2508 // Fake request the condition, otherwise the intrinsic might be completely 2509 // optimized away. 2510 unsigned CondReg = getRegForValue(BI->getCondition()); 2511 if (!CondReg) 2512 return false; 2513 2514 // Emit the branch. 2515 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2516 .addImm(CC) 2517 .addMBB(TBB); 2518 2519 finishCondBranch(BI->getParent(), TBB, FBB); 2520 return true; 2521 } 2522 } 2523 2524 unsigned CondReg = getRegForValue(BI->getCondition()); 2525 if (CondReg == 0) 2526 return false; 2527 bool CondRegIsKill = hasTrivialKill(BI->getCondition()); 2528 2529 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
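  // For example, this emits something like "tbnz wN, #0, <true block>"; if
  // the true block is the layout successor the successors are swapped below
  // and a "tbz" is used instead, so the taken branch targets the false block
  // and the true block falls through.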
2530 unsigned Opcode = AArch64::TBNZW; 2531 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2532 std::swap(TBB, FBB); 2533 Opcode = AArch64::TBZW; 2534 } 2535 2536 const MCInstrDesc &II = TII.get(Opcode); 2537 unsigned ConstrainedCondReg 2538 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2539 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2540 .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) 2541 .addImm(0) 2542 .addMBB(TBB); 2543 2544 finishCondBranch(BI->getParent(), TBB, FBB); 2545 return true; 2546 } 2547 2548 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2549 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2550 unsigned AddrReg = getRegForValue(BI->getOperand(0)); 2551 if (AddrReg == 0) 2552 return false; 2553 2554 // Emit the indirect branch. 2555 const MCInstrDesc &II = TII.get(AArch64::BR); 2556 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2557 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); 2558 2559 // Make sure the CFG is up-to-date. 2560 for (auto *Succ : BI->successors()) 2561 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); 2562 2563 return true; 2564 } 2565 2566 bool AArch64FastISel::selectCmp(const Instruction *I) { 2567 const CmpInst *CI = cast<CmpInst>(I); 2568 2569 // Vectors of i1 are weird: bail out. 2570 if (CI->getType()->isVectorTy()) 2571 return false; 2572 2573 // Try to optimize or fold the cmp. 2574 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2575 unsigned ResultReg = 0; 2576 switch (Predicate) { 2577 default: 2578 break; 2579 case CmpInst::FCMP_FALSE: 2580 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2581 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2582 TII.get(TargetOpcode::COPY), ResultReg) 2583 .addReg(AArch64::WZR, getKillRegState(true)); 2584 break; 2585 case CmpInst::FCMP_TRUE: 2586 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2587 break; 2588 } 2589 2590 if (ResultReg) { 2591 updateValueMap(I, ResultReg); 2592 return true; 2593 } 2594 2595 // Emit the cmp. 2596 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2597 return false; 2598 2599 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2600 2601 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2602 // condition codes are inverted, because they are used by CSINC. 2603 static unsigned CondCodeTable[2][2] = { 2604 { AArch64CC::NE, AArch64CC::VC }, 2605 { AArch64CC::PL, AArch64CC::LE } 2606 }; 2607 unsigned *CondCodes = nullptr; 2608 switch (Predicate) { 2609 default: 2610 break; 2611 case CmpInst::FCMP_UEQ: 2612 CondCodes = &CondCodeTable[0][0]; 2613 break; 2614 case CmpInst::FCMP_ONE: 2615 CondCodes = &CondCodeTable[1][0]; 2616 break; 2617 } 2618 2619 if (CondCodes) { 2620 unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2621 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2622 TmpReg1) 2623 .addReg(AArch64::WZR, getKillRegState(true)) 2624 .addReg(AArch64::WZR, getKillRegState(true)) 2625 .addImm(CondCodes[0]); 2626 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2627 ResultReg) 2628 .addReg(TmpReg1, getKillRegState(true)) 2629 .addReg(AArch64::WZR, getKillRegState(true)) 2630 .addImm(CondCodes[1]); 2631 2632 updateValueMap(I, ResultReg); 2633 return true; 2634 } 2635 2636 // Now set a register based on the comparison. 
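  // CSINC Wd, WZR, WZR, invcc yields WZR when the inverted condition holds
  // and WZR + 1 otherwise, i.e. exactly 1 when the original condition is true
  // and 0 when it is false.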
2637 AArch64CC::CondCode CC = getCompareCC(Predicate); 2638 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2639 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2640 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2641 ResultReg) 2642 .addReg(AArch64::WZR, getKillRegState(true)) 2643 .addReg(AArch64::WZR, getKillRegState(true)) 2644 .addImm(invertedCC); 2645 2646 updateValueMap(I, ResultReg); 2647 return true; 2648 } 2649 2650 /// Optimize selects of i1 if one of the operands has a 'true' or 'false' 2651 /// value. 2652 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2653 if (!SI->getType()->isIntegerTy(1)) 2654 return false; 2655 2656 const Value *Src1Val, *Src2Val; 2657 unsigned Opc = 0; 2658 bool NeedExtraOp = false; 2659 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2660 if (CI->isOne()) { 2661 Src1Val = SI->getCondition(); 2662 Src2Val = SI->getFalseValue(); 2663 Opc = AArch64::ORRWrr; 2664 } else { 2665 assert(CI->isZero()); 2666 Src1Val = SI->getFalseValue(); 2667 Src2Val = SI->getCondition(); 2668 Opc = AArch64::BICWrr; 2669 } 2670 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2671 if (CI->isOne()) { 2672 Src1Val = SI->getCondition(); 2673 Src2Val = SI->getTrueValue(); 2674 Opc = AArch64::ORRWrr; 2675 NeedExtraOp = true; 2676 } else { 2677 assert(CI->isZero()); 2678 Src1Val = SI->getCondition(); 2679 Src2Val = SI->getTrueValue(); 2680 Opc = AArch64::ANDWrr; 2681 } 2682 } 2683 2684 if (!Opc) 2685 return false; 2686 2687 unsigned Src1Reg = getRegForValue(Src1Val); 2688 if (!Src1Reg) 2689 return false; 2690 bool Src1IsKill = hasTrivialKill(Src1Val); 2691 2692 unsigned Src2Reg = getRegForValue(Src2Val); 2693 if (!Src2Reg) 2694 return false; 2695 bool Src2IsKill = hasTrivialKill(Src2Val); 2696 2697 if (NeedExtraOp) { 2698 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1); 2699 Src1IsKill = true; 2700 } 2701 unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2702 Src1IsKill, Src2Reg, Src2IsKill); 2703 updateValueMap(SI, ResultReg); 2704 return true; 2705 } 2706 2707 bool AArch64FastISel::selectSelect(const Instruction *I) { 2708 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2709 MVT VT; 2710 if (!isTypeSupported(I->getType(), VT)) 2711 return false; 2712 2713 unsigned Opc; 2714 const TargetRegisterClass *RC; 2715 switch (VT.SimpleTy) { 2716 default: 2717 return false; 2718 case MVT::i1: 2719 case MVT::i8: 2720 case MVT::i16: 2721 case MVT::i32: 2722 Opc = AArch64::CSELWr; 2723 RC = &AArch64::GPR32RegClass; 2724 break; 2725 case MVT::i64: 2726 Opc = AArch64::CSELXr; 2727 RC = &AArch64::GPR64RegClass; 2728 break; 2729 case MVT::f32: 2730 Opc = AArch64::FCSELSrrr; 2731 RC = &AArch64::FPR32RegClass; 2732 break; 2733 case MVT::f64: 2734 Opc = AArch64::FCSELDrrr; 2735 RC = &AArch64::FPR64RegClass; 2736 break; 2737 } 2738 2739 const SelectInst *SI = cast<SelectInst>(I); 2740 const Value *Cond = SI->getCondition(); 2741 AArch64CC::CondCode CC = AArch64CC::NE; 2742 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2743 2744 if (optimizeSelect(SI)) 2745 return true; 2746 2747 // Try to pickup the flags, so we don't have to emit another compare. 2748 if (foldXALUIntrinsic(CC, I, Cond)) { 2749 // Fake request the condition to force emission of the XALU intrinsic. 
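  // Requesting a register for the condition keeps the overflow intrinsic (and
  // its flag-setting instruction) alive; without this use it could be dropped
  // as dead before the conditional select below consumes the flags.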
2750 unsigned CondReg = getRegForValue(Cond); 2751 if (!CondReg) 2752 return false; 2753 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2754 isValueAvailable(Cond)) { 2755 const auto *Cmp = cast<CmpInst>(Cond); 2756 // Try to optimize or fold the cmp. 2757 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2758 const Value *FoldSelect = nullptr; 2759 switch (Predicate) { 2760 default: 2761 break; 2762 case CmpInst::FCMP_FALSE: 2763 FoldSelect = SI->getFalseValue(); 2764 break; 2765 case CmpInst::FCMP_TRUE: 2766 FoldSelect = SI->getTrueValue(); 2767 break; 2768 } 2769 2770 if (FoldSelect) { 2771 unsigned SrcReg = getRegForValue(FoldSelect); 2772 if (!SrcReg) 2773 return false; 2774 unsigned UseReg = lookUpRegForValue(SI); 2775 if (UseReg) 2776 MRI.clearKillFlags(UseReg); 2777 2778 updateValueMap(I, SrcReg); 2779 return true; 2780 } 2781 2782 // Emit the cmp. 2783 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2784 return false; 2785 2786 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2787 CC = getCompareCC(Predicate); 2788 switch (Predicate) { 2789 default: 2790 break; 2791 case CmpInst::FCMP_UEQ: 2792 ExtraCC = AArch64CC::EQ; 2793 CC = AArch64CC::VS; 2794 break; 2795 case CmpInst::FCMP_ONE: 2796 ExtraCC = AArch64CC::MI; 2797 CC = AArch64CC::GT; 2798 break; 2799 } 2800 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2801 } else { 2802 unsigned CondReg = getRegForValue(Cond); 2803 if (!CondReg) 2804 return false; 2805 bool CondIsKill = hasTrivialKill(Cond); 2806 2807 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2808 CondReg = constrainOperandRegClass(II, CondReg, 1); 2809 2810 // Emit a TST instruction (ANDS wzr, reg, #imm). 2811 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, 2812 AArch64::WZR) 2813 .addReg(CondReg, getKillRegState(CondIsKill)) 2814 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2815 } 2816 2817 unsigned Src1Reg = getRegForValue(SI->getTrueValue()); 2818 bool Src1IsKill = hasTrivialKill(SI->getTrueValue()); 2819 2820 unsigned Src2Reg = getRegForValue(SI->getFalseValue()); 2821 bool Src2IsKill = hasTrivialKill(SI->getFalseValue()); 2822 2823 if (!Src1Reg || !Src2Reg) 2824 return false; 2825 2826 if (ExtraCC != AArch64CC::AL) { 2827 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, 2828 Src2IsKill, ExtraCC); 2829 Src2IsKill = true; 2830 } 2831 unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, 2832 Src2IsKill, CC); 2833 updateValueMap(I, ResultReg); 2834 return true; 2835 } 2836 2837 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2838 Value *V = I->getOperand(0); 2839 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2840 return false; 2841 2842 unsigned Op = getRegForValue(V); 2843 if (Op == 0) 2844 return false; 2845 2846 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass); 2847 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), 2848 ResultReg).addReg(Op); 2849 updateValueMap(I, ResultReg); 2850 return true; 2851 } 2852 2853 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2854 Value *V = I->getOperand(0); 2855 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2856 return false; 2857 2858 unsigned Op = getRegForValue(V); 2859 if (Op == 0) 2860 return false; 2861 2862 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass); 2863 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr), 2864 
ResultReg).addReg(Op); 2865 updateValueMap(I, ResultReg); 2866 return true; 2867 } 2868 2869 // FPToUI and FPToSI 2870 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2871 MVT DestVT; 2872 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2873 return false; 2874 2875 unsigned SrcReg = getRegForValue(I->getOperand(0)); 2876 if (SrcReg == 0) 2877 return false; 2878 2879 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2880 if (SrcVT == MVT::f128 || SrcVT == MVT::f16) 2881 return false; 2882 2883 unsigned Opc; 2884 if (SrcVT == MVT::f64) { 2885 if (Signed) 2886 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2887 else 2888 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2889 } else { 2890 if (Signed) 2891 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2892 else 2893 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2894 } 2895 unsigned ResultReg = createResultReg( 2896 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2897 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 2898 .addReg(SrcReg); 2899 updateValueMap(I, ResultReg); 2900 return true; 2901 } 2902 2903 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2904 MVT DestVT; 2905 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2906 return false; 2907 // Let regular ISEL handle FP16 2908 if (DestVT == MVT::f16) 2909 return false; 2910 2911 assert((DestVT == MVT::f32 || DestVT == MVT::f64) && 2912 "Unexpected value type."); 2913 2914 unsigned SrcReg = getRegForValue(I->getOperand(0)); 2915 if (!SrcReg) 2916 return false; 2917 bool SrcIsKill = hasTrivialKill(I->getOperand(0)); 2918 2919 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2920 2921 // Handle sign-extension. 2922 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2923 SrcReg = 2924 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2925 if (!SrcReg) 2926 return false; 2927 SrcIsKill = true; 2928 } 2929 2930 unsigned Opc; 2931 if (SrcVT == MVT::i64) { 2932 if (Signed) 2933 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2934 else 2935 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2936 } else { 2937 if (Signed) 2938 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2939 else 2940 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2941 } 2942 2943 unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg, 2944 SrcIsKill); 2945 updateValueMap(I, ResultReg); 2946 return true; 2947 } 2948 2949 bool AArch64FastISel::fastLowerArguments() { 2950 if (!FuncInfo.CanLowerReturn) 2951 return false; 2952 2953 const Function *F = FuncInfo.Fn; 2954 if (F->isVarArg()) 2955 return false; 2956 2957 CallingConv::ID CC = F->getCallingConv(); 2958 if (CC != CallingConv::C && CC != CallingConv::Swift) 2959 return false; 2960 2961 if (Subtarget->hasCustomCallingConv()) 2962 return false; 2963 2964 // Only handle simple cases of up to 8 GPR and FPR each. 
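  // This effectively mirrors the AAPCS64 register assignment: the first eight
  // integer arguments go in W0-W7/X0-X7 and the first eight floating-point or
  // vector arguments in H0-H7/S0-S7/D0-D7/Q0-Q7. Anything that would need the
  // stack or a custom convention is left to SelectionDAG.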
2965 unsigned GPRCnt = 0; 2966 unsigned FPRCnt = 0; 2967 for (auto const &Arg : F->args()) { 2968 if (Arg.hasAttribute(Attribute::ByVal) || 2969 Arg.hasAttribute(Attribute::InReg) || 2970 Arg.hasAttribute(Attribute::StructRet) || 2971 Arg.hasAttribute(Attribute::SwiftSelf) || 2972 Arg.hasAttribute(Attribute::SwiftError) || 2973 Arg.hasAttribute(Attribute::Nest)) 2974 return false; 2975 2976 Type *ArgTy = Arg.getType(); 2977 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2978 return false; 2979 2980 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2981 if (!ArgVT.isSimple()) 2982 return false; 2983 2984 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2985 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2986 return false; 2987 2988 if (VT.isVector() && 2989 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2990 return false; 2991 2992 if (VT >= MVT::i1 && VT <= MVT::i64) 2993 ++GPRCnt; 2994 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2995 VT.is128BitVector()) 2996 ++FPRCnt; 2997 else 2998 return false; 2999 3000 if (GPRCnt > 8 || FPRCnt > 8) 3001 return false; 3002 } 3003 3004 static const MCPhysReg Registers[6][8] = { 3005 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 3006 AArch64::W5, AArch64::W6, AArch64::W7 }, 3007 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 3008 AArch64::X5, AArch64::X6, AArch64::X7 }, 3009 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 3010 AArch64::H5, AArch64::H6, AArch64::H7 }, 3011 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 3012 AArch64::S5, AArch64::S6, AArch64::S7 }, 3013 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 3014 AArch64::D5, AArch64::D6, AArch64::D7 }, 3015 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 3016 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 3017 }; 3018 3019 unsigned GPRIdx = 0; 3020 unsigned FPRIdx = 0; 3021 for (auto const &Arg : F->args()) { 3022 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 3023 unsigned SrcReg; 3024 const TargetRegisterClass *RC; 3025 if (VT >= MVT::i1 && VT <= MVT::i32) { 3026 SrcReg = Registers[0][GPRIdx++]; 3027 RC = &AArch64::GPR32RegClass; 3028 VT = MVT::i32; 3029 } else if (VT == MVT::i64) { 3030 SrcReg = Registers[1][GPRIdx++]; 3031 RC = &AArch64::GPR64RegClass; 3032 } else if (VT == MVT::f16) { 3033 SrcReg = Registers[2][FPRIdx++]; 3034 RC = &AArch64::FPR16RegClass; 3035 } else if (VT == MVT::f32) { 3036 SrcReg = Registers[3][FPRIdx++]; 3037 RC = &AArch64::FPR32RegClass; 3038 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 3039 SrcReg = Registers[4][FPRIdx++]; 3040 RC = &AArch64::FPR64RegClass; 3041 } else if (VT.is128BitVector()) { 3042 SrcReg = Registers[5][FPRIdx++]; 3043 RC = &AArch64::FPR128RegClass; 3044 } else 3045 llvm_unreachable("Unexpected value type."); 3046 3047 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 3048 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 3049 // Without this, EmitLiveInCopies may eliminate the livein if its only 3050 // use is a bitcast (which isn't turned into an instruction). 
3051 unsigned ResultReg = createResultReg(RC); 3052 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3053 TII.get(TargetOpcode::COPY), ResultReg) 3054 .addReg(DstReg, getKillRegState(true)); 3055 updateValueMap(&Arg, ResultReg); 3056 } 3057 return true; 3058 } 3059 3060 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 3061 SmallVectorImpl<MVT> &OutVTs, 3062 unsigned &NumBytes) { 3063 CallingConv::ID CC = CLI.CallConv; 3064 SmallVector<CCValAssign, 16> ArgLocs; 3065 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 3066 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 3067 3068 // Get a count of how many bytes are to be pushed on the stack. 3069 NumBytes = CCInfo.getNextStackOffset(); 3070 3071 // Issue CALLSEQ_START 3072 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 3073 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) 3074 .addImm(NumBytes).addImm(0); 3075 3076 // Process the args. 3077 for (CCValAssign &VA : ArgLocs) { 3078 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 3079 MVT ArgVT = OutVTs[VA.getValNo()]; 3080 3081 unsigned ArgReg = getRegForValue(ArgVal); 3082 if (!ArgReg) 3083 return false; 3084 3085 // Handle arg promotion: SExt, ZExt, AExt. 3086 switch (VA.getLocInfo()) { 3087 case CCValAssign::Full: 3088 break; 3089 case CCValAssign::SExt: { 3090 MVT DestVT = VA.getLocVT(); 3091 MVT SrcVT = ArgVT; 3092 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 3093 if (!ArgReg) 3094 return false; 3095 break; 3096 } 3097 case CCValAssign::AExt: 3098 // Intentional fall-through. 3099 case CCValAssign::ZExt: { 3100 MVT DestVT = VA.getLocVT(); 3101 MVT SrcVT = ArgVT; 3102 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 3103 if (!ArgReg) 3104 return false; 3105 break; 3106 } 3107 default: 3108 llvm_unreachable("Unknown arg promotion!"); 3109 } 3110 3111 // Now copy/store arg to correct locations. 3112 if (VA.isRegLoc() && !VA.needsCustom()) { 3113 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3114 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 3115 CLI.OutRegs.push_back(VA.getLocReg()); 3116 } else if (VA.needsCustom()) { 3117 // FIXME: Handle custom args. 3118 return false; 3119 } else { 3120 assert(VA.isMemLoc() && "Assuming store on stack."); 3121 3122 // Don't emit stores for undef values. 3123 if (isa<UndefValue>(ArgVal)) 3124 continue; 3125 3126 // Need to store on the stack. 3127 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 3128 3129 unsigned BEAlign = 0; 3130 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3131 BEAlign = 8 - ArgSize; 3132 3133 Address Addr; 3134 Addr.setKind(Address::RegBase); 3135 Addr.setReg(AArch64::SP); 3136 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3137 3138 Align Alignment = DL.getABITypeAlign(ArgVal->getType()); 3139 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3140 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 3141 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3142 3143 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3144 return false; 3145 } 3146 } 3147 return true; 3148 } 3149 3150 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, 3151 unsigned NumBytes) { 3152 CallingConv::ID CC = CLI.CallConv; 3153 3154 // Issue CALLSEQ_END 3155 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3156 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) 3157 .addImm(NumBytes).addImm(0); 3158 3159 // Now the return value. 
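  // Only a single return value that comes back in one register is supported;
  // it is copied out of the physical return register (e.g. W0/X0 or S0/D0)
  // into a fresh virtual register of the value's register class.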
3160 if (RetVT != MVT::isVoid) { 3161 SmallVector<CCValAssign, 16> RVLocs; 3162 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3163 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); 3164 3165 // Only handle a single return value. 3166 if (RVLocs.size() != 1) 3167 return false; 3168 3169 // Copy all of the result registers out of their specified physreg. 3170 MVT CopyVT = RVLocs[0].getValVT(); 3171 3172 // TODO: Handle big-endian results 3173 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3174 return false; 3175 3176 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); 3177 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3178 TII.get(TargetOpcode::COPY), ResultReg) 3179 .addReg(RVLocs[0].getLocReg()); 3180 CLI.InRegs.push_back(RVLocs[0].getLocReg()); 3181 3182 CLI.ResultReg = ResultReg; 3183 CLI.NumResultRegs = 1; 3184 } 3185 3186 return true; 3187 } 3188 3189 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3190 CallingConv::ID CC = CLI.CallConv; 3191 bool IsTailCall = CLI.IsTailCall; 3192 bool IsVarArg = CLI.IsVarArg; 3193 const Value *Callee = CLI.Callee; 3194 MCSymbol *Symbol = CLI.Symbol; 3195 3196 if (!Callee && !Symbol) 3197 return false; 3198 3199 // Allow SelectionDAG isel to handle tail calls. 3200 if (IsTailCall) 3201 return false; 3202 3203 // FIXME: we could and should support this, but for now correctness at -O0 is 3204 // more important. 3205 if (Subtarget->isTargetILP32()) 3206 return false; 3207 3208 CodeModel::Model CM = TM.getCodeModel(); 3209 // Only support the small-addressing and large code models. 3210 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) 3211 return false; 3212 3213 // FIXME: Add large code model support for ELF. 3214 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3215 return false; 3216 3217 // Let SDISel handle vararg functions. 3218 if (IsVarArg) 3219 return false; 3220 3221 // FIXME: Only handle *simple* calls for now. 3222 MVT RetVT; 3223 if (CLI.RetTy->isVoidTy()) 3224 RetVT = MVT::isVoid; 3225 else if (!isTypeLegal(CLI.RetTy, RetVT)) 3226 return false; 3227 3228 for (auto Flag : CLI.OutFlags) 3229 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || 3230 Flag.isSwiftSelf() || Flag.isSwiftError()) 3231 return false; 3232 3233 // Set up the argument vectors. 3234 SmallVector<MVT, 16> OutVTs; 3235 OutVTs.reserve(CLI.OutVals.size()); 3236 3237 for (auto *Val : CLI.OutVals) { 3238 MVT VT; 3239 if (!isTypeLegal(Val->getType(), VT) && 3240 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3241 return false; 3242 3243 // We don't handle vector parameters yet. 3244 if (VT.isVector() || VT.getSizeInBits() > 64) 3245 return false; 3246 3247 OutVTs.push_back(VT); 3248 } 3249 3250 Address Addr; 3251 if (Callee && !computeCallAddress(Callee, Addr)) 3252 return false; 3253 3254 // The weak function target may be zero; in that case we must use indirect 3255 // addressing via a stub on windows as it may be out of range for a 3256 // PC-relative jump. 3257 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() && 3258 Addr.getGlobalValue()->hasExternalWeakLinkage()) 3259 return false; 3260 3261 // Handle the arguments now that we've gotten them. 3262 unsigned NumBytes; 3263 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3264 return false; 3265 3266 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3267 if (RegInfo->isAnyArgRegReserved(*MF)) 3268 RegInfo->emitReservedArgRegCallError(*MF); 3269 3270 // Issue the call. 
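  // With small addressing a direct BL (or a BLR-style call for an indirect
  // callee) is emitted. For the large code model the callee address is
  // materialized first, e.g. for an external symbol via ADRP + LDRXui through
  // the GOT, and the call is then made through a register.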
3271 MachineInstrBuilder MIB; 3272 if (Subtarget->useSmallAddressing()) { 3273 const MCInstrDesc &II = 3274 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL); 3275 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); 3276 if (Symbol) 3277 MIB.addSym(Symbol, 0); 3278 else if (Addr.getGlobalValue()) 3279 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3280 else if (Addr.getReg()) { 3281 unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3282 MIB.addReg(Reg); 3283 } else 3284 return false; 3285 } else { 3286 unsigned CallReg = 0; 3287 if (Symbol) { 3288 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3289 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 3290 ADRPReg) 3291 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3292 3293 CallReg = createResultReg(&AArch64::GPR64RegClass); 3294 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3295 TII.get(AArch64::LDRXui), CallReg) 3296 .addReg(ADRPReg) 3297 .addSym(Symbol, 3298 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3299 } else if (Addr.getGlobalValue()) 3300 CallReg = materializeGV(Addr.getGlobalValue()); 3301 else if (Addr.getReg()) 3302 CallReg = Addr.getReg(); 3303 3304 if (!CallReg) 3305 return false; 3306 3307 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF)); 3308 CallReg = constrainOperandRegClass(II, CallReg, 0); 3309 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg); 3310 } 3311 3312 // Add implicit physical register uses to the call. 3313 for (auto Reg : CLI.OutRegs) 3314 MIB.addReg(Reg, RegState::Implicit); 3315 3316 // Add a register mask with the call-preserved registers. 3317 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3318 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3319 3320 CLI.Call = MIB; 3321 3322 // Finish off the call including any return values. 3323 return finishCall(CLI, RetVT, NumBytes); 3324 } 3325 3326 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) { 3327 if (Alignment) 3328 return Len / Alignment <= 4; 3329 else 3330 return Len < 32; 3331 } 3332 3333 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3334 uint64_t Len, unsigned Alignment) { 3335 // Make sure we don't bloat code by inlining very large memcpy's. 3336 if (!isMemCpySmall(Len, Alignment)) 3337 return false; 3338 3339 int64_t UnscaledOffset = 0; 3340 Address OrigDest = Dest; 3341 Address OrigSrc = Src; 3342 3343 while (Len) { 3344 MVT VT; 3345 if (!Alignment || Alignment >= 8) { 3346 if (Len >= 8) 3347 VT = MVT::i64; 3348 else if (Len >= 4) 3349 VT = MVT::i32; 3350 else if (Len >= 2) 3351 VT = MVT::i16; 3352 else { 3353 VT = MVT::i8; 3354 } 3355 } else { 3356 // Bound based on alignment. 3357 if (Len >= 4 && Alignment == 4) 3358 VT = MVT::i32; 3359 else if (Len >= 2 && Alignment == 2) 3360 VT = MVT::i16; 3361 else { 3362 VT = MVT::i8; 3363 } 3364 } 3365 3366 unsigned ResultReg = emitLoad(VT, VT, Src); 3367 if (!ResultReg) 3368 return false; 3369 3370 if (!emitStore(VT, ResultReg, Dest)) 3371 return false; 3372 3373 int64_t Size = VT.getSizeInBits() / 8; 3374 Len -= Size; 3375 UnscaledOffset += Size; 3376 3377 // We need to recompute the unscaled offset for each iteration. 
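  // For example, a 12-byte copy with 8-byte alignment is expanded into an i64
  // load/store at offset 0 followed by an i32 load/store at offset 8.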
3378 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3379 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3380 } 3381 3382 return true; 3383 } 3384 3385 /// Check if it is possible to fold the condition from the XALU intrinsic 3386 /// into the user. The condition code will only be updated on success. 3387 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3388 const Instruction *I, 3389 const Value *Cond) { 3390 if (!isa<ExtractValueInst>(Cond)) 3391 return false; 3392 3393 const auto *EV = cast<ExtractValueInst>(Cond); 3394 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3395 return false; 3396 3397 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3398 MVT RetVT; 3399 const Function *Callee = II->getCalledFunction(); 3400 Type *RetTy = 3401 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3402 if (!isTypeLegal(RetTy, RetVT)) 3403 return false; 3404 3405 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3406 return false; 3407 3408 const Value *LHS = II->getArgOperand(0); 3409 const Value *RHS = II->getArgOperand(1); 3410 3411 // Canonicalize immediate to the RHS. 3412 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 3413 isCommutativeIntrinsic(II)) 3414 std::swap(LHS, RHS); 3415 3416 // Simplify multiplies. 3417 Intrinsic::ID IID = II->getIntrinsicID(); 3418 switch (IID) { 3419 default: 3420 break; 3421 case Intrinsic::smul_with_overflow: 3422 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3423 if (C->getValue() == 2) 3424 IID = Intrinsic::sadd_with_overflow; 3425 break; 3426 case Intrinsic::umul_with_overflow: 3427 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3428 if (C->getValue() == 2) 3429 IID = Intrinsic::uadd_with_overflow; 3430 break; 3431 } 3432 3433 AArch64CC::CondCode TmpCC; 3434 switch (IID) { 3435 default: 3436 return false; 3437 case Intrinsic::sadd_with_overflow: 3438 case Intrinsic::ssub_with_overflow: 3439 TmpCC = AArch64CC::VS; 3440 break; 3441 case Intrinsic::uadd_with_overflow: 3442 TmpCC = AArch64CC::HS; 3443 break; 3444 case Intrinsic::usub_with_overflow: 3445 TmpCC = AArch64CC::LO; 3446 break; 3447 case Intrinsic::smul_with_overflow: 3448 case Intrinsic::umul_with_overflow: 3449 TmpCC = AArch64CC::NE; 3450 break; 3451 } 3452 3453 // Check if both instructions are in the same basic block. 3454 if (!isValueAvailable(II)) 3455 return false; 3456 3457 // Make sure nothing is in the way 3458 BasicBlock::const_iterator Start(I); 3459 BasicBlock::const_iterator End(II); 3460 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3461 // We only expect extractvalue instructions between the intrinsic and the 3462 // instruction to be selected. 3463 if (!isa<ExtractValueInst>(Itr)) 3464 return false; 3465 3466 // Check that the extractvalue operand comes from the intrinsic. 3467 const auto *EVI = cast<ExtractValueInst>(Itr); 3468 if (EVI->getAggregateOperand() != II) 3469 return false; 3470 } 3471 3472 CC = TmpCC; 3473 return true; 3474 } 3475 3476 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3477 // FIXME: Handle more intrinsics. 
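  // In rough terms, the cases below cover frame/stack queries (frameaddress,
  // sponentry), small mem* transfers, a few libm intrinsics lowered to
  // libcalls, simple FP ops (fabs, sqrt), traps, and the *.with.overflow
  // family; everything else falls back to the default lowering.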
3478 switch (II->getIntrinsicID()) { 3479 default: return false; 3480 case Intrinsic::frameaddress: { 3481 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3482 MFI.setFrameAddressIsTaken(true); 3483 3484 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3485 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3486 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3487 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3488 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3489 // Recursively load frame address 3490 // ldr x0, [fp] 3491 // ldr x0, [x0] 3492 // ldr x0, [x0] 3493 // ... 3494 unsigned DestReg; 3495 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3496 while (Depth--) { 3497 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3498 SrcReg, /*IsKill=*/true, 0); 3499 assert(DestReg && "Unexpected LDR instruction emission failure."); 3500 SrcReg = DestReg; 3501 } 3502 3503 updateValueMap(II, SrcReg); 3504 return true; 3505 } 3506 case Intrinsic::sponentry: { 3507 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3508 3509 // SP = FP + Fixed Object + 16 3510 int FI = MFI.CreateFixedObject(4, 0, false); 3511 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); 3512 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3513 TII.get(AArch64::ADDXri), ResultReg) 3514 .addFrameIndex(FI) 3515 .addImm(0) 3516 .addImm(0); 3517 3518 updateValueMap(II, ResultReg); 3519 return true; 3520 } 3521 case Intrinsic::memcpy: 3522 case Intrinsic::memmove: { 3523 const auto *MTI = cast<MemTransferInst>(II); 3524 // Don't handle volatile. 3525 if (MTI->isVolatile()) 3526 return false; 3527 3528 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3529 // we would emit dead code because we don't currently handle memmoves. 3530 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3531 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3532 // Small memcpy's are common enough that we want to do them without a call 3533 // if possible. 3534 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3535 unsigned Alignment = MinAlign(MTI->getDestAlignment(), 3536 MTI->getSourceAlignment()); 3537 if (isMemCpySmall(Len, Alignment)) { 3538 Address Dest, Src; 3539 if (!computeAddress(MTI->getRawDest(), Dest) || 3540 !computeAddress(MTI->getRawSource(), Src)) 3541 return false; 3542 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3543 return true; 3544 } 3545 } 3546 3547 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3548 return false; 3549 3550 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3551 // Fast instruction selection doesn't support the special 3552 // address spaces. 3553 return false; 3554 3555 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3556 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1); 3557 } 3558 case Intrinsic::memset: { 3559 const MemSetInst *MSI = cast<MemSetInst>(II); 3560 // Don't handle volatile. 3561 if (MSI->isVolatile()) 3562 return false; 3563 3564 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3565 return false; 3566 3567 if (MSI->getDestAddressSpace() > 255) 3568 // Fast instruction selection doesn't support the special 3569 // address spaces. 
3570 return false; 3571 3572 return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); 3573 } 3574 case Intrinsic::sin: 3575 case Intrinsic::cos: 3576 case Intrinsic::pow: { 3577 MVT RetVT; 3578 if (!isTypeLegal(II->getType(), RetVT)) 3579 return false; 3580 3581 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3582 return false; 3583 3584 static const RTLIB::Libcall LibCallTable[3][2] = { 3585 { RTLIB::SIN_F32, RTLIB::SIN_F64 }, 3586 { RTLIB::COS_F32, RTLIB::COS_F64 }, 3587 { RTLIB::POW_F32, RTLIB::POW_F64 } 3588 }; 3589 RTLIB::Libcall LC; 3590 bool Is64Bit = RetVT == MVT::f64; 3591 switch (II->getIntrinsicID()) { 3592 default: 3593 llvm_unreachable("Unexpected intrinsic."); 3594 case Intrinsic::sin: 3595 LC = LibCallTable[0][Is64Bit]; 3596 break; 3597 case Intrinsic::cos: 3598 LC = LibCallTable[1][Is64Bit]; 3599 break; 3600 case Intrinsic::pow: 3601 LC = LibCallTable[2][Is64Bit]; 3602 break; 3603 } 3604 3605 ArgListTy Args; 3606 Args.reserve(II->getNumArgOperands()); 3607 3608 // Populate the argument list. 3609 for (auto &Arg : II->arg_operands()) { 3610 ArgListEntry Entry; 3611 Entry.Val = Arg; 3612 Entry.Ty = Arg->getType(); 3613 Args.push_back(Entry); 3614 } 3615 3616 CallLoweringInfo CLI; 3617 MCContext &Ctx = MF->getContext(); 3618 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3619 TLI.getLibcallName(LC), std::move(Args)); 3620 if (!lowerCallTo(CLI)) 3621 return false; 3622 updateValueMap(II, CLI.ResultReg); 3623 return true; 3624 } 3625 case Intrinsic::fabs: { 3626 MVT VT; 3627 if (!isTypeLegal(II->getType(), VT)) 3628 return false; 3629 3630 unsigned Opc; 3631 switch (VT.SimpleTy) { 3632 default: 3633 return false; 3634 case MVT::f32: 3635 Opc = AArch64::FABSSr; 3636 break; 3637 case MVT::f64: 3638 Opc = AArch64::FABSDr; 3639 break; 3640 } 3641 unsigned SrcReg = getRegForValue(II->getOperand(0)); 3642 if (!SrcReg) 3643 return false; 3644 bool SrcRegIsKill = hasTrivialKill(II->getOperand(0)); 3645 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3646 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 3647 .addReg(SrcReg, getKillRegState(SrcRegIsKill)); 3648 updateValueMap(II, ResultReg); 3649 return true; 3650 } 3651 case Intrinsic::trap: 3652 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 3653 .addImm(1); 3654 return true; 3655 case Intrinsic::debugtrap: { 3656 if (Subtarget->isTargetWindows()) { 3657 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 3658 .addImm(0xF000); 3659 return true; 3660 } 3661 break; 3662 } 3663 3664 case Intrinsic::sqrt: { 3665 Type *RetTy = II->getCalledFunction()->getReturnType(); 3666 3667 MVT VT; 3668 if (!isTypeLegal(RetTy, VT)) 3669 return false; 3670 3671 unsigned Op0Reg = getRegForValue(II->getOperand(0)); 3672 if (!Op0Reg) 3673 return false; 3674 bool Op0IsKill = hasTrivialKill(II->getOperand(0)); 3675 3676 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill); 3677 if (!ResultReg) 3678 return false; 3679 3680 updateValueMap(II, ResultReg); 3681 return true; 3682 } 3683 case Intrinsic::sadd_with_overflow: 3684 case Intrinsic::uadd_with_overflow: 3685 case Intrinsic::ssub_with_overflow: 3686 case Intrinsic::usub_with_overflow: 3687 case Intrinsic::smul_with_overflow: 3688 case Intrinsic::umul_with_overflow: { 3689 // This implements the basic lowering of the xalu with overflow intrinsics. 
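  // The value result is produced by a flag-setting add/sub (or, for the
  // multiplies, by checking the widened product), and the overflow bit is then
  // materialized with a CSINC on the inverted condition, e.g. for a 32-bit
  // sadd.with.overflow (register names illustrative):
  //   adds  w8, wLHS, wRHS
  //   csinc w9, wzr, wzr, vc   // 1 on signed overflow, 0 otherwise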
3690 const Function *Callee = II->getCalledFunction(); 3691 auto *Ty = cast<StructType>(Callee->getReturnType()); 3692 Type *RetTy = Ty->getTypeAtIndex(0U); 3693 3694 MVT VT; 3695 if (!isTypeLegal(RetTy, VT)) 3696 return false; 3697 3698 if (VT != MVT::i32 && VT != MVT::i64) 3699 return false; 3700 3701 const Value *LHS = II->getArgOperand(0); 3702 const Value *RHS = II->getArgOperand(1); 3703 // Canonicalize immediate to the RHS. 3704 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 3705 isCommutativeIntrinsic(II)) 3706 std::swap(LHS, RHS); 3707 3708 // Simplify multiplies. 3709 Intrinsic::ID IID = II->getIntrinsicID(); 3710 switch (IID) { 3711 default: 3712 break; 3713 case Intrinsic::smul_with_overflow: 3714 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3715 if (C->getValue() == 2) { 3716 IID = Intrinsic::sadd_with_overflow; 3717 RHS = LHS; 3718 } 3719 break; 3720 case Intrinsic::umul_with_overflow: 3721 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3722 if (C->getValue() == 2) { 3723 IID = Intrinsic::uadd_with_overflow; 3724 RHS = LHS; 3725 } 3726 break; 3727 } 3728 3729 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3730 AArch64CC::CondCode CC = AArch64CC::Invalid; 3731 switch (IID) { 3732 default: llvm_unreachable("Unexpected intrinsic!"); 3733 case Intrinsic::sadd_with_overflow: 3734 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3735 CC = AArch64CC::VS; 3736 break; 3737 case Intrinsic::uadd_with_overflow: 3738 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3739 CC = AArch64CC::HS; 3740 break; 3741 case Intrinsic::ssub_with_overflow: 3742 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3743 CC = AArch64CC::VS; 3744 break; 3745 case Intrinsic::usub_with_overflow: 3746 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3747 CC = AArch64CC::LO; 3748 break; 3749 case Intrinsic::smul_with_overflow: { 3750 CC = AArch64CC::NE; 3751 unsigned LHSReg = getRegForValue(LHS); 3752 if (!LHSReg) 3753 return false; 3754 bool LHSIsKill = hasTrivialKill(LHS); 3755 3756 unsigned RHSReg = getRegForValue(RHS); 3757 if (!RHSReg) 3758 return false; 3759 bool RHSIsKill = hasTrivialKill(RHS); 3760 3761 if (VT == MVT::i32) { 3762 MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 3763 unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg, 3764 /*IsKill=*/false, 32); 3765 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 3766 AArch64::sub_32); 3767 ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true, 3768 AArch64::sub_32); 3769 emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 3770 AArch64_AM::ASR, 31, /*WantResult=*/false); 3771 } else { 3772 assert(VT == MVT::i64 && "Unexpected value type."); 3773 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3774 // reused in the next instruction. 
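  // The overflow check compares the high 64 bits of the product (SMULH)
  // against the sign extension of the low 64 bits (the MUL result shifted
  // right arithmetically by 63); any difference means the signed product
  // overflowed.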
3775 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, 3776 /*IsKill=*/false); 3777 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill, 3778 RHSReg, RHSIsKill); 3779 emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 3780 AArch64_AM::ASR, 63, /*WantResult=*/false); 3781 } 3782 break; 3783 } 3784 case Intrinsic::umul_with_overflow: { 3785 CC = AArch64CC::NE; 3786 unsigned LHSReg = getRegForValue(LHS); 3787 if (!LHSReg) 3788 return false; 3789 bool LHSIsKill = hasTrivialKill(LHS); 3790 3791 unsigned RHSReg = getRegForValue(RHS); 3792 if (!RHSReg) 3793 return false; 3794 bool RHSIsKill = hasTrivialKill(RHS); 3795 3796 if (VT == MVT::i32) { 3797 MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 3798 emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg, 3799 /*IsKill=*/false, AArch64_AM::LSR, 32, 3800 /*WantResult=*/false); 3801 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 3802 AArch64::sub_32); 3803 } else { 3804 assert(VT == MVT::i64 && "Unexpected value type."); 3805 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3806 // reused in the next instruction. 3807 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, 3808 /*IsKill=*/false); 3809 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill, 3810 RHSReg, RHSIsKill); 3811 emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg, 3812 /*IsKill=*/false, /*WantResult=*/false); 3813 } 3814 break; 3815 } 3816 } 3817 3818 if (MulReg) { 3819 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3820 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3821 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3822 } 3823 3824 if (!ResultReg1) 3825 return false; 3826 3827 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3828 AArch64::WZR, /*IsKill=*/true, AArch64::WZR, 3829 /*IsKill=*/true, getInvertedCondCode(CC)); 3830 (void)ResultReg2; 3831 assert((ResultReg1 + 1) == ResultReg2 && 3832 "Nonconsecutive result registers."); 3833 updateValueMap(II, ResultReg1, 2); 3834 return true; 3835 } 3836 } 3837 return false; 3838 } 3839 3840 bool AArch64FastISel::selectRet(const Instruction *I) { 3841 const ReturnInst *Ret = cast<ReturnInst>(I); 3842 const Function &F = *I->getParent()->getParent(); 3843 3844 if (!FuncInfo.CanLowerReturn) 3845 return false; 3846 3847 if (F.isVarArg()) 3848 return false; 3849 3850 if (TLI.supportSwiftError() && 3851 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 3852 return false; 3853 3854 if (TLI.supportSplitCSR(FuncInfo.MF)) 3855 return false; 3856 3857 // Build a list of return value registers. 3858 SmallVector<unsigned, 4> RetRegs; 3859 3860 if (Ret->getNumOperands() > 0) { 3861 CallingConv::ID CC = F.getCallingConv(); 3862 SmallVector<ISD::OutputArg, 4> Outs; 3863 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3864 3865 // Analyze operands of the call, assigning locations to each operand. 3866 SmallVector<CCValAssign, 16> ValLocs; 3867 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3868 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS 3869 : RetCC_AArch64_AAPCS; 3870 CCInfo.AnalyzeReturn(Outs, RetCC); 3871 3872 // Only handle a single return value for now. 3873 if (ValLocs.size() != 1) 3874 return false; 3875 3876 CCValAssign &VA = ValLocs[0]; 3877 const Value *RV = Ret->getOperand(0); 3878 3879 // Don't bother handling odd stuff for now. 
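  // Only CCValAssign::Full and CCValAssign::BCvt locations that were assigned
  // to registers are handled below.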
3880 if ((VA.getLocInfo() != CCValAssign::Full) && 3881 (VA.getLocInfo() != CCValAssign::BCvt)) 3882 return false; 3883 3884 // Only handle register returns for now. 3885 if (!VA.isRegLoc()) 3886 return false; 3887 3888 unsigned Reg = getRegForValue(RV); 3889 if (Reg == 0) 3890 return false; 3891 3892 unsigned SrcReg = Reg + VA.getValNo(); 3893 Register DestReg = VA.getLocReg(); 3894 // Avoid a cross-class copy. This is very unlikely. 3895 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 3896 return false; 3897 3898 EVT RVEVT = TLI.getValueType(DL, RV->getType()); 3899 if (!RVEVT.isSimple()) 3900 return false; 3901 3902 // Vectors (of > 1 lane) in big endian need tricky handling. 3903 if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 && 3904 !Subtarget->isLittleEndian()) 3905 return false; 3906 3907 MVT RVVT = RVEVT.getSimpleVT(); 3908 if (RVVT == MVT::f128) 3909 return false; 3910 3911 MVT DestVT = VA.getValVT(); 3912 // Special handling for extended integers. 3913 if (RVVT != DestVT) { 3914 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) 3915 return false; 3916 3917 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 3918 return false; 3919 3920 bool IsZExt = Outs[0].Flags.isZExt(); 3921 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); 3922 if (SrcReg == 0) 3923 return false; 3924 } 3925 3926 // "Callee" (i.e. value producer) zero extends pointers at function 3927 // boundary. 3928 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy()) 3929 SrcReg = emitAnd_ri(MVT::i64, SrcReg, false, 0xffffffff); 3930 3931 // Make the copy. 3932 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3933 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); 3934 3935 // Add register to return instruction. 3936 RetRegs.push_back(VA.getLocReg()); 3937 } 3938 3939 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3940 TII.get(AArch64::RET_ReallyLR)); 3941 for (unsigned RetReg : RetRegs) 3942 MIB.addReg(RetReg, RegState::Implicit); 3943 return true; 3944 } 3945 3946 bool AArch64FastISel::selectTrunc(const Instruction *I) { 3947 Type *DestTy = I->getType(); 3948 Value *Op = I->getOperand(0); 3949 Type *SrcTy = Op->getType(); 3950 3951 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); 3952 EVT DestEVT = TLI.getValueType(DL, DestTy, true); 3953 if (!SrcEVT.isSimple()) 3954 return false; 3955 if (!DestEVT.isSimple()) 3956 return false; 3957 3958 MVT SrcVT = SrcEVT.getSimpleVT(); 3959 MVT DestVT = DestEVT.getSimpleVT(); 3960 3961 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && 3962 SrcVT != MVT::i8) 3963 return false; 3964 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && 3965 DestVT != MVT::i1) 3966 return false; 3967 3968 unsigned SrcReg = getRegForValue(Op); 3969 if (!SrcReg) 3970 return false; 3971 bool SrcIsKill = hasTrivialKill(Op); 3972 3973 // If we're truncating from i64 to a smaller non-legal type then generate an 3974 // AND. Otherwise, we know the high bits are undefined and a truncate only 3975 // generate a COPY. We cannot mark the source register also as result 3976 // register, because this can incorrectly transfer the kill flag onto the 3977 // source register. 3978 unsigned ResultReg; 3979 if (SrcVT == MVT::i64) { 3980 uint64_t Mask = 0; 3981 switch (DestVT.SimpleTy) { 3982 default: 3983 // Trunc i64 to i32 is handled by the target-independent fast-isel. 
3984 return false; 3985 case MVT::i1: 3986 Mask = 0x1; 3987 break; 3988 case MVT::i8: 3989 Mask = 0xff; 3990 break; 3991 case MVT::i16: 3992 Mask = 0xffff; 3993 break; 3994 } 3995 // Issue an extract_subreg to get the lower 32-bits. 3996 unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, 3997 AArch64::sub_32); 3998 // Create the AND instruction which performs the actual truncation. 3999 ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask); 4000 assert(ResultReg && "Unexpected AND instruction emission failure."); 4001 } else { 4002 ResultReg = createResultReg(&AArch64::GPR32RegClass); 4003 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4004 TII.get(TargetOpcode::COPY), ResultReg) 4005 .addReg(SrcReg, getKillRegState(SrcIsKill)); 4006 } 4007 4008 updateValueMap(I, ResultReg); 4009 return true; 4010 } 4011 4012 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { 4013 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || 4014 DestVT == MVT::i64) && 4015 "Unexpected value type."); 4016 // Handle i8 and i16 as i32. 4017 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4018 DestVT = MVT::i32; 4019 4020 if (IsZExt) { 4021 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); 4022 assert(ResultReg && "Unexpected AND instruction emission failure."); 4023 if (DestVT == MVT::i64) { 4024 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the 4025 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. 4026 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4027 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4028 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4029 .addImm(0) 4030 .addReg(ResultReg) 4031 .addImm(AArch64::sub_32); 4032 ResultReg = Reg64; 4033 } 4034 return ResultReg; 4035 } else { 4036 if (DestVT == MVT::i64) { 4037 // FIXME: We're SExt i1 to i64. 4038 return 0; 4039 } 4040 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 4041 /*TODO:IsKill=*/false, 0, 0); 4042 } 4043 } 4044 4045 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 4046 unsigned Op1, bool Op1IsKill) { 4047 unsigned Opc, ZReg; 4048 switch (RetVT.SimpleTy) { 4049 default: return 0; 4050 case MVT::i8: 4051 case MVT::i16: 4052 case MVT::i32: 4053 RetVT = MVT::i32; 4054 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 4055 case MVT::i64: 4056 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 4057 } 4058 4059 const TargetRegisterClass *RC = 4060 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4061 return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill, 4062 /*IsKill=*/ZReg, true); 4063 } 4064 4065 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 4066 unsigned Op1, bool Op1IsKill) { 4067 if (RetVT != MVT::i64) 4068 return 0; 4069 4070 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, 4071 Op0, Op0IsKill, Op1, Op1IsKill, 4072 AArch64::XZR, /*IsKill=*/true); 4073 } 4074 4075 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 4076 unsigned Op1, bool Op1IsKill) { 4077 if (RetVT != MVT::i64) 4078 return 0; 4079 4080 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, 4081 Op0, Op0IsKill, Op1, Op1IsKill, 4082 AArch64::XZR, /*IsKill=*/true); 4083 } 4084 4085 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 4086 unsigned Op1Reg, bool Op1IsKill) { 4087 unsigned Opc = 0; 4088 bool NeedTrunc = false; 4089 uint64_t Mask = 0; 4090 switch (RetVT.SimpleTy) { 4091 default: return 0; 4092 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; 4093 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; 4094 case MVT::i32: Opc = AArch64::LSLVWr; break; 4095 case MVT::i64: Opc = AArch64::LSLVXr; break; 4096 } 4097 4098 const TargetRegisterClass *RC = 4099 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4100 if (NeedTrunc) { 4101 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 4102 Op1IsKill = true; 4103 } 4104 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 4105 Op1IsKill); 4106 if (NeedTrunc) 4107 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 4108 return ResultReg; 4109 } 4110 4111 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4112 bool Op0IsKill, uint64_t Shift, 4113 bool IsZExt) { 4114 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4115 "Unexpected source/return type pair."); 4116 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4117 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4118 "Unexpected source value type."); 4119 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4120 RetVT == MVT::i64) && "Unexpected return value type."); 4121 4122 bool Is64Bit = (RetVT == MVT::i64); 4123 unsigned RegSize = Is64Bit ? 64 : 32; 4124 unsigned DstBits = RetVT.getSizeInBits(); 4125 unsigned SrcBits = SrcVT.getSizeInBits(); 4126 const TargetRegisterClass *RC = 4127 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4128 4129 // Just emit a copy for "zero" shifts. 4130 if (Shift == 0) { 4131 if (RetVT == SrcVT) { 4132 unsigned ResultReg = createResultReg(RC); 4133 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4134 TII.get(TargetOpcode::COPY), ResultReg) 4135 .addReg(Op0, getKillRegState(Op0IsKill)); 4136 return ResultReg; 4137 } else 4138 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4139 } 4140 4141 // Don't deal with undefined shifts. 4142 if (Shift >= DstBits) 4143 return 0; 4144 4145 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
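  // A left shift by a constant is emitted as a single {S|U}BFM: ImmR rotates by
  // RegSize - Shift and ImmS limits the copied field to whatever of the source
  // still fits below the destination's top bit. The examples below walk an i8
  // value extended to i16 through shifts of 4, 8 and 12.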
4146 // {S|U}BFM Wd, Wn, #r, #s 4147 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 4148 4149 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4150 // %2 = shl i16 %1, 4 4151 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 4152 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 4153 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 4154 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 4155 4156 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4157 // %2 = shl i16 %1, 8 4158 // Wd<32+7-24,32-24> = Wn<7:0> 4159 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 4160 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 4161 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 4162 4163 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4164 // %2 = shl i16 %1, 12 4165 // Wd<32+3-20,32-20> = Wn<3:0> 4166 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 4167 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 4168 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 4169 4170 unsigned ImmR = RegSize - Shift; 4171 // Limit the width to the length of the source type. 4172 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 4173 static const unsigned OpcTable[2][2] = { 4174 {AArch64::SBFMWri, AArch64::SBFMXri}, 4175 {AArch64::UBFMWri, AArch64::UBFMXri} 4176 }; 4177 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4178 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4179 Register TmpReg = MRI.createVirtualRegister(RC); 4180 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4181 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4182 .addImm(0) 4183 .addReg(Op0, getKillRegState(Op0IsKill)) 4184 .addImm(AArch64::sub_32); 4185 Op0 = TmpReg; 4186 Op0IsKill = true; 4187 } 4188 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4189 } 4190 4191 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 4192 unsigned Op1Reg, bool Op1IsKill) { 4193 unsigned Opc = 0; 4194 bool NeedTrunc = false; 4195 uint64_t Mask = 0; 4196 switch (RetVT.SimpleTy) { 4197 default: return 0; 4198 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4199 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4200 case MVT::i32: Opc = AArch64::LSRVWr; break; 4201 case MVT::i64: Opc = AArch64::LSRVXr; break; 4202 } 4203 4204 const TargetRegisterClass *RC = 4205 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4206 if (NeedTrunc) { 4207 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask); 4208 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 4209 Op0IsKill = Op1IsKill = true; 4210 } 4211 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 4212 Op1IsKill); 4213 if (NeedTrunc) 4214 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 4215 return ResultReg; 4216 } 4217 4218 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4219 bool Op0IsKill, uint64_t Shift, 4220 bool IsZExt) { 4221 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4222 "Unexpected source/return type pair."); 4223 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4224 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4225 "Unexpected source value type."); 4226 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4227 RetVT == MVT::i64) && "Unexpected return value type."); 4228 4229 bool Is64Bit = (RetVT == MVT::i64); 4230 unsigned RegSize = Is64Bit ? 
64 : 32; 4231 unsigned DstBits = RetVT.getSizeInBits(); 4232 unsigned SrcBits = SrcVT.getSizeInBits(); 4233 const TargetRegisterClass *RC = 4234 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4235 4236 // Just emit a copy for "zero" shifts. 4237 if (Shift == 0) { 4238 if (RetVT == SrcVT) { 4239 unsigned ResultReg = createResultReg(RC); 4240 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4241 TII.get(TargetOpcode::COPY), ResultReg) 4242 .addReg(Op0, getKillRegState(Op0IsKill)); 4243 return ResultReg; 4244 } else 4245 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4246 } 4247 4248 // Don't deal with undefined shifts. 4249 if (Shift >= DstBits) 4250 return 0; 4251 4252 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4253 // {S|U}BFM Wd, Wn, #r, #s 4254 // Wd<s-r:0> = Wn<s:r> when r <= s 4255 4256 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4257 // %2 = lshr i16 %1, 4 4258 // Wd<7-4:0> = Wn<7:4> 4259 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4260 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4261 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4262 4263 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4264 // %2 = lshr i16 %1, 8 4265 // Wd<7-7,0> = Wn<7:7> 4266 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4267 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4268 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4269 4270 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4271 // %2 = lshr i16 %1, 12 4272 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4273 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4274 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4275 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4276 4277 if (Shift >= SrcBits && IsZExt) 4278 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4279 4280 // It is not possible to fold a sign-extend into the LShr instruction. In this 4281 // case emit a sign-extend. 4282 if (!IsZExt) { 4283 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4284 if (!Op0) 4285 return 0; 4286 Op0IsKill = true; 4287 SrcVT = RetVT; 4288 SrcBits = SrcVT.getSizeInBits(); 4289 IsZExt = true; 4290 } 4291 4292 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4293 unsigned ImmS = SrcBits - 1; 4294 static const unsigned OpcTable[2][2] = { 4295 {AArch64::SBFMWri, AArch64::SBFMXri}, 4296 {AArch64::UBFMWri, AArch64::UBFMXri} 4297 }; 4298 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4299 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4300 Register TmpReg = MRI.createVirtualRegister(RC); 4301 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4302 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4303 .addImm(0) 4304 .addReg(Op0, getKillRegState(Op0IsKill)) 4305 .addImm(AArch64::sub_32); 4306 Op0 = TmpReg; 4307 Op0IsKill = true; 4308 } 4309 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4310 } 4311 4312 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 4313 unsigned Op1Reg, bool Op1IsKill) { 4314 unsigned Opc = 0; 4315 bool NeedTrunc = false; 4316 uint64_t Mask = 0; 4317 switch (RetVT.SimpleTy) { 4318 default: return 0; 4319 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4320 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4321 case MVT::i32: Opc = AArch64::ASRVWr; break; 4322 case MVT::i64: Opc = AArch64::ASRVXr; break; 4323 } 4324 4325 const TargetRegisterClass *RC = 4326 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4327 if (NeedTrunc) { 4328 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); 4329 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 4330 Op0IsKill = Op1IsKill = true; 4331 } 4332 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 4333 Op1IsKill); 4334 if (NeedTrunc) 4335 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 4336 return ResultReg; 4337 } 4338 4339 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4340 bool Op0IsKill, uint64_t Shift, 4341 bool IsZExt) { 4342 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4343 "Unexpected source/return type pair."); 4344 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4345 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4346 "Unexpected source value type."); 4347 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4348 RetVT == MVT::i64) && "Unexpected return value type."); 4349 4350 bool Is64Bit = (RetVT == MVT::i64); 4351 unsigned RegSize = Is64Bit ? 64 : 32; 4352 unsigned DstBits = RetVT.getSizeInBits(); 4353 unsigned SrcBits = SrcVT.getSizeInBits(); 4354 const TargetRegisterClass *RC = 4355 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4356 4357 // Just emit a copy for "zero" shifts. 4358 if (Shift == 0) { 4359 if (RetVT == SrcVT) { 4360 unsigned ResultReg = createResultReg(RC); 4361 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4362 TII.get(TargetOpcode::COPY), ResultReg) 4363 .addReg(Op0, getKillRegState(Op0IsKill)); 4364 return ResultReg; 4365 } else 4366 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4367 } 4368 4369 // Don't deal with undefined shifts. 4370 if (Shift >= DstBits) 4371 return 0; 4372 4373 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
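  // As for LSR above, a constant arithmetic shift becomes one {S|U}BFM with
  // ImmR = min(Shift, SrcBits - 1) and ImmS = SrcBits - 1. The bit patterns
  // below show an i8 value extended to i16 and shifted right by 4, 8 and 12.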
4374 // {S|U}BFM Wd, Wn, #r, #s 4375 // Wd<s-r:0> = Wn<s:r> when r <= s 4376 4377 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4378 // %2 = ashr i16 %1, 4 4379 // Wd<7-4:0> = Wn<7:4> 4380 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4381 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4382 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4383 4384 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4385 // %2 = ashr i16 %1, 8 4386 // Wd<7-7,0> = Wn<7:7> 4387 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4388 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4389 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4390 4391 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4392 // %2 = ashr i16 %1, 12 4393 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4394 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4395 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4396 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4397 4398 if (Shift >= SrcBits && IsZExt) 4399 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4400 4401 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4402 unsigned ImmS = SrcBits - 1; 4403 static const unsigned OpcTable[2][2] = { 4404 {AArch64::SBFMWri, AArch64::SBFMXri}, 4405 {AArch64::UBFMWri, AArch64::UBFMXri} 4406 }; 4407 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4408 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4409 Register TmpReg = MRI.createVirtualRegister(RC); 4410 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4411 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4412 .addImm(0) 4413 .addReg(Op0, getKillRegState(Op0IsKill)) 4414 .addImm(AArch64::sub_32); 4415 Op0 = TmpReg; 4416 Op0IsKill = true; 4417 } 4418 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4419 } 4420 4421 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4422 bool IsZExt) { 4423 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4424 4425 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4426 // DestVT are odd things, so test to make sure that they are both types we can 4427 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4428 // bail out to SelectionDAG. 4429 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4430 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4431 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4432 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4433 return 0; 4434 4435 unsigned Opc; 4436 unsigned Imm = 0; 4437 4438 switch (SrcVT.SimpleTy) { 4439 default: 4440 return 0; 4441 case MVT::i1: 4442 return emiti1Ext(SrcReg, DestVT, IsZExt); 4443 case MVT::i8: 4444 if (DestVT == MVT::i64) 4445 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4446 else 4447 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4448 Imm = 7; 4449 break; 4450 case MVT::i16: 4451 if (DestVT == MVT::i64) 4452 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4453 else 4454 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4455 Imm = 15; 4456 break; 4457 case MVT::i32: 4458 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4459 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4460 Imm = 31; 4461 break; 4462 } 4463 4464 // Handle i8 and i16 as i32. 
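  // For an i64 destination the 32-bit source is first placed in the low half
  // of an X register with SUBREG_TO_REG so the 64-bit bitfield move can use it.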
4465 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4466 DestVT = MVT::i32; 4467 else if (DestVT == MVT::i64) { 4468 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4469 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4470 TII.get(AArch64::SUBREG_TO_REG), Src64) 4471 .addImm(0) 4472 .addReg(SrcReg) 4473 .addImm(AArch64::sub_32); 4474 SrcReg = Src64; 4475 } 4476 4477 const TargetRegisterClass *RC = 4478 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4479 return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm); 4480 } 4481 4482 static bool isZExtLoad(const MachineInstr *LI) { 4483 switch (LI->getOpcode()) { 4484 default: 4485 return false; 4486 case AArch64::LDURBBi: 4487 case AArch64::LDURHHi: 4488 case AArch64::LDURWi: 4489 case AArch64::LDRBBui: 4490 case AArch64::LDRHHui: 4491 case AArch64::LDRWui: 4492 case AArch64::LDRBBroX: 4493 case AArch64::LDRHHroX: 4494 case AArch64::LDRWroX: 4495 case AArch64::LDRBBroW: 4496 case AArch64::LDRHHroW: 4497 case AArch64::LDRWroW: 4498 return true; 4499 } 4500 } 4501 4502 static bool isSExtLoad(const MachineInstr *LI) { 4503 switch (LI->getOpcode()) { 4504 default: 4505 return false; 4506 case AArch64::LDURSBWi: 4507 case AArch64::LDURSHWi: 4508 case AArch64::LDURSBXi: 4509 case AArch64::LDURSHXi: 4510 case AArch64::LDURSWi: 4511 case AArch64::LDRSBWui: 4512 case AArch64::LDRSHWui: 4513 case AArch64::LDRSBXui: 4514 case AArch64::LDRSHXui: 4515 case AArch64::LDRSWui: 4516 case AArch64::LDRSBWroX: 4517 case AArch64::LDRSHWroX: 4518 case AArch64::LDRSBXroX: 4519 case AArch64::LDRSHXroX: 4520 case AArch64::LDRSWroX: 4521 case AArch64::LDRSBWroW: 4522 case AArch64::LDRSHWroW: 4523 case AArch64::LDRSBXroW: 4524 case AArch64::LDRSHXroW: 4525 case AArch64::LDRSWroW: 4526 return true; 4527 } 4528 } 4529 4530 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4531 MVT SrcVT) { 4532 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4533 if (!LI || !LI->hasOneUse()) 4534 return false; 4535 4536 // Check if the load instruction has already been selected. 4537 unsigned Reg = lookUpRegForValue(LI); 4538 if (!Reg) 4539 return false; 4540 4541 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4542 if (!MI) 4543 return false; 4544 4545 // Check if the correct load instruction has been emitted - SelectionDAG might 4546 // have emitted a zero-extending load, but we need a sign-extending load. 4547 bool IsZExt = isa<ZExtInst>(I); 4548 const auto *LoadMI = MI; 4549 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4550 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4551 Register LoadReg = MI->getOperand(1).getReg(); 4552 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4553 assert(LoadMI && "Expected valid instruction"); 4554 } 4555 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4556 return false; 4557 4558 // Nothing to be done. 
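  // A 32-bit load already defines the full W register with the correct
  // extension, so unless a real i64 result is required the loaded register can
  // be reused directly.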
4559 if (RetVT != MVT::i64 || SrcVT > MVT::i32) { 4560 updateValueMap(I, Reg); 4561 return true; 4562 } 4563 4564 if (IsZExt) { 4565 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); 4566 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4567 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4568 .addImm(0) 4569 .addReg(Reg, getKillRegState(true)) 4570 .addImm(AArch64::sub_32); 4571 Reg = Reg64; 4572 } else { 4573 assert((MI->getOpcode() == TargetOpcode::COPY && 4574 MI->getOperand(1).getSubReg() == AArch64::sub_32) && 4575 "Expected copy instruction"); 4576 Reg = MI->getOperand(1).getReg(); 4577 MachineBasicBlock::iterator I(MI); 4578 removeDeadCode(I, std::next(I)); 4579 } 4580 updateValueMap(I, Reg); 4581 return true; 4582 } 4583 4584 bool AArch64FastISel::selectIntExt(const Instruction *I) { 4585 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 4586 "Unexpected integer extend instruction."); 4587 MVT RetVT; 4588 MVT SrcVT; 4589 if (!isTypeSupported(I->getType(), RetVT)) 4590 return false; 4591 4592 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) 4593 return false; 4594 4595 // Try to optimize already sign-/zero-extended values from load instructions. 4596 if (optimizeIntExtLoad(I, RetVT, SrcVT)) 4597 return true; 4598 4599 unsigned SrcReg = getRegForValue(I->getOperand(0)); 4600 if (!SrcReg) 4601 return false; 4602 bool SrcIsKill = hasTrivialKill(I->getOperand(0)); 4603 4604 // Try to optimize already sign-/zero-extended values from function arguments. 4605 bool IsZExt = isa<ZExtInst>(I); 4606 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) { 4607 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { 4608 if (RetVT == MVT::i64 && SrcVT != MVT::i64) { 4609 unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass); 4610 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4611 TII.get(AArch64::SUBREG_TO_REG), ResultReg) 4612 .addImm(0) 4613 .addReg(SrcReg, getKillRegState(SrcIsKill)) 4614 .addImm(AArch64::sub_32); 4615 SrcReg = ResultReg; 4616 } 4617 // Conservatively clear all kill flags from all uses, because we are 4618 // replacing a sign-/zero-extend instruction at IR level with a nop at MI 4619 // level. The result of the instruction at IR level might have been 4620 // trivially dead, which is now not longer true. 4621 unsigned UseReg = lookUpRegForValue(I); 4622 if (UseReg) 4623 MRI.clearKillFlags(UseReg); 4624 4625 updateValueMap(I, SrcReg); 4626 return true; 4627 } 4628 } 4629 4630 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); 4631 if (!ResultReg) 4632 return false; 4633 4634 updateValueMap(I, ResultReg); 4635 return true; 4636 } 4637 4638 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { 4639 EVT DestEVT = TLI.getValueType(DL, I->getType(), true); 4640 if (!DestEVT.isSimple()) 4641 return false; 4642 4643 MVT DestVT = DestEVT.getSimpleVT(); 4644 if (DestVT != MVT::i64 && DestVT != MVT::i32) 4645 return false; 4646 4647 unsigned DivOpc; 4648 bool Is64bit = (DestVT == MVT::i64); 4649 switch (ISDOpcode) { 4650 default: 4651 return false; 4652 case ISD::SREM: 4653 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr; 4654 break; 4655 case ISD::UREM: 4656 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; 4657 break; 4658 } 4659 unsigned MSubOpc = Is64bit ? 
AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4660 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4661 if (!Src0Reg) 4662 return false; 4663 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4664 4665 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 4666 if (!Src1Reg) 4667 return false; 4668 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 4669 4670 const TargetRegisterClass *RC = 4671 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4672 unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false, 4673 Src1Reg, /*IsKill=*/false); 4674 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4675 // The remainder is computed as numerator - (quotient * denominator) using the 4676 // MSUB instruction. 4677 unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true, 4678 Src1Reg, Src1IsKill, Src0Reg, 4679 Src0IsKill); 4680 updateValueMap(I, ResultReg); 4681 return true; 4682 } 4683 4684 bool AArch64FastISel::selectMul(const Instruction *I) { 4685 MVT VT; 4686 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4687 return false; 4688 4689 if (VT.isVector()) 4690 return selectBinaryOp(I, ISD::MUL); 4691 4692 const Value *Src0 = I->getOperand(0); 4693 const Value *Src1 = I->getOperand(1); 4694 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4695 if (C->getValue().isPowerOf2()) 4696 std::swap(Src0, Src1); 4697 4698 // Try to simplify to a shift instruction. 4699 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4700 if (C->getValue().isPowerOf2()) { 4701 uint64_t ShiftVal = C->getValue().logBase2(); 4702 MVT SrcVT = VT; 4703 bool IsZExt = true; 4704 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4705 if (!isIntExtFree(ZExt)) { 4706 MVT VT; 4707 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4708 SrcVT = VT; 4709 IsZExt = true; 4710 Src0 = ZExt->getOperand(0); 4711 } 4712 } 4713 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4714 if (!isIntExtFree(SExt)) { 4715 MVT VT; 4716 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4717 SrcVT = VT; 4718 IsZExt = false; 4719 Src0 = SExt->getOperand(0); 4720 } 4721 } 4722 } 4723 4724 unsigned Src0Reg = getRegForValue(Src0); 4725 if (!Src0Reg) 4726 return false; 4727 bool Src0IsKill = hasTrivialKill(Src0); 4728 4729 unsigned ResultReg = 4730 emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt); 4731 4732 if (ResultReg) { 4733 updateValueMap(I, ResultReg); 4734 return true; 4735 } 4736 } 4737 4738 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4739 if (!Src0Reg) 4740 return false; 4741 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4742 4743 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 4744 if (!Src1Reg) 4745 return false; 4746 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 4747 4748 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill); 4749 4750 if (!ResultReg) 4751 return false; 4752 4753 updateValueMap(I, ResultReg); 4754 return true; 4755 } 4756 4757 bool AArch64FastISel::selectShift(const Instruction *I) { 4758 MVT RetVT; 4759 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4760 return false; 4761 4762 if (RetVT.isVector()) 4763 return selectOperator(I, I->getOpcode()); 4764 4765 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4766 unsigned ResultReg = 0; 4767 uint64_t ShiftVal = C->getZExtValue(); 4768 MVT SrcVT = RetVT; 4769 bool IsZExt = I->getOpcode() != Instruction::AShr; 4770 const Value *Op0 = I->getOperand(0); 
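  // Look through a zero-/sign-extend feeding the shift; if the extend is not
  // already free, fold it into the immediate shift so only one instruction is
  // emitted.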
4771 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4772 if (!isIntExtFree(ZExt)) { 4773 MVT TmpVT; 4774 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4775 SrcVT = TmpVT; 4776 IsZExt = true; 4777 Op0 = ZExt->getOperand(0); 4778 } 4779 } 4780 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4781 if (!isIntExtFree(SExt)) { 4782 MVT TmpVT; 4783 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4784 SrcVT = TmpVT; 4785 IsZExt = false; 4786 Op0 = SExt->getOperand(0); 4787 } 4788 } 4789 } 4790 4791 unsigned Op0Reg = getRegForValue(Op0); 4792 if (!Op0Reg) 4793 return false; 4794 bool Op0IsKill = hasTrivialKill(Op0); 4795 4796 switch (I->getOpcode()) { 4797 default: llvm_unreachable("Unexpected instruction."); 4798 case Instruction::Shl: 4799 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4800 break; 4801 case Instruction::AShr: 4802 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4803 break; 4804 case Instruction::LShr: 4805 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4806 break; 4807 } 4808 if (!ResultReg) 4809 return false; 4810 4811 updateValueMap(I, ResultReg); 4812 return true; 4813 } 4814 4815 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 4816 if (!Op0Reg) 4817 return false; 4818 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 4819 4820 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 4821 if (!Op1Reg) 4822 return false; 4823 bool Op1IsKill = hasTrivialKill(I->getOperand(1)); 4824 4825 unsigned ResultReg = 0; 4826 switch (I->getOpcode()) { 4827 default: llvm_unreachable("Unexpected instruction."); 4828 case Instruction::Shl: 4829 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4830 break; 4831 case Instruction::AShr: 4832 ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4833 break; 4834 case Instruction::LShr: 4835 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4836 break; 4837 } 4838 4839 if (!ResultReg) 4840 return false; 4841 4842 updateValueMap(I, ResultReg); 4843 return true; 4844 } 4845 4846 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4847 MVT RetVT, SrcVT; 4848 4849 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4850 return false; 4851 if (!isTypeLegal(I->getType(), RetVT)) 4852 return false; 4853 4854 unsigned Opc; 4855 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4856 Opc = AArch64::FMOVWSr; 4857 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4858 Opc = AArch64::FMOVXDr; 4859 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4860 Opc = AArch64::FMOVSWr; 4861 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4862 Opc = AArch64::FMOVDXr; 4863 else 4864 return false; 4865 4866 const TargetRegisterClass *RC = nullptr; 4867 switch (RetVT.SimpleTy) { 4868 default: llvm_unreachable("Unexpected value type."); 4869 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4870 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4871 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4872 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4873 } 4874 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 4875 if (!Op0Reg) 4876 return false; 4877 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 4878 unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill); 4879 4880 if (!ResultReg) 4881 return false; 4882 4883 updateValueMap(I, ResultReg); 4884 return true; 4885 } 4886 4887 bool AArch64FastISel::selectFRem(const Instruction *I) { 4888 MVT RetVT; 
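  // There is no AArch64 frem instruction, so the operation is lowered to the
  // corresponding libcall (fmodf for f32, fmod for f64).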
4889 if (!isTypeLegal(I->getType(), RetVT)) 4890 return false; 4891 4892 RTLIB::Libcall LC; 4893 switch (RetVT.SimpleTy) { 4894 default: 4895 return false; 4896 case MVT::f32: 4897 LC = RTLIB::REM_F32; 4898 break; 4899 case MVT::f64: 4900 LC = RTLIB::REM_F64; 4901 break; 4902 } 4903 4904 ArgListTy Args; 4905 Args.reserve(I->getNumOperands()); 4906 4907 // Populate the argument list. 4908 for (auto &Arg : I->operands()) { 4909 ArgListEntry Entry; 4910 Entry.Val = Arg; 4911 Entry.Ty = Arg->getType(); 4912 Args.push_back(Entry); 4913 } 4914 4915 CallLoweringInfo CLI; 4916 MCContext &Ctx = MF->getContext(); 4917 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4918 TLI.getLibcallName(LC), std::move(Args)); 4919 if (!lowerCallTo(CLI)) 4920 return false; 4921 updateValueMap(I, CLI.ResultReg); 4922 return true; 4923 } 4924 4925 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4926 MVT VT; 4927 if (!isTypeLegal(I->getType(), VT)) 4928 return false; 4929 4930 if (!isa<ConstantInt>(I->getOperand(1))) 4931 return selectBinaryOp(I, ISD::SDIV); 4932 4933 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4934 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4935 !(C.isPowerOf2() || (-C).isPowerOf2())) 4936 return selectBinaryOp(I, ISD::SDIV); 4937 4938 unsigned Lg2 = C.countTrailingZeros(); 4939 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4940 if (!Src0Reg) 4941 return false; 4942 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4943 4944 if (cast<BinaryOperator>(I)->isExact()) { 4945 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2); 4946 if (!ResultReg) 4947 return false; 4948 updateValueMap(I, ResultReg); 4949 return true; 4950 } 4951 4952 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4953 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne); 4954 if (!AddReg) 4955 return false; 4956 4957 // (Src0 < 0) ? Pow2 - 1 : 0; 4958 if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0)) 4959 return false; 4960 4961 unsigned SelectOpc; 4962 const TargetRegisterClass *RC; 4963 if (VT == MVT::i64) { 4964 SelectOpc = AArch64::CSELXr; 4965 RC = &AArch64::GPR64RegClass; 4966 } else { 4967 SelectOpc = AArch64::CSELWr; 4968 RC = &AArch64::GPR32RegClass; 4969 } 4970 unsigned SelectReg = 4971 fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg, 4972 Src0IsKill, AArch64CC::LT); 4973 if (!SelectReg) 4974 return false; 4975 4976 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4977 // negate the result. 4978 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4979 unsigned ResultReg; 4980 if (C.isNegative()) 4981 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true, 4982 SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2); 4983 else 4984 ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2); 4985 4986 if (!ResultReg) 4987 return false; 4988 4989 updateValueMap(I, ResultReg); 4990 return true; 4991 } 4992 4993 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4994 /// have to duplicate it for AArch64, because otherwise we would fail during the 4995 /// sign-extend emission. 4996 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4997 unsigned IdxN = getRegForValue(Idx); 4998 if (IdxN == 0) 4999 // Unhandled operand. Halt "fast" selection and bail. 
5000 return std::pair<unsigned, bool>(0, false);
5001
5002 bool IdxNIsKill = hasTrivialKill(Idx);
5003
5004 // If the index is smaller or larger than intptr_t, truncate or extend it.
5005 MVT PtrVT = TLI.getPointerTy(DL);
5006 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
5007 if (IdxVT.bitsLT(PtrVT)) {
5008 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
5009 IdxNIsKill = true;
5010 } else if (IdxVT.bitsGT(PtrVT))
5011 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
5012 return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
5013 }
5014
5015 /// This is mostly a copy of the existing FastISel GEP code, but we have to
5016 /// duplicate it for AArch64, because otherwise we would bail out even for
5017 /// simple cases. This is because the standard fastEmit functions don't cover
5018 /// MUL at all and ADD is lowered very inefficiently.
5019 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
5020 if (Subtarget->isTargetILP32())
5021 return false;
5022
5023 unsigned N = getRegForValue(I->getOperand(0));
5024 if (!N)
5025 return false;
5026 bool NIsKill = hasTrivialKill(I->getOperand(0));
5027
5028 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
5029 // into a single N = N + TotalOffset.
5030 uint64_t TotalOffs = 0;
5031 MVT VT = TLI.getPointerTy(DL);
5032 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
5033 GTI != E; ++GTI) {
5034 const Value *Idx = GTI.getOperand();
5035 if (auto *StTy = GTI.getStructTypeOrNull()) {
5036 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
5037 // N = N + Offset
5038 if (Field)
5039 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
5040 } else {
5041 Type *Ty = GTI.getIndexedType();
5042
5043 // If this is a constant subscript, handle it quickly.
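  // A constant index just contributes Idx * alloc-size of the indexed type to
  // the running byte offset; no instructions are emitted until the accumulated
  // offset is applied.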
5044 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { 5045 if (CI->isZero()) 5046 continue; 5047 // N = N + Offset 5048 TotalOffs += 5049 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); 5050 continue; 5051 } 5052 if (TotalOffs) { 5053 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); 5054 if (!N) 5055 return false; 5056 NIsKill = true; 5057 TotalOffs = 0; 5058 } 5059 5060 // N = N + Idx * ElementSize; 5061 uint64_t ElementSize = DL.getTypeAllocSize(Ty); 5062 std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx); 5063 unsigned IdxN = Pair.first; 5064 bool IdxNIsKill = Pair.second; 5065 if (!IdxN) 5066 return false; 5067 5068 if (ElementSize != 1) { 5069 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); 5070 if (!C) 5071 return false; 5072 IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true); 5073 if (!IdxN) 5074 return false; 5075 IdxNIsKill = true; 5076 } 5077 N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill); 5078 if (!N) 5079 return false; 5080 } 5081 } 5082 if (TotalOffs) { 5083 N = emitAdd_ri_(VT, N, NIsKill, TotalOffs); 5084 if (!N) 5085 return false; 5086 } 5087 updateValueMap(I, N); 5088 return true; 5089 } 5090 5091 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) { 5092 assert(TM.getOptLevel() == CodeGenOpt::None && 5093 "cmpxchg survived AtomicExpand at optlevel > -O0"); 5094 5095 auto *RetPairTy = cast<StructType>(I->getType()); 5096 Type *RetTy = RetPairTy->getTypeAtIndex(0U); 5097 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) && 5098 "cmpxchg has a non-i1 status result"); 5099 5100 MVT VT; 5101 if (!isTypeLegal(RetTy, VT)) 5102 return false; 5103 5104 const TargetRegisterClass *ResRC; 5105 unsigned Opc, CmpOpc; 5106 // This only supports i32/i64, because i8/i16 aren't legal, and the generic 5107 // extractvalue selection doesn't support that. 5108 if (VT == MVT::i32) { 5109 Opc = AArch64::CMP_SWAP_32; 5110 CmpOpc = AArch64::SUBSWrs; 5111 ResRC = &AArch64::GPR32RegClass; 5112 } else if (VT == MVT::i64) { 5113 Opc = AArch64::CMP_SWAP_64; 5114 CmpOpc = AArch64::SUBSXrs; 5115 ResRC = &AArch64::GPR64RegClass; 5116 } else { 5117 return false; 5118 } 5119 5120 const MCInstrDesc &II = TII.get(Opc); 5121 5122 const unsigned AddrReg = constrainOperandRegClass( 5123 II, getRegForValue(I->getPointerOperand()), II.getNumDefs()); 5124 const unsigned DesiredReg = constrainOperandRegClass( 5125 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1); 5126 const unsigned NewReg = constrainOperandRegClass( 5127 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2); 5128 5129 const unsigned ResultReg1 = createResultReg(ResRC); 5130 const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass); 5131 const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass); 5132 5133 // FIXME: MachineMemOperand doesn't support cmpxchg yet. 5134 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 5135 .addDef(ResultReg1) 5136 .addDef(ScratchReg) 5137 .addUse(AddrReg) 5138 .addUse(DesiredReg) 5139 .addUse(NewReg); 5140 5141 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) 5142 .addDef(VT == MVT::i32 ? 
AArch64::WZR : AArch64::XZR) 5143 .addUse(ResultReg1) 5144 .addUse(DesiredReg) 5145 .addImm(0); 5146 5147 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr)) 5148 .addDef(ResultReg2) 5149 .addUse(AArch64::WZR) 5150 .addUse(AArch64::WZR) 5151 .addImm(AArch64CC::NE); 5152 5153 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); 5154 updateValueMap(I, ResultReg1, 2); 5155 return true; 5156 } 5157 5158 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 5159 switch (I->getOpcode()) { 5160 default: 5161 break; 5162 case Instruction::Add: 5163 case Instruction::Sub: 5164 return selectAddSub(I); 5165 case Instruction::Mul: 5166 return selectMul(I); 5167 case Instruction::SDiv: 5168 return selectSDiv(I); 5169 case Instruction::SRem: 5170 if (!selectBinaryOp(I, ISD::SREM)) 5171 return selectRem(I, ISD::SREM); 5172 return true; 5173 case Instruction::URem: 5174 if (!selectBinaryOp(I, ISD::UREM)) 5175 return selectRem(I, ISD::UREM); 5176 return true; 5177 case Instruction::Shl: 5178 case Instruction::LShr: 5179 case Instruction::AShr: 5180 return selectShift(I); 5181 case Instruction::And: 5182 case Instruction::Or: 5183 case Instruction::Xor: 5184 return selectLogicalOp(I); 5185 case Instruction::Br: 5186 return selectBranch(I); 5187 case Instruction::IndirectBr: 5188 return selectIndirectBr(I); 5189 case Instruction::BitCast: 5190 if (!FastISel::selectBitCast(I)) 5191 return selectBitCast(I); 5192 return true; 5193 case Instruction::FPToSI: 5194 if (!selectCast(I, ISD::FP_TO_SINT)) 5195 return selectFPToInt(I, /*Signed=*/true); 5196 return true; 5197 case Instruction::FPToUI: 5198 return selectFPToInt(I, /*Signed=*/false); 5199 case Instruction::ZExt: 5200 case Instruction::SExt: 5201 return selectIntExt(I); 5202 case Instruction::Trunc: 5203 if (!selectCast(I, ISD::TRUNCATE)) 5204 return selectTrunc(I); 5205 return true; 5206 case Instruction::FPExt: 5207 return selectFPExt(I); 5208 case Instruction::FPTrunc: 5209 return selectFPTrunc(I); 5210 case Instruction::SIToFP: 5211 if (!selectCast(I, ISD::SINT_TO_FP)) 5212 return selectIntToFP(I, /*Signed=*/true); 5213 return true; 5214 case Instruction::UIToFP: 5215 return selectIntToFP(I, /*Signed=*/false); 5216 case Instruction::Load: 5217 return selectLoad(I); 5218 case Instruction::Store: 5219 return selectStore(I); 5220 case Instruction::FCmp: 5221 case Instruction::ICmp: 5222 return selectCmp(I); 5223 case Instruction::Select: 5224 return selectSelect(I); 5225 case Instruction::Ret: 5226 return selectRet(I); 5227 case Instruction::FRem: 5228 return selectFRem(I); 5229 case Instruction::GetElementPtr: 5230 return selectGetElementPtr(I); 5231 case Instruction::AtomicCmpXchg: 5232 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I)); 5233 } 5234 5235 // fall-back to target-independent instruction selection. 5236 return selectOperator(I, I->getOpcode()); 5237 } 5238 5239 namespace llvm { 5240 5241 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, 5242 const TargetLibraryInfo *LibInfo) { 5243 return new AArch64FastISel(FuncInfo, LibInfo); 5244 } 5245 5246 } // end namespace llvm 5247