1 //===- AArch64FastISel.cpp - AArch64 FastISel implementation ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the AArch64-specific support for the FastISel class. Some 10 // of the target-specific code is generated by tablegen in the file 11 // AArch64GenFastISel.inc, which is #included here. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AArch64.h" 16 #include "AArch64CallingConvention.h" 17 #include "AArch64MachineFunctionInfo.h" 18 #include "AArch64RegisterInfo.h" 19 #include "AArch64Subtarget.h" 20 #include "MCTargetDesc/AArch64AddressingModes.h" 21 #include "Utils/AArch64BaseInfo.h" 22 #include "llvm/ADT/APFloat.h" 23 #include "llvm/ADT/APInt.h" 24 #include "llvm/ADT/DenseMap.h" 25 #include "llvm/ADT/SmallVector.h" 26 #include "llvm/Analysis/BranchProbabilityInfo.h" 27 #include "llvm/CodeGen/CallingConvLower.h" 28 #include "llvm/CodeGen/FastISel.h" 29 #include "llvm/CodeGen/FunctionLoweringInfo.h" 30 #include "llvm/CodeGen/ISDOpcodes.h" 31 #include "llvm/CodeGen/MachineBasicBlock.h" 32 #include "llvm/CodeGen/MachineConstantPool.h" 33 #include "llvm/CodeGen/MachineFrameInfo.h" 34 #include "llvm/CodeGen/MachineInstr.h" 35 #include "llvm/CodeGen/MachineInstrBuilder.h" 36 #include "llvm/CodeGen/MachineMemOperand.h" 37 #include "llvm/CodeGen/MachineRegisterInfo.h" 38 #include "llvm/CodeGen/MachineValueType.h" 39 #include "llvm/CodeGen/RuntimeLibcalls.h" 40 #include "llvm/CodeGen/ValueTypes.h" 41 #include "llvm/IR/Argument.h" 42 #include "llvm/IR/Attributes.h" 43 #include "llvm/IR/BasicBlock.h" 44 #include "llvm/IR/CallingConv.h" 45 #include "llvm/IR/Constant.h" 46 #include "llvm/IR/Constants.h" 47 #include "llvm/IR/DataLayout.h" 48 #include "llvm/IR/DerivedTypes.h" 49 #include "llvm/IR/Function.h" 50 #include "llvm/IR/GetElementPtrTypeIterator.h" 51 #include "llvm/IR/GlobalValue.h" 52 #include "llvm/IR/InstrTypes.h" 53 #include "llvm/IR/Instruction.h" 54 #include "llvm/IR/Instructions.h" 55 #include "llvm/IR/IntrinsicInst.h" 56 #include "llvm/IR/Intrinsics.h" 57 #include "llvm/IR/IntrinsicsAArch64.h" 58 #include "llvm/IR/Operator.h" 59 #include "llvm/IR/Type.h" 60 #include "llvm/IR/User.h" 61 #include "llvm/IR/Value.h" 62 #include "llvm/MC/MCInstrDesc.h" 63 #include "llvm/MC/MCRegisterInfo.h" 64 #include "llvm/MC/MCSymbol.h" 65 #include "llvm/Support/AtomicOrdering.h" 66 #include "llvm/Support/Casting.h" 67 #include "llvm/Support/CodeGen.h" 68 #include "llvm/Support/Compiler.h" 69 #include "llvm/Support/ErrorHandling.h" 70 #include "llvm/Support/MathExtras.h" 71 #include <algorithm> 72 #include <cassert> 73 #include <cstdint> 74 #include <iterator> 75 #include <utility> 76 77 using namespace llvm; 78 79 namespace { 80 81 class AArch64FastISel final : public FastISel { 82 class Address { 83 public: 84 using BaseKind = enum { 85 RegBase, 86 FrameIndexBase 87 }; 88 89 private: 90 BaseKind Kind = RegBase; 91 AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend; 92 union { 93 unsigned Reg; 94 int FI; 95 } Base; 96 unsigned OffsetReg = 0; 97 unsigned Shift = 0; 98 int64_t Offset = 0; 99 const GlobalValue *GV = nullptr; 100 101 public: 102 Address() { Base.Reg = 0; } 103 104 void setKind(BaseKind K) { Kind = K; } 105 BaseKind
getKind() const { return Kind; } 106 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; } 107 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; } 108 bool isRegBase() const { return Kind == RegBase; } 109 bool isFIBase() const { return Kind == FrameIndexBase; } 110 111 void setReg(unsigned Reg) { 112 assert(isRegBase() && "Invalid base register access!"); 113 Base.Reg = Reg; 114 } 115 116 unsigned getReg() const { 117 assert(isRegBase() && "Invalid base register access!"); 118 return Base.Reg; 119 } 120 121 void setOffsetReg(unsigned Reg) { 122 OffsetReg = Reg; 123 } 124 125 unsigned getOffsetReg() const { 126 return OffsetReg; 127 } 128 129 void setFI(unsigned FI) { 130 assert(isFIBase() && "Invalid base frame index access!"); 131 Base.FI = FI; 132 } 133 134 unsigned getFI() const { 135 assert(isFIBase() && "Invalid base frame index access!"); 136 return Base.FI; 137 } 138 139 void setOffset(int64_t O) { Offset = O; } 140 int64_t getOffset() { return Offset; } 141 void setShift(unsigned S) { Shift = S; } 142 unsigned getShift() { return Shift; } 143 144 void setGlobalValue(const GlobalValue *G) { GV = G; } 145 const GlobalValue *getGlobalValue() { return GV; } 146 }; 147 148 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 149 /// make the right decision when generating code for different targets. 150 const AArch64Subtarget *Subtarget; 151 LLVMContext *Context; 152 153 bool fastLowerArguments() override; 154 bool fastLowerCall(CallLoweringInfo &CLI) override; 155 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; 156 157 private: 158 // Selection routines. 159 bool selectAddSub(const Instruction *I); 160 bool selectLogicalOp(const Instruction *I); 161 bool selectLoad(const Instruction *I); 162 bool selectStore(const Instruction *I); 163 bool selectBranch(const Instruction *I); 164 bool selectIndirectBr(const Instruction *I); 165 bool selectCmp(const Instruction *I); 166 bool selectSelect(const Instruction *I); 167 bool selectFPExt(const Instruction *I); 168 bool selectFPTrunc(const Instruction *I); 169 bool selectFPToInt(const Instruction *I, bool Signed); 170 bool selectIntToFP(const Instruction *I, bool Signed); 171 bool selectRem(const Instruction *I, unsigned ISDOpcode); 172 bool selectRet(const Instruction *I); 173 bool selectTrunc(const Instruction *I); 174 bool selectIntExt(const Instruction *I); 175 bool selectMul(const Instruction *I); 176 bool selectShift(const Instruction *I); 177 bool selectBitCast(const Instruction *I); 178 bool selectFRem(const Instruction *I); 179 bool selectSDiv(const Instruction *I); 180 bool selectGetElementPtr(const Instruction *I); 181 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I); 182 183 // Utility helper routines. 
184 bool isTypeLegal(Type *Ty, MVT &VT); 185 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false); 186 bool isValueAvailable(const Value *V) const; 187 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr); 188 bool computeCallAddress(const Value *V, Address &Addr); 189 bool simplifyAddress(Address &Addr, MVT VT); 190 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB, 191 MachineMemOperand::Flags Flags, 192 unsigned ScaleFactor, MachineMemOperand *MMO); 193 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment); 194 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, 195 MaybeAlign Alignment); 196 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I, 197 const Value *Cond); 198 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT); 199 bool optimizeSelect(const SelectInst *SI); 200 unsigned getRegForGEPIndex(const Value *Idx); 201 202 // Emit helper routines. 203 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, 204 const Value *RHS, bool SetFlags = false, 205 bool WantResult = true, bool IsZExt = false); 206 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, 207 unsigned RHSReg, bool SetFlags = false, 208 bool WantResult = true); 209 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, 210 uint64_t Imm, bool SetFlags = false, 211 bool WantResult = true); 212 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, 213 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType, 214 uint64_t ShiftImm, bool SetFlags = false, 215 bool WantResult = true); 216 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, 217 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType, 218 uint64_t ShiftImm, bool SetFlags = false, 219 bool WantResult = true); 220 221 // Emit functions. 
222 bool emitCompareAndBranch(const BranchInst *BI); 223 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt); 224 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); 225 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm); 226 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); 227 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true, 228 MachineMemOperand *MMO = nullptr); 229 bool emitStore(MVT VT, unsigned SrcReg, Address Addr, 230 MachineMemOperand *MMO = nullptr); 231 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg, 232 MachineMemOperand *MMO = nullptr); 233 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); 234 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); 235 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, 236 bool SetFlags = false, bool WantResult = true, 237 bool IsZExt = false); 238 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm); 239 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS, 240 bool SetFlags = false, bool WantResult = true, 241 bool IsZExt = false); 242 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg, 243 bool WantResult = true); 244 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg, 245 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm, 246 bool WantResult = true); 247 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, 248 const Value *RHS); 249 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, 250 uint64_t Imm); 251 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, 252 unsigned RHSReg, uint64_t ShiftImm); 253 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm); 254 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1); 255 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1); 256 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1); 257 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); 258 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, 259 bool IsZExt = true); 260 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); 261 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, 262 bool IsZExt = true); 263 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); 264 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, 265 bool IsZExt = false); 266 267 unsigned materializeInt(const ConstantInt *CI, MVT VT); 268 unsigned materializeFP(const ConstantFP *CFP, MVT VT); 269 unsigned materializeGV(const GlobalValue *GV); 270 271 // Call handling routines. 272 private: 273 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; 274 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs, 275 unsigned &NumBytes); 276 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes); 277 278 public: 279 // Backend specific FastISel code. 
280 unsigned fastMaterializeAlloca(const AllocaInst *AI) override; 281 unsigned fastMaterializeConstant(const Constant *C) override; 282 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override; 283 284 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, 285 const TargetLibraryInfo *LibInfo) 286 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { 287 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>(); 288 Context = &FuncInfo.Fn->getContext(); 289 } 290 291 bool fastSelectInstruction(const Instruction *I) override; 292 293 #include "AArch64GenFastISel.inc" 294 }; 295 296 } // end anonymous namespace 297 298 /// Check if the sign-/zero-extend will be a noop. 299 static bool isIntExtFree(const Instruction *I) { 300 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 301 "Unexpected integer extend instruction."); 302 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && 303 "Unexpected value type."); 304 bool IsZExt = isa<ZExtInst>(I); 305 306 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0))) 307 if (LI->hasOneUse()) 308 return true; 309 310 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) 311 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) 312 return true; 313 314 return false; 315 } 316 317 /// Determine the implicit scale factor that is applied by a memory 318 /// operation for a given value type. 319 static unsigned getImplicitScaleFactor(MVT VT) { 320 switch (VT.SimpleTy) { 321 default: 322 return 0; // invalid 323 case MVT::i1: // fall-through 324 case MVT::i8: 325 return 1; 326 case MVT::i16: 327 return 2; 328 case MVT::i32: // fall-through 329 case MVT::f32: 330 return 4; 331 case MVT::i64: // fall-through 332 case MVT::f64: 333 return 8; 334 } 335 } 336 337 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { 338 if (CC == CallingConv::WebKit_JS) 339 return CC_AArch64_WebKit_JS; 340 if (CC == CallingConv::GHC) 341 return CC_AArch64_GHC; 342 if (CC == CallingConv::CFGuard_Check) 343 return CC_AArch64_Win64_CFGuard_Check; 344 return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; 345 } 346 347 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { 348 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && 349 "Alloca should always return a pointer."); 350 351 // Don't handle dynamic allocas. 352 if (!FuncInfo.StaticAllocaMap.count(AI)) 353 return 0; 354 355 DenseMap<const AllocaInst *, int>::iterator SI = 356 FuncInfo.StaticAllocaMap.find(AI); 357 358 if (SI != FuncInfo.StaticAllocaMap.end()) { 359 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 360 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri), 361 ResultReg) 362 .addFrameIndex(SI->second) 363 .addImm(0) 364 .addImm(0); 365 return ResultReg; 366 } 367 368 return 0; 369 } 370 371 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { 372 if (VT > MVT::i64) 373 return 0; 374 375 if (!CI->isZero()) 376 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); 377 378 // Create a copy from the zero register to materialize a "0" value. 379 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass 380 : &AArch64::GPR32RegClass; 381 unsigned ZeroReg = (VT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR; 382 Register ResultReg = createResultReg(RC); 383 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), 384 ResultReg).addReg(ZeroReg, getKillRegState(true)); 385 return ResultReg; 386 } 387 388 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { 389 // Positive zero (+0.0) has to be materialized with a fmov from the zero 390 // register, because the immediate version of fmov cannot encode zero. 391 if (CFP->isNullValue()) 392 return fastMaterializeFloatZero(CFP); 393 394 if (VT != MVT::f32 && VT != MVT::f64) 395 return 0; 396 397 const APFloat Val = CFP->getValueAPF(); 398 bool Is64Bit = (VT == MVT::f64); 399 // This checks to see if we can use FMOV instructions to materialize 400 // a constant, otherwise we have to materialize via the constant pool. 401 int Imm = 402 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val); 403 if (Imm != -1) { 404 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi; 405 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); 406 } 407 408 // For the large code model materialize the FP constant in code. 409 if (TM.getCodeModel() == CodeModel::Large) { 410 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm; 411 const TargetRegisterClass *RC = Is64Bit ? 412 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 413 414 Register TmpReg = createResultReg(RC); 415 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg) 416 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); 417 418 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 419 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 420 TII.get(TargetOpcode::COPY), ResultReg) 421 .addReg(TmpReg, getKillRegState(true)); 422 423 return ResultReg; 424 } 425 426 // Materialize via constant pool. MachineConstantPool wants an explicit 427 // alignment. 428 Align Alignment = DL.getPrefTypeAlign(CFP->getType()); 429 430 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment); 431 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 432 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP), 433 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE); 434 435 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui; 436 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 437 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 438 .addReg(ADRPReg) 439 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 440 return ResultReg; 441 } 442 443 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { 444 // We can't handle thread-local variables quickly yet. 445 if (GV->isThreadLocal()) 446 return 0; 447 448 // MachO still uses GOT for large code-model accesses, but ELF requires 449 // movz/movk sequences, which FastISel doesn't handle yet. 
450 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO()) 451 return 0; 452 453 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); 454 455 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); 456 if (!DestEVT.isSimple()) 457 return 0; 458 459 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 460 unsigned ResultReg; 461 462 if (OpFlags & AArch64II::MO_GOT) { 463 // ADRP + LDRX 464 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP), 465 ADRPReg) 466 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); 467 468 unsigned LdrOpc; 469 if (Subtarget->isTargetILP32()) { 470 ResultReg = createResultReg(&AArch64::GPR32RegClass); 471 LdrOpc = AArch64::LDRWui; 472 } else { 473 ResultReg = createResultReg(&AArch64::GPR64RegClass); 474 LdrOpc = AArch64::LDRXui; 475 } 476 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc), 477 ResultReg) 478 .addReg(ADRPReg) 479 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | 480 AArch64II::MO_NC | OpFlags); 481 if (!Subtarget->isTargetILP32()) 482 return ResultReg; 483 484 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits 485 // so we must extend the result on ILP32. 486 Register Result64 = createResultReg(&AArch64::GPR64RegClass); 487 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 488 TII.get(TargetOpcode::SUBREG_TO_REG)) 489 .addDef(Result64) 490 .addImm(0) 491 .addReg(ResultReg, RegState::Kill) 492 .addImm(AArch64::sub_32); 493 return Result64; 494 } else { 495 // ADRP + ADDX 496 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP), 497 ADRPReg) 498 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); 499 500 if (OpFlags & AArch64II::MO_TAGGED) { 501 // MO_TAGGED on the page indicates a tagged address. Set the tag now. 502 // We do so by creating a MOVK that sets bits 48-63 of the register to 503 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in 504 // the small code model so we can assume a binary size of <= 4GB, which 505 // makes the untagged PC relative offset positive. The binary must also be 506 // loaded into address range [0, 2^48). Both of these properties need to 507 // be ensured at runtime when using tagged addresses. 508 // 509 // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that 510 // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands 511 // are not exactly 1:1 with FastISel so we cannot easily abstract this 512 // out. At some point, it would be nice to find a way to not have this 513 // duplicate code. 514 unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass); 515 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi), 516 DstReg) 517 .addReg(ADRPReg) 518 .addGlobalAddress(GV, /*Offset=*/0x100000000, 519 AArch64II::MO_PREL | AArch64II::MO_G3) 520 .addImm(48); 521 ADRPReg = DstReg; 522 } 523 524 ResultReg = createResultReg(&AArch64::GPR64spRegClass); 525 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri), 526 ResultReg) 527 .addReg(ADRPReg) 528 .addGlobalAddress(GV, 0, 529 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags) 530 .addImm(0); 531 } 532 return ResultReg; 533 } 534 535 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { 536 EVT CEVT = TLI.getValueType(DL, C->getType(), true); 537 538 // Only handle simple types. 539 if (!CEVT.isSimple()) 540 return 0; 541 MVT VT = CEVT.getSimpleVT(); 542 // arm64_32 has 32-bit pointers held in 64-bit registers.
Because of that, 543 // 'null' pointers need to have a somewhat special treatment. 544 if (isa<ConstantPointerNull>(C)) { 545 assert(VT == MVT::i64 && "Expected 64-bit pointers"); 546 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT); 547 } 548 549 if (const auto *CI = dyn_cast<ConstantInt>(C)) 550 return materializeInt(CI, VT); 551 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 552 return materializeFP(CFP, VT); 553 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 554 return materializeGV(GV); 555 556 return 0; 557 } 558 559 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { 560 assert(CFP->isNullValue() && 561 "Floating-point constant is not a positive zero."); 562 MVT VT; 563 if (!isTypeLegal(CFP->getType(), VT)) 564 return 0; 565 566 if (VT != MVT::f32 && VT != MVT::f64) 567 return 0; 568 569 bool Is64Bit = (VT == MVT::f64); 570 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 571 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr; 572 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg); 573 } 574 575 /// Check if the multiply is by a power-of-2 constant. 576 static bool isMulPowOf2(const Value *I) { 577 if (const auto *MI = dyn_cast<MulOperator>(I)) { 578 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0))) 579 if (C->getValue().isPowerOf2()) 580 return true; 581 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1))) 582 if (C->getValue().isPowerOf2()) 583 return true; 584 } 585 return false; 586 } 587 588 // Computes the address to get to an object. 589 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) 590 { 591 const User *U = nullptr; 592 unsigned Opcode = Instruction::UserOp1; 593 if (const Instruction *I = dyn_cast<Instruction>(Obj)) { 594 // Don't walk into other basic blocks unless the object is an alloca from 595 // another block, otherwise it may not have a virtual register assigned. 596 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || 597 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 598 Opcode = I->getOpcode(); 599 U = I; 600 } 601 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { 602 Opcode = C->getOpcode(); 603 U = C; 604 } 605 606 if (auto *Ty = dyn_cast<PointerType>(Obj->getType())) 607 if (Ty->getAddressSpace() > 255) 608 // Fast instruction selection doesn't support the special 609 // address spaces. 610 return false; 611 612 switch (Opcode) { 613 default: 614 break; 615 case Instruction::BitCast: 616 // Look through bitcasts. 617 return computeAddress(U->getOperand(0), Addr, Ty); 618 619 case Instruction::IntToPtr: 620 // Look past no-op inttoptrs. 621 if (TLI.getValueType(DL, U->getOperand(0)->getType()) == 622 TLI.getPointerTy(DL)) 623 return computeAddress(U->getOperand(0), Addr, Ty); 624 break; 625 626 case Instruction::PtrToInt: 627 // Look past no-op ptrtoints. 628 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) 629 return computeAddress(U->getOperand(0), Addr, Ty); 630 break; 631 632 case Instruction::GetElementPtr: { 633 Address SavedAddr = Addr; 634 uint64_t TmpOffset = Addr.getOffset(); 635 636 // Iterate through the GEP folding the constants into offsets where 637 // we can. 
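// Note on the folding loop below: a struct field index contributes SL->getElementOffset(Idx) to TmpOffset, a constant array index contributes CI->getSExtValue() * ElementSize, and an index that is neither a constant nor a foldable add-of-constant falls through to unsupported_gep.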
638 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U); 639 GTI != E; ++GTI) { 640 const Value *Op = GTI.getOperand(); 641 if (StructType *STy = GTI.getStructTypeOrNull()) { 642 const StructLayout *SL = DL.getStructLayout(STy); 643 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); 644 TmpOffset += SL->getElementOffset(Idx); 645 } else { 646 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); 647 while (true) { 648 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { 649 // Constant-offset addressing. 650 TmpOffset += CI->getSExtValue() * S; 651 break; 652 } 653 if (canFoldAddIntoGEP(U, Op)) { 654 // A compatible add with a constant operand. Fold the constant. 655 ConstantInt *CI = 656 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); 657 TmpOffset += CI->getSExtValue() * S; 658 // Iterate on the other operand. 659 Op = cast<AddOperator>(Op)->getOperand(0); 660 continue; 661 } 662 // Unsupported 663 goto unsupported_gep; 664 } 665 } 666 } 667 668 // Try to grab the base operand now. 669 Addr.setOffset(TmpOffset); 670 if (computeAddress(U->getOperand(0), Addr, Ty)) 671 return true; 672 673 // We failed, restore everything and try the other options. 674 Addr = SavedAddr; 675 676 unsupported_gep: 677 break; 678 } 679 case Instruction::Alloca: { 680 const AllocaInst *AI = cast<AllocaInst>(Obj); 681 DenseMap<const AllocaInst *, int>::iterator SI = 682 FuncInfo.StaticAllocaMap.find(AI); 683 if (SI != FuncInfo.StaticAllocaMap.end()) { 684 Addr.setKind(Address::FrameIndexBase); 685 Addr.setFI(SI->second); 686 return true; 687 } 688 break; 689 } 690 case Instruction::Add: { 691 // Adds of constants are common and easy enough. 692 const Value *LHS = U->getOperand(0); 693 const Value *RHS = U->getOperand(1); 694 695 if (isa<ConstantInt>(LHS)) 696 std::swap(LHS, RHS); 697 698 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 699 Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); 700 return computeAddress(LHS, Addr, Ty); 701 } 702 703 Address Backup = Addr; 704 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty)) 705 return true; 706 Addr = Backup; 707 708 break; 709 } 710 case Instruction::Sub: { 711 // Subs of constants are common and easy enough. 712 const Value *LHS = U->getOperand(0); 713 const Value *RHS = U->getOperand(1); 714 715 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 716 Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); 717 return computeAddress(LHS, Addr, Ty); 718 } 719 break; 720 } 721 case Instruction::Shl: { 722 if (Addr.getOffsetReg()) 723 break; 724 725 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1)); 726 if (!CI) 727 break; 728 729 unsigned Val = CI->getZExtValue(); 730 if (Val < 1 || Val > 3) 731 break; 732 733 uint64_t NumBytes = 0; 734 if (Ty && Ty->isSized()) { 735 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 736 NumBytes = NumBits / 8; 737 if (!isPowerOf2_64(NumBits)) 738 NumBytes = 0; 739 } 740 741 if (NumBytes != (1ULL << Val)) 742 break; 743 744 Addr.setShift(Val); 745 Addr.setExtendType(AArch64_AM::LSL); 746 747 const Value *Src = U->getOperand(0); 748 if (const auto *I = dyn_cast<Instruction>(Src)) { 749 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 750 // Fold the zext or sext when it won't become a noop. 
751 if (const auto *ZE = dyn_cast<ZExtInst>(I)) { 752 if (!isIntExtFree(ZE) && 753 ZE->getOperand(0)->getType()->isIntegerTy(32)) { 754 Addr.setExtendType(AArch64_AM::UXTW); 755 Src = ZE->getOperand(0); 756 } 757 } else if (const auto *SE = dyn_cast<SExtInst>(I)) { 758 if (!isIntExtFree(SE) && 759 SE->getOperand(0)->getType()->isIntegerTy(32)) { 760 Addr.setExtendType(AArch64_AM::SXTW); 761 Src = SE->getOperand(0); 762 } 763 } 764 } 765 } 766 767 if (const auto *AI = dyn_cast<BinaryOperator>(Src)) 768 if (AI->getOpcode() == Instruction::And) { 769 const Value *LHS = AI->getOperand(0); 770 const Value *RHS = AI->getOperand(1); 771 772 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 773 if (C->getValue() == 0xffffffff) 774 std::swap(LHS, RHS); 775 776 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 777 if (C->getValue() == 0xffffffff) { 778 Addr.setExtendType(AArch64_AM::UXTW); 779 Register Reg = getRegForValue(LHS); 780 if (!Reg) 781 return false; 782 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32); 783 Addr.setOffsetReg(Reg); 784 return true; 785 } 786 } 787 788 Register Reg = getRegForValue(Src); 789 if (!Reg) 790 return false; 791 Addr.setOffsetReg(Reg); 792 return true; 793 } 794 case Instruction::Mul: { 795 if (Addr.getOffsetReg()) 796 break; 797 798 if (!isMulPowOf2(U)) 799 break; 800 801 const Value *LHS = U->getOperand(0); 802 const Value *RHS = U->getOperand(1); 803 804 // Canonicalize power-of-2 value to the RHS. 805 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 806 if (C->getValue().isPowerOf2()) 807 std::swap(LHS, RHS); 808 809 assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt."); 810 const auto *C = cast<ConstantInt>(RHS); 811 unsigned Val = C->getValue().logBase2(); 812 if (Val < 1 || Val > 3) 813 break; 814 815 uint64_t NumBytes = 0; 816 if (Ty && Ty->isSized()) { 817 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 818 NumBytes = NumBits / 8; 819 if (!isPowerOf2_64(NumBits)) 820 NumBytes = 0; 821 } 822 823 if (NumBytes != (1ULL << Val)) 824 break; 825 826 Addr.setShift(Val); 827 Addr.setExtendType(AArch64_AM::LSL); 828 829 const Value *Src = LHS; 830 if (const auto *I = dyn_cast<Instruction>(Src)) { 831 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 832 // Fold the zext or sext when it won't become a noop.
833 if (const auto *ZE = dyn_cast<ZExtInst>(I)) { 834 if (!isIntExtFree(ZE) && 835 ZE->getOperand(0)->getType()->isIntegerTy(32)) { 836 Addr.setExtendType(AArch64_AM::UXTW); 837 Src = ZE->getOperand(0); 838 } 839 } else if (const auto *SE = dyn_cast<SExtInst>(I)) { 840 if (!isIntExtFree(SE) && 841 SE->getOperand(0)->getType()->isIntegerTy(32)) { 842 Addr.setExtendType(AArch64_AM::SXTW); 843 Src = SE->getOperand(0); 844 } 845 } 846 } 847 } 848 849 Register Reg = getRegForValue(Src); 850 if (!Reg) 851 return false; 852 Addr.setOffsetReg(Reg); 853 return true; 854 } 855 case Instruction::And: { 856 if (Addr.getOffsetReg()) 857 break; 858 859 if (!Ty || DL.getTypeSizeInBits(Ty) != 8) 860 break; 861 862 const Value *LHS = U->getOperand(0); 863 const Value *RHS = U->getOperand(1); 864 865 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 866 if (C->getValue() == 0xffffffff) 867 std::swap(LHS, RHS); 868 869 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 870 if (C->getValue() == 0xffffffff) { 871 Addr.setShift(0); 872 Addr.setExtendType(AArch64_AM::LSL); 873 Addr.setExtendType(AArch64_AM::UXTW); 874 875 Register Reg = getRegForValue(LHS); 876 if (!Reg) 877 return false; 878 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32); 879 Addr.setOffsetReg(Reg); 880 return true; 881 } 882 break; 883 } 884 case Instruction::SExt: 885 case Instruction::ZExt: { 886 if (!Addr.getReg() || Addr.getOffsetReg()) 887 break; 888 889 const Value *Src = nullptr; 890 // Fold the zext or sext when it won't become a noop. 891 if (const auto *ZE = dyn_cast<ZExtInst>(U)) { 892 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { 893 Addr.setExtendType(AArch64_AM::UXTW); 894 Src = ZE->getOperand(0); 895 } 896 } else if (const auto *SE = dyn_cast<SExtInst>(U)) { 897 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { 898 Addr.setExtendType(AArch64_AM::SXTW); 899 Src = SE->getOperand(0); 900 } 901 } 902 903 if (!Src) 904 break; 905 906 Addr.setShift(0); 907 Register Reg = getRegForValue(Src); 908 if (!Reg) 909 return false; 910 Addr.setOffsetReg(Reg); 911 return true; 912 } 913 } // end switch 914 915 if (Addr.isRegBase() && !Addr.getReg()) { 916 Register Reg = getRegForValue(Obj); 917 if (!Reg) 918 return false; 919 Addr.setReg(Reg); 920 return true; 921 } 922 923 if (!Addr.getOffsetReg()) { 924 Register Reg = getRegForValue(Obj); 925 if (!Reg) 926 return false; 927 Addr.setOffsetReg(Reg); 928 return true; 929 } 930 931 return false; 932 } 933 934 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { 935 const User *U = nullptr; 936 unsigned Opcode = Instruction::UserOp1; 937 bool InMBB = true; 938 939 if (const auto *I = dyn_cast<Instruction>(V)) { 940 Opcode = I->getOpcode(); 941 U = I; 942 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); 943 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) { 944 Opcode = C->getOpcode(); 945 U = C; 946 } 947 948 switch (Opcode) { 949 default: break; 950 case Instruction::BitCast: 951 // Look past bitcasts if its operand is in the same BB. 952 if (InMBB) 953 return computeCallAddress(U->getOperand(0), Addr); 954 break; 955 case Instruction::IntToPtr: 956 // Look past no-op inttoptrs if its operand is in the same BB. 957 if (InMBB && 958 TLI.getValueType(DL, U->getOperand(0)->getType()) == 959 TLI.getPointerTy(DL)) 960 return computeCallAddress(U->getOperand(0), Addr); 961 break; 962 case Instruction::PtrToInt: 963 // Look past no-op ptrtoints if its operand is in the same BB. 
964 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) 965 return computeCallAddress(U->getOperand(0), Addr); 966 break; 967 } 968 969 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 970 Addr.setGlobalValue(GV); 971 return true; 972 } 973 974 // If all else fails, try to materialize the value in a register. 975 if (!Addr.getGlobalValue()) { 976 Addr.setReg(getRegForValue(V)); 977 return Addr.getReg() != 0; 978 } 979 980 return false; 981 } 982 983 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { 984 EVT evt = TLI.getValueType(DL, Ty, true); 985 986 if (Subtarget->isTargetILP32() && Ty->isPointerTy()) 987 return false; 988 989 // Only handle simple types. 990 if (evt == MVT::Other || !evt.isSimple()) 991 return false; 992 VT = evt.getSimpleVT(); 993 994 // This is a legal type, but it's not something we handle in fast-isel. 995 if (VT == MVT::f128) 996 return false; 997 998 // Handle all other legal types, i.e. a register that will directly hold this 999 // value. 1000 return TLI.isTypeLegal(VT); 1001 } 1002 1003 /// Determine if the value type is supported by FastISel. 1004 /// 1005 /// FastISel for AArch64 can handle more value types than are legal. This adds 1006 /// simple value types such as i1, i8, and i16. 1007 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) { 1008 if (Ty->isVectorTy() && !IsVectorAllowed) 1009 return false; 1010 1011 if (isTypeLegal(Ty, VT)) 1012 return true; 1013 1014 // If this is a type that can be sign- or zero-extended to a basic operation, 1015 // go ahead and accept it now. 1016 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) 1017 return true; 1018 1019 return false; 1020 } 1021 1022 bool AArch64FastISel::isValueAvailable(const Value *V) const { 1023 if (!isa<Instruction>(V)) 1024 return true; 1025 1026 const auto *I = cast<Instruction>(V); 1027 return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB; 1028 } 1029 1030 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { 1031 if (Subtarget->isTargetILP32()) 1032 return false; 1033 1034 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1035 if (!ScaleFactor) 1036 return false; 1037 1038 bool ImmediateOffsetNeedsLowering = false; 1039 bool RegisterOffsetNeedsLowering = false; 1040 int64_t Offset = Addr.getOffset(); 1041 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset)) 1042 ImmediateOffsetNeedsLowering = true; 1043 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) && 1044 !isUInt<12>(Offset / ScaleFactor)) 1045 ImmediateOffsetNeedsLowering = true; 1046 1047 // Cannot encode an offset register and an immediate offset in the same 1048 // instruction. Fold the immediate offset into the load/store instruction and 1049 // emit an additional add to take care of the offset register. 1050 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg()) 1051 RegisterOffsetNeedsLowering = true; 1052 1053 // Cannot encode zero register as base. 1054 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg()) 1055 RegisterOffsetNeedsLowering = true; 1056 1057 // If this is a stack pointer and the offset needs to be simplified then put 1058 // the alloca address into a register, set the base type back to register and 1059 // continue. This should almost never happen.
1060 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase()) 1061 { 1062 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 1063 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri), 1064 ResultReg) 1065 .addFrameIndex(Addr.getFI()) 1066 .addImm(0) 1067 .addImm(0); 1068 Addr.setKind(Address::RegBase); 1069 Addr.setReg(ResultReg); 1070 } 1071 1072 if (RegisterOffsetNeedsLowering) { 1073 unsigned ResultReg = 0; 1074 if (Addr.getReg()) { 1075 if (Addr.getExtendType() == AArch64_AM::SXTW || 1076 Addr.getExtendType() == AArch64_AM::UXTW ) 1077 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 1078 Addr.getOffsetReg(), Addr.getExtendType(), 1079 Addr.getShift()); 1080 else 1081 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 1082 Addr.getOffsetReg(), AArch64_AM::LSL, 1083 Addr.getShift()); 1084 } else { 1085 if (Addr.getExtendType() == AArch64_AM::UXTW) 1086 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), 1087 Addr.getShift(), /*IsZExt=*/true); 1088 else if (Addr.getExtendType() == AArch64_AM::SXTW) 1089 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), 1090 Addr.getShift(), /*IsZExt=*/false); 1091 else 1092 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(), 1093 Addr.getShift()); 1094 } 1095 if (!ResultReg) 1096 return false; 1097 1098 Addr.setReg(ResultReg); 1099 Addr.setOffsetReg(0); 1100 Addr.setShift(0); 1101 Addr.setExtendType(AArch64_AM::InvalidShiftExtend); 1102 } 1103 1104 // Since the offset is too large for the load/store instruction get the 1105 // reg+offset into a register. 1106 if (ImmediateOffsetNeedsLowering) { 1107 unsigned ResultReg; 1108 if (Addr.getReg()) 1109 // Try to fold the immediate into the add instruction. 1110 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset); 1111 else 1112 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); 1113 1114 if (!ResultReg) 1115 return false; 1116 Addr.setReg(ResultReg); 1117 Addr.setOffset(0); 1118 } 1119 return true; 1120 } 1121 1122 void AArch64FastISel::addLoadStoreOperands(Address &Addr, 1123 const MachineInstrBuilder &MIB, 1124 MachineMemOperand::Flags Flags, 1125 unsigned ScaleFactor, 1126 MachineMemOperand *MMO) { 1127 int64_t Offset = Addr.getOffset() / ScaleFactor; 1128 // Frame base works a bit differently. Handle it separately. 1129 if (Addr.isFIBase()) { 1130 int FI = Addr.getFI(); 1131 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size 1132 // and alignment should be based on the VT. 1133 MMO = FuncInfo.MF->getMachineMemOperand( 1134 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags, 1135 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 1136 // Now add the rest of the operands. 1137 MIB.addFrameIndex(FI).addImm(Offset); 1138 } else { 1139 assert(Addr.isRegBase() && "Unexpected address kind."); 1140 const MCInstrDesc &II = MIB->getDesc(); 1141 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 
1 : 0; 1142 Addr.setReg( 1143 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx)); 1144 Addr.setOffsetReg( 1145 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1)); 1146 if (Addr.getOffsetReg()) { 1147 assert(Addr.getOffset() == 0 && "Unexpected offset"); 1148 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW || 1149 Addr.getExtendType() == AArch64_AM::SXTX; 1150 MIB.addReg(Addr.getReg()); 1151 MIB.addReg(Addr.getOffsetReg()); 1152 MIB.addImm(IsSigned); 1153 MIB.addImm(Addr.getShift() != 0); 1154 } else 1155 MIB.addReg(Addr.getReg()).addImm(Offset); 1156 } 1157 1158 if (MMO) 1159 MIB.addMemOperand(MMO); 1160 } 1161 1162 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, 1163 const Value *RHS, bool SetFlags, 1164 bool WantResult, bool IsZExt) { 1165 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend; 1166 bool NeedExtend = false; 1167 switch (RetVT.SimpleTy) { 1168 default: 1169 return 0; 1170 case MVT::i1: 1171 NeedExtend = true; 1172 break; 1173 case MVT::i8: 1174 NeedExtend = true; 1175 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB; 1176 break; 1177 case MVT::i16: 1178 NeedExtend = true; 1179 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH; 1180 break; 1181 case MVT::i32: // fall-through 1182 case MVT::i64: 1183 break; 1184 } 1185 MVT SrcVT = RetVT; 1186 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32); 1187 1188 // Canonicalize immediates to the RHS first. 1189 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS)) 1190 std::swap(LHS, RHS); 1191 1192 // Canonicalize mul by power of 2 to the RHS. 1193 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) 1194 if (isMulPowOf2(LHS)) 1195 std::swap(LHS, RHS); 1196 1197 // Canonicalize shift immediate to the RHS. 1198 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) 1199 if (const auto *SI = dyn_cast<BinaryOperator>(LHS)) 1200 if (isa<ConstantInt>(SI->getOperand(1))) 1201 if (SI->getOpcode() == Instruction::Shl || 1202 SI->getOpcode() == Instruction::LShr || 1203 SI->getOpcode() == Instruction::AShr ) 1204 std::swap(LHS, RHS); 1205 1206 Register LHSReg = getRegForValue(LHS); 1207 if (!LHSReg) 1208 return 0; 1209 1210 if (NeedExtend) 1211 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt); 1212 1213 unsigned ResultReg = 0; 1214 if (const auto *C = dyn_cast<ConstantInt>(RHS)) { 1215 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue(); 1216 if (C->isNegative()) 1217 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags, 1218 WantResult); 1219 else 1220 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags, 1221 WantResult); 1222 } else if (const auto *C = dyn_cast<Constant>(RHS)) 1223 if (C->isNullValue()) 1224 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult); 1225 1226 if (ResultReg) 1227 return ResultReg; 1228 1229 // Only extend the RHS within the instruction if there is a valid extend type. 
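// For example, an i16 add whose RHS is (shl %x, 1) folds into the extended-register form 'add w0, w1, w2, sxth #1' (uxth when IsZExt) via emitAddSub_rx below.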
1230 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && 1231 isValueAvailable(RHS)) { 1232 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) 1233 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) 1234 if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { 1235 Register RHSReg = getRegForValue(SI->getOperand(0)); 1236 if (!RHSReg) 1237 return 0; 1238 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 1239 C->getZExtValue(), SetFlags, WantResult); 1240 } 1241 Register RHSReg = getRegForValue(RHS); 1242 if (!RHSReg) 1243 return 0; 1244 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0, 1245 SetFlags, WantResult); 1246 } 1247 1248 // Check if the mul can be folded into the instruction. 1249 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1250 if (isMulPowOf2(RHS)) { 1251 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); 1252 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); 1253 1254 if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) 1255 if (C->getValue().isPowerOf2()) 1256 std::swap(MulLHS, MulRHS); 1257 1258 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); 1259 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); 1260 Register RHSReg = getRegForValue(MulLHS); 1261 if (!RHSReg) 1262 return 0; 1263 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL, 1264 ShiftVal, SetFlags, WantResult); 1265 if (ResultReg) 1266 return ResultReg; 1267 } 1268 } 1269 1270 // Check if the shift can be folded into the instruction. 1271 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1272 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) { 1273 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { 1274 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; 1275 switch (SI->getOpcode()) { 1276 default: break; 1277 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; 1278 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; 1279 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; 1280 } 1281 uint64_t ShiftVal = C->getZExtValue(); 1282 if (ShiftType != AArch64_AM::InvalidShiftExtend) { 1283 Register RHSReg = getRegForValue(SI->getOperand(0)); 1284 if (!RHSReg) 1285 return 0; 1286 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType, 1287 ShiftVal, SetFlags, WantResult); 1288 if (ResultReg) 1289 return ResultReg; 1290 } 1291 } 1292 } 1293 } 1294 1295 Register RHSReg = getRegForValue(RHS); 1296 if (!RHSReg) 1297 return 0; 1298 1299 if (NeedExtend) 1300 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt); 1301 1302 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult); 1303 } 1304 1305 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, 1306 unsigned RHSReg, bool SetFlags, 1307 bool WantResult) { 1308 assert(LHSReg && RHSReg && "Invalid register number."); 1309 1310 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP || 1311 RHSReg == AArch64::SP || RHSReg == AArch64::WSP) 1312 return 0; 1313 1314 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1315 return 0; 1316 1317 static const unsigned OpcTable[2][2][2] = { 1318 { { AArch64::SUBWrr, AArch64::SUBXrr }, 1319 { AArch64::ADDWrr, AArch64::ADDXrr } }, 1320 { { AArch64::SUBSWrr, AArch64::SUBSXrr }, 1321 { AArch64::ADDSWrr, AArch64::ADDSXrr } } 1322 }; 1323 bool Is64Bit = RetVT == MVT::i64; 1324 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1325 const TargetRegisterClass *RC = 1326 Is64Bit ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1327 unsigned ResultReg; 1328 if (WantResult) 1329 ResultReg = createResultReg(RC); 1330 else 1331 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1332 1333 const MCInstrDesc &II = TII.get(Opc); 1334 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1335 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1336 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) 1337 .addReg(LHSReg) 1338 .addReg(RHSReg); 1339 return ResultReg; 1340 } 1341 1342 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, 1343 uint64_t Imm, bool SetFlags, 1344 bool WantResult) { 1345 assert(LHSReg && "Invalid register number."); 1346 1347 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1348 return 0; 1349 1350 unsigned ShiftImm; 1351 if (isUInt<12>(Imm)) 1352 ShiftImm = 0; 1353 else if ((Imm & 0xfff000) == Imm) { 1354 ShiftImm = 12; 1355 Imm >>= 12; 1356 } else 1357 return 0; 1358 1359 static const unsigned OpcTable[2][2][2] = { 1360 { { AArch64::SUBWri, AArch64::SUBXri }, 1361 { AArch64::ADDWri, AArch64::ADDXri } }, 1362 { { AArch64::SUBSWri, AArch64::SUBSXri }, 1363 { AArch64::ADDSWri, AArch64::ADDSXri } } 1364 }; 1365 bool Is64Bit = RetVT == MVT::i64; 1366 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1367 const TargetRegisterClass *RC; 1368 if (SetFlags) 1369 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1370 else 1371 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; 1372 unsigned ResultReg; 1373 if (WantResult) 1374 ResultReg = createResultReg(RC); 1375 else 1376 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1377 1378 const MCInstrDesc &II = TII.get(Opc); 1379 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1380 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) 1381 .addReg(LHSReg) 1382 .addImm(Imm) 1383 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm)); 1384 return ResultReg; 1385 } 1386 1387 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, 1388 unsigned RHSReg, 1389 AArch64_AM::ShiftExtendType ShiftType, 1390 uint64_t ShiftImm, bool SetFlags, 1391 bool WantResult) { 1392 assert(LHSReg && RHSReg && "Invalid register number."); 1393 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && 1394 RHSReg != AArch64::SP && RHSReg != AArch64::WSP); 1395 1396 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1397 return 0; 1398 1399 // Don't deal with undefined shifts. 1400 if (ShiftImm >= RetVT.getSizeInBits()) 1401 return 0; 1402 1403 static const unsigned OpcTable[2][2][2] = { 1404 { { AArch64::SUBWrs, AArch64::SUBXrs }, 1405 { AArch64::ADDWrs, AArch64::ADDXrs } }, 1406 { { AArch64::SUBSWrs, AArch64::SUBSXrs }, 1407 { AArch64::ADDSWrs, AArch64::ADDSXrs } } 1408 }; 1409 bool Is64Bit = RetVT == MVT::i64; 1410 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1411 const TargetRegisterClass *RC = 1412 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1413 unsigned ResultReg; 1414 if (WantResult) 1415 ResultReg = createResultReg(RC); 1416 else 1417 ResultReg = Is64Bit ? 
AArch64::XZR : AArch64::WZR; 1418 1419 const MCInstrDesc &II = TII.get(Opc); 1420 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1421 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1422 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) 1423 .addReg(LHSReg) 1424 .addReg(RHSReg) 1425 .addImm(getShifterImm(ShiftType, ShiftImm)); 1426 return ResultReg; 1427 } 1428 1429 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, 1430 unsigned RHSReg, 1431 AArch64_AM::ShiftExtendType ExtType, 1432 uint64_t ShiftImm, bool SetFlags, 1433 bool WantResult) { 1434 assert(LHSReg && RHSReg && "Invalid register number."); 1435 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && 1436 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); 1437 1438 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1439 return 0; 1440 1441 if (ShiftImm >= 4) 1442 return 0; 1443 1444 static const unsigned OpcTable[2][2][2] = { 1445 { { AArch64::SUBWrx, AArch64::SUBXrx }, 1446 { AArch64::ADDWrx, AArch64::ADDXrx } }, 1447 { { AArch64::SUBSWrx, AArch64::SUBSXrx }, 1448 { AArch64::ADDSWrx, AArch64::ADDSXrx } } 1449 }; 1450 bool Is64Bit = RetVT == MVT::i64; 1451 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1452 const TargetRegisterClass *RC = nullptr; 1453 if (SetFlags) 1454 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1455 else 1456 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; 1457 unsigned ResultReg; 1458 if (WantResult) 1459 ResultReg = createResultReg(RC); 1460 else 1461 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1462 1463 const MCInstrDesc &II = TII.get(Opc); 1464 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1465 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1466 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) 1467 .addReg(LHSReg) 1468 .addReg(RHSReg) 1469 .addImm(getArithExtendImm(ExtType, ShiftImm)); 1470 return ResultReg; 1471 } 1472 1473 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { 1474 Type *Ty = LHS->getType(); 1475 EVT EVT = TLI.getValueType(DL, Ty, true); 1476 if (!EVT.isSimple()) 1477 return false; 1478 MVT VT = EVT.getSimpleVT(); 1479 1480 switch (VT.SimpleTy) { 1481 default: 1482 return false; 1483 case MVT::i1: 1484 case MVT::i8: 1485 case MVT::i16: 1486 case MVT::i32: 1487 case MVT::i64: 1488 return emitICmp(VT, LHS, RHS, IsZExt); 1489 case MVT::f32: 1490 case MVT::f64: 1491 return emitFCmp(VT, LHS, RHS); 1492 } 1493 } 1494 1495 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, 1496 bool IsZExt) { 1497 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false, 1498 IsZExt) != 0; 1499 } 1500 1501 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) { 1502 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm, 1503 /*SetFlags=*/true, /*WantResult=*/false) != 0; 1504 } 1505 1506 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { 1507 if (RetVT != MVT::f32 && RetVT != MVT::f64) 1508 return false; 1509 1510 // Check to see if the 2nd operand is a constant that we can encode directly 1511 // in the compare. 1512 bool UseImm = false; 1513 if (const auto *CFP = dyn_cast<ConstantFP>(RHS)) 1514 if (CFP->isZero() && !CFP->isNegative()) 1515 UseImm = true; 1516 1517 Register LHSReg = getRegForValue(LHS); 1518 if (!LHSReg) 1519 return false; 1520 1521 if (UseImm) { 1522 unsigned Opc = (RetVT == MVT::f64) ? 
AArch64::FCMPDri : AArch64::FCMPSri; 1523 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)) 1524 .addReg(LHSReg); 1525 return true; 1526 } 1527 1528 Register RHSReg = getRegForValue(RHS); 1529 if (!RHSReg) 1530 return false; 1531 1532 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr; 1533 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)) 1534 .addReg(LHSReg) 1535 .addReg(RHSReg); 1536 return true; 1537 } 1538 1539 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, 1540 bool SetFlags, bool WantResult, bool IsZExt) { 1541 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult, 1542 IsZExt); 1543 } 1544 1545 /// This method is a wrapper to simplify add emission. 1546 /// 1547 /// First try to emit an add with an immediate operand using emitAddSub_ri. If 1548 /// that fails, then try to materialize the immediate into a register and use 1549 /// emitAddSub_rr instead. 1550 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) { 1551 unsigned ResultReg; 1552 if (Imm < 0) 1553 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm); 1554 else 1555 ResultReg = emitAddSub_ri(true, VT, Op0, Imm); 1556 1557 if (ResultReg) 1558 return ResultReg; 1559 1560 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm); 1561 if (!CReg) 1562 return 0; 1563 1564 ResultReg = emitAddSub_rr(true, VT, Op0, CReg); 1565 return ResultReg; 1566 } 1567 1568 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, 1569 bool SetFlags, bool WantResult, bool IsZExt) { 1570 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, 1571 IsZExt); 1572 } 1573 1574 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg, 1575 unsigned RHSReg, bool WantResult) { 1576 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, 1577 /*SetFlags=*/true, WantResult); 1578 } 1579 1580 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg, 1581 unsigned RHSReg, 1582 AArch64_AM::ShiftExtendType ShiftType, 1583 uint64_t ShiftImm, bool WantResult) { 1584 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType, 1585 ShiftImm, /*SetFlags=*/true, WantResult); 1586 } 1587 1588 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, 1589 const Value *LHS, const Value *RHS) { 1590 // Canonicalize immediates to the RHS first. 1591 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS)) 1592 std::swap(LHS, RHS); 1593 1594 // Canonicalize mul by power-of-2 to the RHS. 1595 if (LHS->hasOneUse() && isValueAvailable(LHS)) 1596 if (isMulPowOf2(LHS)) 1597 std::swap(LHS, RHS); 1598 1599 // Canonicalize shift immediate to the RHS. 1600 if (LHS->hasOneUse() && isValueAvailable(LHS)) 1601 if (const auto *SI = dyn_cast<ShlOperator>(LHS)) 1602 if (isa<ConstantInt>(SI->getOperand(1))) 1603 std::swap(LHS, RHS); 1604 1605 Register LHSReg = getRegForValue(LHS); 1606 if (!LHSReg) 1607 return 0; 1608 1609 unsigned ResultReg = 0; 1610 if (const auto *C = dyn_cast<ConstantInt>(RHS)) { 1611 uint64_t Imm = C->getZExtValue(); 1612 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm); 1613 } 1614 if (ResultReg) 1615 return ResultReg; 1616 1617 // Check if the mul can be folded into the instruction. 
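// For example, (or %x, (mul %y, 4)) with an i64 result folds into the shifted-register form 'orr x0, x1, x2, lsl #2' via emitLogicalOp_rs below.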
1618 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1619 if (isMulPowOf2(RHS)) { 1620 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); 1621 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); 1622 1623 if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) 1624 if (C->getValue().isPowerOf2()) 1625 std::swap(MulLHS, MulRHS); 1626 1627 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); 1628 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); 1629 1630 Register RHSReg = getRegForValue(MulLHS); 1631 if (!RHSReg) 1632 return 0; 1633 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal); 1634 if (ResultReg) 1635 return ResultReg; 1636 } 1637 } 1638 1639 // Check if the shift can be folded into the instruction. 1640 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1641 if (const auto *SI = dyn_cast<ShlOperator>(RHS)) 1642 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { 1643 uint64_t ShiftVal = C->getZExtValue(); 1644 Register RHSReg = getRegForValue(SI->getOperand(0)); 1645 if (!RHSReg) 1646 return 0; 1647 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal); 1648 if (ResultReg) 1649 return ResultReg; 1650 } 1651 } 1652 1653 Register RHSReg = getRegForValue(RHS); 1654 if (!RHSReg) 1655 return 0; 1656 1657 MVT VT = std::max(MVT::i32, RetVT.SimpleTy); 1658 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg); 1659 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1660 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1661 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1662 } 1663 return ResultReg; 1664 } 1665 1666 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, 1667 unsigned LHSReg, uint64_t Imm) { 1668 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), 1669 "ISD nodes are not consecutive!"); 1670 static const unsigned OpcTable[3][2] = { 1671 { AArch64::ANDWri, AArch64::ANDXri }, 1672 { AArch64::ORRWri, AArch64::ORRXri }, 1673 { AArch64::EORWri, AArch64::EORXri } 1674 }; 1675 const TargetRegisterClass *RC; 1676 unsigned Opc; 1677 unsigned RegSize; 1678 switch (RetVT.SimpleTy) { 1679 default: 1680 return 0; 1681 case MVT::i1: 1682 case MVT::i8: 1683 case MVT::i16: 1684 case MVT::i32: { 1685 unsigned Idx = ISDOpc - ISD::AND; 1686 Opc = OpcTable[Idx][0]; 1687 RC = &AArch64::GPR32spRegClass; 1688 RegSize = 32; 1689 break; 1690 } 1691 case MVT::i64: 1692 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1693 RC = &AArch64::GPR64spRegClass; 1694 RegSize = 64; 1695 break; 1696 } 1697 1698 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize)) 1699 return 0; 1700 1701 Register ResultReg = 1702 fastEmitInst_ri(Opc, RC, LHSReg, 1703 AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); 1704 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) { 1705 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1706 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1707 } 1708 return ResultReg; 1709 } 1710 1711 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, 1712 unsigned LHSReg, unsigned RHSReg, 1713 uint64_t ShiftImm) { 1714 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), 1715 "ISD nodes are not consecutive!"); 1716 static const unsigned OpcTable[3][2] = { 1717 { AArch64::ANDWrs, AArch64::ANDXrs }, 1718 { AArch64::ORRWrs, AArch64::ORRXrs }, 1719 { AArch64::EORWrs, AArch64::EORXrs } 1720 }; 1721 1722 // Don't deal with undefined shifts. 
1723 if (ShiftImm >= RetVT.getSizeInBits()) 1724 return 0; 1725 1726 const TargetRegisterClass *RC; 1727 unsigned Opc; 1728 switch (RetVT.SimpleTy) { 1729 default: 1730 return 0; 1731 case MVT::i1: 1732 case MVT::i8: 1733 case MVT::i16: 1734 case MVT::i32: 1735 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1736 RC = &AArch64::GPR32RegClass; 1737 break; 1738 case MVT::i64: 1739 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1740 RC = &AArch64::GPR64RegClass; 1741 break; 1742 } 1743 Register ResultReg = 1744 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg, 1745 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1746 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1747 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1748 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1749 } 1750 return ResultReg; 1751 } 1752 1753 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, 1754 uint64_t Imm) { 1755 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm); 1756 } 1757 1758 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1759 bool WantZExt, MachineMemOperand *MMO) { 1760 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1761 return 0; 1762 1763 // Simplify this down to something we can handle. 1764 if (!simplifyAddress(Addr, VT)) 1765 return 0; 1766 1767 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1768 if (!ScaleFactor) 1769 llvm_unreachable("Unexpected value type."); 1770 1771 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1772 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1773 bool UseScaled = true; 1774 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1775 UseScaled = false; 1776 ScaleFactor = 1; 1777 } 1778 1779 static const unsigned GPOpcTable[2][8][4] = { 1780 // Sign-extend. 1781 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1782 AArch64::LDURXi }, 1783 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1784 AArch64::LDURXi }, 1785 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1786 AArch64::LDRXui }, 1787 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1788 AArch64::LDRXui }, 1789 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1790 AArch64::LDRXroX }, 1791 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1792 AArch64::LDRXroX }, 1793 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1794 AArch64::LDRXroW }, 1795 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1796 AArch64::LDRXroW } 1797 }, 1798 // Zero-extend. 
1799 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1800 AArch64::LDURXi }, 1801 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1802 AArch64::LDURXi }, 1803 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1804 AArch64::LDRXui }, 1805 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1806 AArch64::LDRXui }, 1807 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1808 AArch64::LDRXroX }, 1809 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1810 AArch64::LDRXroX }, 1811 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1812 AArch64::LDRXroW }, 1813 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1814 AArch64::LDRXroW } 1815 } 1816 }; 1817 1818 static const unsigned FPOpcTable[4][2] = { 1819 { AArch64::LDURSi, AArch64::LDURDi }, 1820 { AArch64::LDRSui, AArch64::LDRDui }, 1821 { AArch64::LDRSroX, AArch64::LDRDroX }, 1822 { AArch64::LDRSroW, AArch64::LDRDroW } 1823 }; 1824 1825 unsigned Opc; 1826 const TargetRegisterClass *RC; 1827 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1828 Addr.getOffsetReg(); 1829 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1830 if (Addr.getExtendType() == AArch64_AM::UXTW || 1831 Addr.getExtendType() == AArch64_AM::SXTW) 1832 Idx++; 1833 1834 bool IsRet64Bit = RetVT == MVT::i64; 1835 switch (VT.SimpleTy) { 1836 default: 1837 llvm_unreachable("Unexpected value type."); 1838 case MVT::i1: // Intentional fall-through. 1839 case MVT::i8: 1840 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1841 RC = (IsRet64Bit && !WantZExt) ? 1842 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1843 break; 1844 case MVT::i16: 1845 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1846 RC = (IsRet64Bit && !WantZExt) ? 1847 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1848 break; 1849 case MVT::i32: 1850 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1851 RC = (IsRet64Bit && !WantZExt) ? 1852 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1853 break; 1854 case MVT::i64: 1855 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1856 RC = &AArch64::GPR64RegClass; 1857 break; 1858 case MVT::f32: 1859 Opc = FPOpcTable[Idx][0]; 1860 RC = &AArch64::FPR32RegClass; 1861 break; 1862 case MVT::f64: 1863 Opc = FPOpcTable[Idx][1]; 1864 RC = &AArch64::FPR64RegClass; 1865 break; 1866 } 1867 1868 // Create the base instruction, then add the operands. 1869 Register ResultReg = createResultReg(RC); 1870 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1871 TII.get(Opc), ResultReg); 1872 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1873 1874 // Loading an i1 requires special handling. 1875 if (VT == MVT::i1) { 1876 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1); 1877 assert(ANDReg && "Unexpected AND instruction emission failure."); 1878 ResultReg = ANDReg; 1879 } 1880 1881 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1882 // the 32bit reg to a 64bit reg. 
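  // For illustration (a sketch): a zero-extending i8 load whose result is used
  // as an i64 is emitted as a 32-bit LDRB (which already clears bits [63:32]
  // of the X register) followed by a SUBREG_TO_REG that reinterprets the W
  // result as an X register, rather than emitting a separate UXTB.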
1883 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1884 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 1885 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1886 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1887 .addImm(0) 1888 .addReg(ResultReg, getKillRegState(true)) 1889 .addImm(AArch64::sub_32); 1890 ResultReg = Reg64; 1891 } 1892 return ResultReg; 1893 } 1894 1895 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1896 MVT VT; 1897 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1898 return false; 1899 1900 if (VT.isVector()) 1901 return selectOperator(I, I->getOpcode()); 1902 1903 unsigned ResultReg; 1904 switch (I->getOpcode()) { 1905 default: 1906 llvm_unreachable("Unexpected instruction."); 1907 case Instruction::Add: 1908 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1909 break; 1910 case Instruction::Sub: 1911 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1912 break; 1913 } 1914 if (!ResultReg) 1915 return false; 1916 1917 updateValueMap(I, ResultReg); 1918 return true; 1919 } 1920 1921 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1922 MVT VT; 1923 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1924 return false; 1925 1926 if (VT.isVector()) 1927 return selectOperator(I, I->getOpcode()); 1928 1929 unsigned ResultReg; 1930 switch (I->getOpcode()) { 1931 default: 1932 llvm_unreachable("Unexpected instruction."); 1933 case Instruction::And: 1934 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1935 break; 1936 case Instruction::Or: 1937 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1938 break; 1939 case Instruction::Xor: 1940 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1941 break; 1942 } 1943 if (!ResultReg) 1944 return false; 1945 1946 updateValueMap(I, ResultReg); 1947 return true; 1948 } 1949 1950 bool AArch64FastISel::selectLoad(const Instruction *I) { 1951 MVT VT; 1952 // Verify we have a legal type before going any further. Currently, we handle 1953 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1954 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 1955 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1956 cast<LoadInst>(I)->isAtomic()) 1957 return false; 1958 1959 const Value *SV = I->getOperand(0); 1960 if (TLI.supportSwiftError()) { 1961 // Swifterror values can come from either a function parameter with 1962 // swifterror attribute or an alloca with swifterror attribute. 1963 if (const Argument *Arg = dyn_cast<Argument>(SV)) { 1964 if (Arg->hasSwiftErrorAttr()) 1965 return false; 1966 } 1967 1968 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { 1969 if (Alloca->isSwiftError()) 1970 return false; 1971 } 1972 } 1973 1974 // See if we can handle this address. 1975 Address Addr; 1976 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 1977 return false; 1978 1979 // Fold the following sign-/zero-extend into the load instruction. 
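  // For example (illustrative IR):
  //   %val = load i8, ptr %p
  //   %ext = sext i8 %val to i64
  // is selected as a single sign-extending load (LDRSBXui for the
  // scaled-immediate addressing mode), and the extend is folded away rather
  // than emitted separately; see the IntExtVal handling below.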
1980 bool WantZExt = true; 1981 MVT RetVT = VT; 1982 const Value *IntExtVal = nullptr; 1983 if (I->hasOneUse()) { 1984 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { 1985 if (isTypeSupported(ZE->getType(), RetVT)) 1986 IntExtVal = ZE; 1987 else 1988 RetVT = VT; 1989 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { 1990 if (isTypeSupported(SE->getType(), RetVT)) 1991 IntExtVal = SE; 1992 else 1993 RetVT = VT; 1994 WantZExt = false; 1995 } 1996 } 1997 1998 unsigned ResultReg = 1999 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); 2000 if (!ResultReg) 2001 return false; 2002 2003 // There are a few different cases we have to handle, because the load or the 2004 // sign-/zero-extend might not be selected by FastISel if we fall back to 2005 // SelectionDAG. There is also an ordering issue when both instructions are in 2006 // different basic blocks. 2007 // 1.) The load instruction is selected by FastISel, but the integer extend is 2008 // not. This usually happens when the integer extend is in a different 2009 // basic block and SelectionDAG took over for that basic block. 2010 // 2.) The load instruction is selected before the integer extend. This only 2011 // happens when the integer extend is in a different basic block. 2012 // 3.) The load instruction is selected by SelectionDAG and the integer extend 2013 // by FastISel. This happens if there are instructions between the load 2014 // and the integer extend that couldn't be selected by FastISel. 2015 if (IntExtVal) { 2016 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG 2017 // could select it. Emit a copy to subreg if necessary. FastISel will remove 2018 // it when it selects the integer extend. 2019 Register Reg = lookUpRegForValue(IntExtVal); 2020 auto *MI = MRI.getUniqueVRegDef(Reg); 2021 if (!MI) { 2022 if (RetVT == MVT::i64 && VT <= MVT::i32) { 2023 if (WantZExt) { 2024 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). 2025 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt)); 2026 ResultReg = std::prev(I)->getOperand(0).getReg(); 2027 removeDeadCode(I, std::next(I)); 2028 } else 2029 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, 2030 AArch64::sub_32); 2031 } 2032 updateValueMap(I, ResultReg); 2033 return true; 2034 } 2035 2036 // The integer extend has already been emitted - delete all the instructions 2037 // that have been emitted by the integer extend lowering code and use the 2038 // result from the load instruction directly.
2039 while (MI) { 2040 Reg = 0; 2041 for (auto &Opnd : MI->uses()) { 2042 if (Opnd.isReg()) { 2043 Reg = Opnd.getReg(); 2044 break; 2045 } 2046 } 2047 MachineBasicBlock::iterator I(MI); 2048 removeDeadCode(I, std::next(I)); 2049 MI = nullptr; 2050 if (Reg) 2051 MI = MRI.getUniqueVRegDef(Reg); 2052 } 2053 updateValueMap(IntExtVal, ResultReg); 2054 return true; 2055 } 2056 2057 updateValueMap(I, ResultReg); 2058 return true; 2059 } 2060 2061 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, 2062 unsigned AddrReg, 2063 MachineMemOperand *MMO) { 2064 unsigned Opc; 2065 switch (VT.SimpleTy) { 2066 default: return false; 2067 case MVT::i8: Opc = AArch64::STLRB; break; 2068 case MVT::i16: Opc = AArch64::STLRH; break; 2069 case MVT::i32: Opc = AArch64::STLRW; break; 2070 case MVT::i64: Opc = AArch64::STLRX; break; 2071 } 2072 2073 const MCInstrDesc &II = TII.get(Opc); 2074 SrcReg = constrainOperandRegClass(II, SrcReg, 0); 2075 AddrReg = constrainOperandRegClass(II, AddrReg, 1); 2076 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 2077 .addReg(SrcReg) 2078 .addReg(AddrReg) 2079 .addMemOperand(MMO); 2080 return true; 2081 } 2082 2083 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 2084 MachineMemOperand *MMO) { 2085 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 2086 return false; 2087 2088 // Simplify this down to something we can handle. 2089 if (!simplifyAddress(Addr, VT)) 2090 return false; 2091 2092 unsigned ScaleFactor = getImplicitScaleFactor(VT); 2093 if (!ScaleFactor) 2094 llvm_unreachable("Unexpected value type."); 2095 2096 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 2097 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 2098 bool UseScaled = true; 2099 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 2100 UseScaled = false; 2101 ScaleFactor = 1; 2102 } 2103 2104 static const unsigned OpcTable[4][6] = { 2105 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 2106 AArch64::STURSi, AArch64::STURDi }, 2107 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 2108 AArch64::STRSui, AArch64::STRDui }, 2109 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 2110 AArch64::STRSroX, AArch64::STRDroX }, 2111 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 2112 AArch64::STRSroW, AArch64::STRDroW } 2113 }; 2114 2115 unsigned Opc; 2116 bool VTIsi1 = false; 2117 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 2118 Addr.getOffsetReg(); 2119 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 2120 if (Addr.getExtendType() == AArch64_AM::UXTW || 2121 Addr.getExtendType() == AArch64_AM::SXTW) 2122 Idx++; 2123 2124 switch (VT.SimpleTy) { 2125 default: llvm_unreachable("Unexpected value type."); 2126 case MVT::i1: VTIsi1 = true; [[fallthrough]]; 2127 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2128 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2129 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2130 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2131 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2132 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2133 } 2134 2135 // Storing an i1 requires special handling. 2136 if (VTIsi1 && SrcReg != AArch64::WZR) { 2137 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1); 2138 assert(ANDReg && "Unexpected AND instruction emission failure."); 2139 SrcReg = ANDReg; 2140 } 2141 // Create the base instruction, then add the operands. 
2142 const MCInstrDesc &II = TII.get(Opc); 2143 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2144 MachineInstrBuilder MIB = 2145 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg); 2146 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2147 2148 return true; 2149 } 2150 2151 bool AArch64FastISel::selectStore(const Instruction *I) { 2152 MVT VT; 2153 const Value *Op0 = I->getOperand(0); 2154 // Verify we have a legal type before going any further. Currently, we handle 2155 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2156 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2157 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) 2158 return false; 2159 2160 const Value *PtrV = I->getOperand(1); 2161 if (TLI.supportSwiftError()) { 2162 // Swifterror values can come from either a function parameter with 2163 // swifterror attribute or an alloca with swifterror attribute. 2164 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { 2165 if (Arg->hasSwiftErrorAttr()) 2166 return false; 2167 } 2168 2169 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { 2170 if (Alloca->isSwiftError()) 2171 return false; 2172 } 2173 } 2174 2175 // Get the value to be stored into a register. Use the zero register directly 2176 // when possible to avoid an unnecessary copy and a wasted register. 2177 unsigned SrcReg = 0; 2178 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2179 if (CI->isZero()) 2180 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2181 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2182 if (CF->isZero() && !CF->isNegative()) { 2183 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2184 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2185 } 2186 } 2187 2188 if (!SrcReg) 2189 SrcReg = getRegForValue(Op0); 2190 2191 if (!SrcReg) 2192 return false; 2193 2194 auto *SI = cast<StoreInst>(I); 2195 2196 // Try to emit a STLR for seq_cst/release. 2197 if (SI->isAtomic()) { 2198 AtomicOrdering Ord = SI->getOrdering(); 2199 // The non-atomic instructions are sufficient for relaxed stores. 2200 if (isReleaseOrStronger(Ord)) { 2201 // The STLR addressing mode only supports a base reg; pass that directly. 2202 Register AddrReg = getRegForValue(PtrV); 2203 return emitStoreRelease(VT, SrcReg, AddrReg, 2204 createMachineMemOperandFor(I)); 2205 } 2206 } 2207 2208 // See if we can handle this address. 2209 Address Addr; 2210 if (!computeAddress(PtrV, Addr, Op0->getType())) 2211 return false; 2212 2213 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2214 return false; 2215 return true; 2216 } 2217 2218 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2219 switch (Pred) { 2220 case CmpInst::FCMP_ONE: 2221 case CmpInst::FCMP_UEQ: 2222 default: 2223 // AL is our "false" for now. The other two need more compares. 
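    // For example, FCMP_ONE is "ordered and not equal" (OLT || OGT) and
    // FCMP_UEQ is "unordered or equal" (UO || OEQ); each needs two condition
    // checks, which selectBranch/selectCmp/selectSelect implement with an
    // extra Bcc/CSINC (see the ExtraCC handling there).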
2224 return AArch64CC::AL; 2225 case CmpInst::ICMP_EQ: 2226 case CmpInst::FCMP_OEQ: 2227 return AArch64CC::EQ; 2228 case CmpInst::ICMP_SGT: 2229 case CmpInst::FCMP_OGT: 2230 return AArch64CC::GT; 2231 case CmpInst::ICMP_SGE: 2232 case CmpInst::FCMP_OGE: 2233 return AArch64CC::GE; 2234 case CmpInst::ICMP_UGT: 2235 case CmpInst::FCMP_UGT: 2236 return AArch64CC::HI; 2237 case CmpInst::FCMP_OLT: 2238 return AArch64CC::MI; 2239 case CmpInst::ICMP_ULE: 2240 case CmpInst::FCMP_OLE: 2241 return AArch64CC::LS; 2242 case CmpInst::FCMP_ORD: 2243 return AArch64CC::VC; 2244 case CmpInst::FCMP_UNO: 2245 return AArch64CC::VS; 2246 case CmpInst::FCMP_UGE: 2247 return AArch64CC::PL; 2248 case CmpInst::ICMP_SLT: 2249 case CmpInst::FCMP_ULT: 2250 return AArch64CC::LT; 2251 case CmpInst::ICMP_SLE: 2252 case CmpInst::FCMP_ULE: 2253 return AArch64CC::LE; 2254 case CmpInst::FCMP_UNE: 2255 case CmpInst::ICMP_NE: 2256 return AArch64CC::NE; 2257 case CmpInst::ICMP_UGE: 2258 return AArch64CC::HS; 2259 case CmpInst::ICMP_ULT: 2260 return AArch64CC::LO; 2261 } 2262 } 2263 2264 /// Try to emit a combined compare-and-branch instruction. 2265 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2266 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions 2267 // will not be produced, as they are conditional branch instructions that do 2268 // not set flags. 2269 if (FuncInfo.MF->getFunction().hasFnAttribute( 2270 Attribute::SpeculativeLoadHardening)) 2271 return false; 2272 2273 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2274 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2275 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2276 2277 const Value *LHS = CI->getOperand(0); 2278 const Value *RHS = CI->getOperand(1); 2279 2280 MVT VT; 2281 if (!isTypeSupported(LHS->getType(), VT)) 2282 return false; 2283 2284 unsigned BW = VT.getSizeInBits(); 2285 if (BW > 64) 2286 return false; 2287 2288 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2289 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2290 2291 // Try to take advantage of fallthrough opportunities. 
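  // For example, if the true block is the layout successor, a branch on
  //   %c = icmp eq i64 %x, 0
  // is emitted as "cbnz x<n>, <false block>" so that the common path simply
  // falls through into the true block.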
2292 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2293 std::swap(TBB, FBB); 2294 Predicate = CmpInst::getInversePredicate(Predicate); 2295 } 2296 2297 int TestBit = -1; 2298 bool IsCmpNE; 2299 switch (Predicate) { 2300 default: 2301 return false; 2302 case CmpInst::ICMP_EQ: 2303 case CmpInst::ICMP_NE: 2304 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2305 std::swap(LHS, RHS); 2306 2307 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2308 return false; 2309 2310 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2311 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2312 const Value *AndLHS = AI->getOperand(0); 2313 const Value *AndRHS = AI->getOperand(1); 2314 2315 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2316 if (C->getValue().isPowerOf2()) 2317 std::swap(AndLHS, AndRHS); 2318 2319 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2320 if (C->getValue().isPowerOf2()) { 2321 TestBit = C->getValue().logBase2(); 2322 LHS = AndLHS; 2323 } 2324 } 2325 2326 if (VT == MVT::i1) 2327 TestBit = 0; 2328 2329 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2330 break; 2331 case CmpInst::ICMP_SLT: 2332 case CmpInst::ICMP_SGE: 2333 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2334 return false; 2335 2336 TestBit = BW - 1; 2337 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2338 break; 2339 case CmpInst::ICMP_SGT: 2340 case CmpInst::ICMP_SLE: 2341 if (!isa<ConstantInt>(RHS)) 2342 return false; 2343 2344 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2345 return false; 2346 2347 TestBit = BW - 1; 2348 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2349 break; 2350 } // end switch 2351 2352 static const unsigned OpcTable[2][2][2] = { 2353 { {AArch64::CBZW, AArch64::CBZX }, 2354 {AArch64::CBNZW, AArch64::CBNZX} }, 2355 { {AArch64::TBZW, AArch64::TBZX }, 2356 {AArch64::TBNZW, AArch64::TBNZX} } 2357 }; 2358 2359 bool IsBitTest = TestBit != -1; 2360 bool Is64Bit = BW == 64; 2361 if (TestBit < 32 && TestBit >= 0) 2362 Is64Bit = false; 2363 2364 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2365 const MCInstrDesc &II = TII.get(Opc); 2366 2367 Register SrcReg = getRegForValue(LHS); 2368 if (!SrcReg) 2369 return false; 2370 2371 if (BW == 64 && !Is64Bit) 2372 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32); 2373 2374 if ((BW < 32) && !IsBitTest) 2375 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); 2376 2377 // Emit the combined compare and branch instruction. 2378 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2379 MachineInstrBuilder MIB = 2380 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)) 2381 .addReg(SrcReg); 2382 if (IsBitTest) 2383 MIB.addImm(TestBit); 2384 MIB.addMBB(TBB); 2385 2386 finishCondBranch(BI->getParent(), TBB, FBB); 2387 return true; 2388 } 2389 2390 bool AArch64FastISel::selectBranch(const Instruction *I) { 2391 const BranchInst *BI = cast<BranchInst>(I); 2392 if (BI->isUnconditional()) { 2393 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2394 fastEmitBranch(MSucc, BI->getDebugLoc()); 2395 return true; 2396 } 2397 2398 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2399 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2400 2401 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2402 if (CI->hasOneUse() && isValueAvailable(CI)) { 2403 // Try to optimize or fold the cmp. 
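      // For example, a conditional branch on "fcmp false ..." or "fcmp true ..."
      // degenerates into an unconditional branch to the false or the true
      // successor, respectively, so no compare is emitted at all.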
2404 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2405 switch (Predicate) { 2406 default: 2407 break; 2408 case CmpInst::FCMP_FALSE: 2409 fastEmitBranch(FBB, MIMD.getDL()); 2410 return true; 2411 case CmpInst::FCMP_TRUE: 2412 fastEmitBranch(TBB, MIMD.getDL()); 2413 return true; 2414 } 2415 2416 // Try to emit a combined compare-and-branch first. 2417 if (emitCompareAndBranch(BI)) 2418 return true; 2419 2420 // Try to take advantage of fallthrough opportunities. 2421 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2422 std::swap(TBB, FBB); 2423 Predicate = CmpInst::getInversePredicate(Predicate); 2424 } 2425 2426 // Emit the cmp. 2427 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2428 return false; 2429 2430 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2431 // instruction. 2432 AArch64CC::CondCode CC = getCompareCC(Predicate); 2433 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2434 switch (Predicate) { 2435 default: 2436 break; 2437 case CmpInst::FCMP_UEQ: 2438 ExtraCC = AArch64CC::EQ; 2439 CC = AArch64CC::VS; 2440 break; 2441 case CmpInst::FCMP_ONE: 2442 ExtraCC = AArch64CC::MI; 2443 CC = AArch64CC::GT; 2444 break; 2445 } 2446 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2447 2448 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2449 if (ExtraCC != AArch64CC::AL) { 2450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2451 .addImm(ExtraCC) 2452 .addMBB(TBB); 2453 } 2454 2455 // Emit the branch. 2456 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2457 .addImm(CC) 2458 .addMBB(TBB); 2459 2460 finishCondBranch(BI->getParent(), TBB, FBB); 2461 return true; 2462 } 2463 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2464 uint64_t Imm = CI->getZExtValue(); 2465 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2466 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B)) 2467 .addMBB(Target); 2468 2469 // Obtain the branch probability and add the target to the successor list. 2470 if (FuncInfo.BPI) { 2471 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2472 BI->getParent(), Target->getBasicBlock()); 2473 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2474 } else 2475 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2476 return true; 2477 } else { 2478 AArch64CC::CondCode CC = AArch64CC::NE; 2479 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2480 // Fake request the condition, otherwise the intrinsic might be completely 2481 // optimized away. 2482 Register CondReg = getRegForValue(BI->getCondition()); 2483 if (!CondReg) 2484 return false; 2485 2486 // Emit the branch. 2487 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2488 .addImm(CC) 2489 .addMBB(TBB); 2490 2491 finishCondBranch(BI->getParent(), TBB, FBB); 2492 return true; 2493 } 2494 } 2495 2496 Register CondReg = getRegForValue(BI->getCondition()); 2497 if (CondReg == 0) 2498 return false; 2499 2500 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
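  // For illustration (a sketch): "br i1 %c, label %t, label %f" becomes
  //   tbnz w<c>, #0, <t>
  // or, when %t is the layout successor, the inverted form
  //   tbz  w<c>, #0, <f>
  // so that the true edge can fall through.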
2501 unsigned Opcode = AArch64::TBNZW; 2502 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2503 std::swap(TBB, FBB); 2504 Opcode = AArch64::TBZW; 2505 } 2506 2507 const MCInstrDesc &II = TII.get(Opcode); 2508 Register ConstrainedCondReg 2509 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2510 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 2511 .addReg(ConstrainedCondReg) 2512 .addImm(0) 2513 .addMBB(TBB); 2514 2515 finishCondBranch(BI->getParent(), TBB, FBB); 2516 return true; 2517 } 2518 2519 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2520 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2521 Register AddrReg = getRegForValue(BI->getOperand(0)); 2522 if (AddrReg == 0) 2523 return false; 2524 2525 // Emit the indirect branch. 2526 const MCInstrDesc &II = TII.get(AArch64::BR); 2527 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2528 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg); 2529 2530 // Make sure the CFG is up-to-date. 2531 for (const auto *Succ : BI->successors()) 2532 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); 2533 2534 return true; 2535 } 2536 2537 bool AArch64FastISel::selectCmp(const Instruction *I) { 2538 const CmpInst *CI = cast<CmpInst>(I); 2539 2540 // Vectors of i1 are weird: bail out. 2541 if (CI->getType()->isVectorTy()) 2542 return false; 2543 2544 // Try to optimize or fold the cmp. 2545 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2546 unsigned ResultReg = 0; 2547 switch (Predicate) { 2548 default: 2549 break; 2550 case CmpInst::FCMP_FALSE: 2551 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2552 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 2553 TII.get(TargetOpcode::COPY), ResultReg) 2554 .addReg(AArch64::WZR, getKillRegState(true)); 2555 break; 2556 case CmpInst::FCMP_TRUE: 2557 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2558 break; 2559 } 2560 2561 if (ResultReg) { 2562 updateValueMap(I, ResultReg); 2563 return true; 2564 } 2565 2566 // Emit the cmp. 2567 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2568 return false; 2569 2570 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2571 2572 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2573 // condition codes are inverted, because they are used by CSINC. 2574 static unsigned CondCodeTable[2][2] = { 2575 { AArch64CC::NE, AArch64CC::VC }, 2576 { AArch64CC::PL, AArch64CC::LE } 2577 }; 2578 unsigned *CondCodes = nullptr; 2579 switch (Predicate) { 2580 default: 2581 break; 2582 case CmpInst::FCMP_UEQ: 2583 CondCodes = &CondCodeTable[0][0]; 2584 break; 2585 case CmpInst::FCMP_ONE: 2586 CondCodes = &CondCodeTable[1][0]; 2587 break; 2588 } 2589 2590 if (CondCodes) { 2591 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2592 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2593 TmpReg1) 2594 .addReg(AArch64::WZR, getKillRegState(true)) 2595 .addReg(AArch64::WZR, getKillRegState(true)) 2596 .addImm(CondCodes[0]); 2597 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2598 ResultReg) 2599 .addReg(TmpReg1, getKillRegState(true)) 2600 .addReg(AArch64::WZR, getKillRegState(true)) 2601 .addImm(CondCodes[1]); 2602 2603 updateValueMap(I, ResultReg); 2604 return true; 2605 } 2606 2607 // Now set a register based on the comparison. 
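  // For example, "icmp eq" materializes its result with the inverted condition
  // as "csinc w<d>, wzr, wzr, ne", the canonical expansion of "cset w<d>, eq":
  // it yields 1 when EQ holds and 0 otherwise.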
2608 AArch64CC::CondCode CC = getCompareCC(Predicate); 2609 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2610 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2611 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2612 ResultReg) 2613 .addReg(AArch64::WZR, getKillRegState(true)) 2614 .addReg(AArch64::WZR, getKillRegState(true)) 2615 .addImm(invertedCC); 2616 2617 updateValueMap(I, ResultReg); 2618 return true; 2619 } 2620 2621 /// Optimize selects of i1 if one of the operands has a 'true' or 'false' 2622 /// value. 2623 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2624 if (!SI->getType()->isIntegerTy(1)) 2625 return false; 2626 2627 const Value *Src1Val, *Src2Val; 2628 unsigned Opc = 0; 2629 bool NeedExtraOp = false; 2630 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2631 if (CI->isOne()) { 2632 Src1Val = SI->getCondition(); 2633 Src2Val = SI->getFalseValue(); 2634 Opc = AArch64::ORRWrr; 2635 } else { 2636 assert(CI->isZero()); 2637 Src1Val = SI->getFalseValue(); 2638 Src2Val = SI->getCondition(); 2639 Opc = AArch64::BICWrr; 2640 } 2641 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2642 if (CI->isOne()) { 2643 Src1Val = SI->getCondition(); 2644 Src2Val = SI->getTrueValue(); 2645 Opc = AArch64::ORRWrr; 2646 NeedExtraOp = true; 2647 } else { 2648 assert(CI->isZero()); 2649 Src1Val = SI->getCondition(); 2650 Src2Val = SI->getTrueValue(); 2651 Opc = AArch64::ANDWrr; 2652 } 2653 } 2654 2655 if (!Opc) 2656 return false; 2657 2658 Register Src1Reg = getRegForValue(Src1Val); 2659 if (!Src1Reg) 2660 return false; 2661 2662 Register Src2Reg = getRegForValue(Src2Val); 2663 if (!Src2Reg) 2664 return false; 2665 2666 if (NeedExtraOp) 2667 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1); 2668 2669 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2670 Src2Reg); 2671 updateValueMap(SI, ResultReg); 2672 return true; 2673 } 2674 2675 bool AArch64FastISel::selectSelect(const Instruction *I) { 2676 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2677 MVT VT; 2678 if (!isTypeSupported(I->getType(), VT)) 2679 return false; 2680 2681 unsigned Opc; 2682 const TargetRegisterClass *RC; 2683 switch (VT.SimpleTy) { 2684 default: 2685 return false; 2686 case MVT::i1: 2687 case MVT::i8: 2688 case MVT::i16: 2689 case MVT::i32: 2690 Opc = AArch64::CSELWr; 2691 RC = &AArch64::GPR32RegClass; 2692 break; 2693 case MVT::i64: 2694 Opc = AArch64::CSELXr; 2695 RC = &AArch64::GPR64RegClass; 2696 break; 2697 case MVT::f32: 2698 Opc = AArch64::FCSELSrrr; 2699 RC = &AArch64::FPR32RegClass; 2700 break; 2701 case MVT::f64: 2702 Opc = AArch64::FCSELDrrr; 2703 RC = &AArch64::FPR64RegClass; 2704 break; 2705 } 2706 2707 const SelectInst *SI = cast<SelectInst>(I); 2708 const Value *Cond = SI->getCondition(); 2709 AArch64CC::CondCode CC = AArch64CC::NE; 2710 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2711 2712 if (optimizeSelect(SI)) 2713 return true; 2714 2715 // Try to pickup the flags, so we don't have to emit another compare. 2716 if (foldXALUIntrinsic(CC, I, Cond)) { 2717 // Fake request the condition to force emission of the XALU intrinsic. 2718 Register CondReg = getRegForValue(Cond); 2719 if (!CondReg) 2720 return false; 2721 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2722 isValueAvailable(Cond)) { 2723 const auto *Cmp = cast<CmpInst>(Cond); 2724 // Try to optimize or fold the cmp. 
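    // For example, a select whose condition is "fcmp false ..." always yields
    // its false operand, so it is folded directly to that operand below
    // without emitting a compare or a CSEL.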
2725 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2726 const Value *FoldSelect = nullptr; 2727 switch (Predicate) { 2728 default: 2729 break; 2730 case CmpInst::FCMP_FALSE: 2731 FoldSelect = SI->getFalseValue(); 2732 break; 2733 case CmpInst::FCMP_TRUE: 2734 FoldSelect = SI->getTrueValue(); 2735 break; 2736 } 2737 2738 if (FoldSelect) { 2739 Register SrcReg = getRegForValue(FoldSelect); 2740 if (!SrcReg) 2741 return false; 2742 2743 updateValueMap(I, SrcReg); 2744 return true; 2745 } 2746 2747 // Emit the cmp. 2748 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2749 return false; 2750 2751 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2752 CC = getCompareCC(Predicate); 2753 switch (Predicate) { 2754 default: 2755 break; 2756 case CmpInst::FCMP_UEQ: 2757 ExtraCC = AArch64CC::EQ; 2758 CC = AArch64CC::VS; 2759 break; 2760 case CmpInst::FCMP_ONE: 2761 ExtraCC = AArch64CC::MI; 2762 CC = AArch64CC::GT; 2763 break; 2764 } 2765 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2766 } else { 2767 Register CondReg = getRegForValue(Cond); 2768 if (!CondReg) 2769 return false; 2770 2771 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2772 CondReg = constrainOperandRegClass(II, CondReg, 1); 2773 2774 // Emit a TST instruction (ANDS wzr, reg, #imm). 2775 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, 2776 AArch64::WZR) 2777 .addReg(CondReg) 2778 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2779 } 2780 2781 Register Src1Reg = getRegForValue(SI->getTrueValue()); 2782 Register Src2Reg = getRegForValue(SI->getFalseValue()); 2783 2784 if (!Src1Reg || !Src2Reg) 2785 return false; 2786 2787 if (ExtraCC != AArch64CC::AL) 2788 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC); 2789 2790 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC); 2791 updateValueMap(I, ResultReg); 2792 return true; 2793 } 2794 2795 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2796 Value *V = I->getOperand(0); 2797 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2798 return false; 2799 2800 Register Op = getRegForValue(V); 2801 if (Op == 0) 2802 return false; 2803 2804 Register ResultReg = createResultReg(&AArch64::FPR64RegClass); 2805 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr), 2806 ResultReg).addReg(Op); 2807 updateValueMap(I, ResultReg); 2808 return true; 2809 } 2810 2811 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2812 Value *V = I->getOperand(0); 2813 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2814 return false; 2815 2816 Register Op = getRegForValue(V); 2817 if (Op == 0) 2818 return false; 2819 2820 Register ResultReg = createResultReg(&AArch64::FPR32RegClass); 2821 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr), 2822 ResultReg).addReg(Op); 2823 updateValueMap(I, ResultReg); 2824 return true; 2825 } 2826 2827 // FPToUI and FPToSI 2828 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2829 MVT DestVT; 2830 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2831 return false; 2832 2833 Register SrcReg = getRegForValue(I->getOperand(0)); 2834 if (SrcReg == 0) 2835 return false; 2836 2837 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2838 if (SrcVT == MVT::f128 || SrcVT == MVT::f16) 2839 return false; 2840 2841 unsigned Opc; 2842 if (SrcVT == MVT::f64) { 2843 if (Signed) 2844 Opc = (DestVT == MVT::i32) ? 
AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2845 else 2846 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2847 } else { 2848 if (Signed) 2849 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2850 else 2851 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2852 } 2853 Register ResultReg = createResultReg( 2854 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2855 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 2856 .addReg(SrcReg); 2857 updateValueMap(I, ResultReg); 2858 return true; 2859 } 2860 2861 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2862 MVT DestVT; 2863 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2864 return false; 2865 // Let regular ISEL handle FP16 2866 if (DestVT == MVT::f16) 2867 return false; 2868 2869 assert((DestVT == MVT::f32 || DestVT == MVT::f64) && 2870 "Unexpected value type."); 2871 2872 Register SrcReg = getRegForValue(I->getOperand(0)); 2873 if (!SrcReg) 2874 return false; 2875 2876 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2877 2878 // Handle sign-extension. 2879 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2880 SrcReg = 2881 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2882 if (!SrcReg) 2883 return false; 2884 } 2885 2886 unsigned Opc; 2887 if (SrcVT == MVT::i64) { 2888 if (Signed) 2889 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2890 else 2891 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2892 } else { 2893 if (Signed) 2894 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2895 else 2896 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2897 } 2898 2899 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg); 2900 updateValueMap(I, ResultReg); 2901 return true; 2902 } 2903 2904 bool AArch64FastISel::fastLowerArguments() { 2905 if (!FuncInfo.CanLowerReturn) 2906 return false; 2907 2908 const Function *F = FuncInfo.Fn; 2909 if (F->isVarArg()) 2910 return false; 2911 2912 CallingConv::ID CC = F->getCallingConv(); 2913 if (CC != CallingConv::C && CC != CallingConv::Swift) 2914 return false; 2915 2916 if (Subtarget->hasCustomCallingConv()) 2917 return false; 2918 2919 // Only handle simple cases of up to 8 GPR and FPR each. 
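  // This mirrors the AAPCS64 assignment: the first eight integer arguments go
  // in W0-W7/X0-X7 and the first eight floating-point/vector arguments in
  // H0-H7, S0-S7, D0-D7 or Q0-Q7, which is what the Registers table below
  // encodes. Anything that would have to be passed on the stack is left to
  // SelectionDAG.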
2920 unsigned GPRCnt = 0; 2921 unsigned FPRCnt = 0; 2922 for (auto const &Arg : F->args()) { 2923 if (Arg.hasAttribute(Attribute::ByVal) || 2924 Arg.hasAttribute(Attribute::InReg) || 2925 Arg.hasAttribute(Attribute::StructRet) || 2926 Arg.hasAttribute(Attribute::SwiftSelf) || 2927 Arg.hasAttribute(Attribute::SwiftAsync) || 2928 Arg.hasAttribute(Attribute::SwiftError) || 2929 Arg.hasAttribute(Attribute::Nest)) 2930 return false; 2931 2932 Type *ArgTy = Arg.getType(); 2933 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2934 return false; 2935 2936 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2937 if (!ArgVT.isSimple()) 2938 return false; 2939 2940 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2941 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2942 return false; 2943 2944 if (VT.isVector() && 2945 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2946 return false; 2947 2948 if (VT >= MVT::i1 && VT <= MVT::i64) 2949 ++GPRCnt; 2950 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2951 VT.is128BitVector()) 2952 ++FPRCnt; 2953 else 2954 return false; 2955 2956 if (GPRCnt > 8 || FPRCnt > 8) 2957 return false; 2958 } 2959 2960 static const MCPhysReg Registers[6][8] = { 2961 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 2962 AArch64::W5, AArch64::W6, AArch64::W7 }, 2963 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 2964 AArch64::X5, AArch64::X6, AArch64::X7 }, 2965 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 2966 AArch64::H5, AArch64::H6, AArch64::H7 }, 2967 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 2968 AArch64::S5, AArch64::S6, AArch64::S7 }, 2969 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 2970 AArch64::D5, AArch64::D6, AArch64::D7 }, 2971 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 2972 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 2973 }; 2974 2975 unsigned GPRIdx = 0; 2976 unsigned FPRIdx = 0; 2977 for (auto const &Arg : F->args()) { 2978 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 2979 unsigned SrcReg; 2980 const TargetRegisterClass *RC; 2981 if (VT >= MVT::i1 && VT <= MVT::i32) { 2982 SrcReg = Registers[0][GPRIdx++]; 2983 RC = &AArch64::GPR32RegClass; 2984 VT = MVT::i32; 2985 } else if (VT == MVT::i64) { 2986 SrcReg = Registers[1][GPRIdx++]; 2987 RC = &AArch64::GPR64RegClass; 2988 } else if (VT == MVT::f16) { 2989 SrcReg = Registers[2][FPRIdx++]; 2990 RC = &AArch64::FPR16RegClass; 2991 } else if (VT == MVT::f32) { 2992 SrcReg = Registers[3][FPRIdx++]; 2993 RC = &AArch64::FPR32RegClass; 2994 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 2995 SrcReg = Registers[4][FPRIdx++]; 2996 RC = &AArch64::FPR64RegClass; 2997 } else if (VT.is128BitVector()) { 2998 SrcReg = Registers[5][FPRIdx++]; 2999 RC = &AArch64::FPR128RegClass; 3000 } else 3001 llvm_unreachable("Unexpected value type."); 3002 3003 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 3004 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 3005 // Without this, EmitLiveInCopies may eliminate the livein if its only 3006 // use is a bitcast (which isn't turned into an instruction). 
3007 Register ResultReg = createResultReg(RC); 3008 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3009 TII.get(TargetOpcode::COPY), ResultReg) 3010 .addReg(DstReg, getKillRegState(true)); 3011 updateValueMap(&Arg, ResultReg); 3012 } 3013 return true; 3014 } 3015 3016 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 3017 SmallVectorImpl<MVT> &OutVTs, 3018 unsigned &NumBytes) { 3019 CallingConv::ID CC = CLI.CallConv; 3020 SmallVector<CCValAssign, 16> ArgLocs; 3021 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 3022 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 3023 3024 // Get a count of how many bytes are to be pushed on the stack. 3025 NumBytes = CCInfo.getStackSize(); 3026 3027 // Issue CALLSEQ_START 3028 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 3029 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown)) 3030 .addImm(NumBytes).addImm(0); 3031 3032 // Process the args. 3033 for (CCValAssign &VA : ArgLocs) { 3034 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 3035 MVT ArgVT = OutVTs[VA.getValNo()]; 3036 3037 Register ArgReg = getRegForValue(ArgVal); 3038 if (!ArgReg) 3039 return false; 3040 3041 // Handle arg promotion: SExt, ZExt, AExt. 3042 switch (VA.getLocInfo()) { 3043 case CCValAssign::Full: 3044 break; 3045 case CCValAssign::SExt: { 3046 MVT DestVT = VA.getLocVT(); 3047 MVT SrcVT = ArgVT; 3048 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 3049 if (!ArgReg) 3050 return false; 3051 break; 3052 } 3053 case CCValAssign::AExt: 3054 // Intentional fall-through. 3055 case CCValAssign::ZExt: { 3056 MVT DestVT = VA.getLocVT(); 3057 MVT SrcVT = ArgVT; 3058 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 3059 if (!ArgReg) 3060 return false; 3061 break; 3062 } 3063 default: 3064 llvm_unreachable("Unknown arg promotion!"); 3065 } 3066 3067 // Now copy/store arg to correct locations. 3068 if (VA.isRegLoc() && !VA.needsCustom()) { 3069 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3070 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 3071 CLI.OutRegs.push_back(VA.getLocReg()); 3072 } else if (VA.needsCustom()) { 3073 // FIXME: Handle custom args. 3074 return false; 3075 } else { 3076 assert(VA.isMemLoc() && "Assuming store on stack."); 3077 3078 // Don't emit stores for undef values. 3079 if (isa<UndefValue>(ArgVal)) 3080 continue; 3081 3082 // Need to store on the stack. 3083 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 3084 3085 unsigned BEAlign = 0; 3086 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3087 BEAlign = 8 - ArgSize; 3088 3089 Address Addr; 3090 Addr.setKind(Address::RegBase); 3091 Addr.setReg(AArch64::SP); 3092 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3093 3094 Align Alignment = DL.getABITypeAlign(ArgVal->getType()); 3095 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3096 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 3097 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3098 3099 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3100 return false; 3101 } 3102 } 3103 return true; 3104 } 3105 3106 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) { 3107 CallingConv::ID CC = CLI.CallConv; 3108 3109 // Issue CALLSEQ_END 3110 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3111 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp)) 3112 .addImm(NumBytes).addImm(0); 3113 3114 // Now the return values. 
3115 SmallVector<CCValAssign, 16> RVLocs; 3116 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3117 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC)); 3118 3119 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy); 3120 for (unsigned i = 0; i != RVLocs.size(); ++i) { 3121 CCValAssign &VA = RVLocs[i]; 3122 MVT CopyVT = VA.getValVT(); 3123 unsigned CopyReg = ResultReg + i; 3124 3125 // TODO: Handle big-endian results 3126 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3127 return false; 3128 3129 // Copy result out of their specified physreg. 3130 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), 3131 CopyReg) 3132 .addReg(VA.getLocReg()); 3133 CLI.InRegs.push_back(VA.getLocReg()); 3134 } 3135 3136 CLI.ResultReg = ResultReg; 3137 CLI.NumResultRegs = RVLocs.size(); 3138 3139 return true; 3140 } 3141 3142 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3143 CallingConv::ID CC = CLI.CallConv; 3144 bool IsTailCall = CLI.IsTailCall; 3145 bool IsVarArg = CLI.IsVarArg; 3146 const Value *Callee = CLI.Callee; 3147 MCSymbol *Symbol = CLI.Symbol; 3148 3149 if (!Callee && !Symbol) 3150 return false; 3151 3152 // Allow SelectionDAG isel to handle calls to functions like setjmp that need 3153 // a bti instruction following the call. 3154 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) && 3155 !Subtarget->noBTIAtReturnTwice() && 3156 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) 3157 return false; 3158 3159 // Allow SelectionDAG isel to handle indirect calls with KCFI checks. 3160 if (CLI.CB && CLI.CB->isIndirectCall() && 3161 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi)) 3162 return false; 3163 3164 // Allow SelectionDAG isel to handle tail calls. 3165 if (IsTailCall) 3166 return false; 3167 3168 // FIXME: we could and should support this, but for now correctness at -O0 is 3169 // more important. 3170 if (Subtarget->isTargetILP32()) 3171 return false; 3172 3173 CodeModel::Model CM = TM.getCodeModel(); 3174 // Only support the small-addressing and large code models. 3175 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) 3176 return false; 3177 3178 // FIXME: Add large code model support for ELF. 3179 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3180 return false; 3181 3182 // Let SDISel handle vararg functions. 3183 if (IsVarArg) 3184 return false; 3185 3186 for (auto Flag : CLI.OutFlags) 3187 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || 3188 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError()) 3189 return false; 3190 3191 // Set up the argument vectors. 3192 SmallVector<MVT, 16> OutVTs; 3193 OutVTs.reserve(CLI.OutVals.size()); 3194 3195 for (auto *Val : CLI.OutVals) { 3196 MVT VT; 3197 if (!isTypeLegal(Val->getType(), VT) && 3198 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3199 return false; 3200 3201 // We don't handle vector parameters yet. 3202 if (VT.isVector() || VT.getSizeInBits() > 64) 3203 return false; 3204 3205 OutVTs.push_back(VT); 3206 } 3207 3208 Address Addr; 3209 if (Callee && !computeCallAddress(Callee, Addr)) 3210 return false; 3211 3212 // The weak function target may be zero; in that case we must use indirect 3213 // addressing via a stub on windows as it may be out of range for a 3214 // PC-relative jump. 3215 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() && 3216 Addr.getGlobalValue()->hasExternalWeakLinkage()) 3217 return false; 3218 3219 // Handle the arguments now that we've gotten them. 
3220 unsigned NumBytes; 3221 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3222 return false; 3223 3224 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3225 if (RegInfo->isAnyArgRegReserved(*MF)) 3226 RegInfo->emitReservedArgRegCallError(*MF); 3227 3228 // Issue the call. 3229 MachineInstrBuilder MIB; 3230 if (Subtarget->useSmallAddressing()) { 3231 const MCInstrDesc &II = 3232 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL); 3233 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II); 3234 if (Symbol) 3235 MIB.addSym(Symbol, 0); 3236 else if (Addr.getGlobalValue()) 3237 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3238 else if (Addr.getReg()) { 3239 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3240 MIB.addReg(Reg); 3241 } else 3242 return false; 3243 } else { 3244 unsigned CallReg = 0; 3245 if (Symbol) { 3246 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3247 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP), 3248 ADRPReg) 3249 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3250 3251 CallReg = createResultReg(&AArch64::GPR64RegClass); 3252 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3253 TII.get(AArch64::LDRXui), CallReg) 3254 .addReg(ADRPReg) 3255 .addSym(Symbol, 3256 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3257 } else if (Addr.getGlobalValue()) 3258 CallReg = materializeGV(Addr.getGlobalValue()); 3259 else if (Addr.getReg()) 3260 CallReg = Addr.getReg(); 3261 3262 if (!CallReg) 3263 return false; 3264 3265 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF)); 3266 CallReg = constrainOperandRegClass(II, CallReg, 0); 3267 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg); 3268 } 3269 3270 // Add implicit physical register uses to the call. 3271 for (auto Reg : CLI.OutRegs) 3272 MIB.addReg(Reg, RegState::Implicit); 3273 3274 // Add a register mask with the call-preserved registers. 3275 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3276 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3277 3278 CLI.Call = MIB; 3279 3280 // Finish off the call including any return values. 3281 return finishCall(CLI, NumBytes); 3282 } 3283 3284 bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) { 3285 if (Alignment) 3286 return Len / Alignment->value() <= 4; 3287 else 3288 return Len < 32; 3289 } 3290 3291 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3292 uint64_t Len, MaybeAlign Alignment) { 3293 // Make sure we don't bloat code by inlining very large memcpy's. 3294 if (!isMemCpySmall(Len, Alignment)) 3295 return false; 3296 3297 int64_t UnscaledOffset = 0; 3298 Address OrigDest = Dest; 3299 Address OrigSrc = Src; 3300 3301 while (Len) { 3302 MVT VT; 3303 if (!Alignment || *Alignment >= 8) { 3304 if (Len >= 8) 3305 VT = MVT::i64; 3306 else if (Len >= 4) 3307 VT = MVT::i32; 3308 else if (Len >= 2) 3309 VT = MVT::i16; 3310 else { 3311 VT = MVT::i8; 3312 } 3313 } else { 3314 assert(Alignment && "Alignment is set in this branch"); 3315 // Bound based on alignment. 
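      // For example, an 8-byte copy with 2-byte alignment is emitted as four
      // i16 load/store pairs (isMemCpySmall has already bounded Len/Alignment
      // at 4), whereas the same length with 8-byte alignment takes the branch
      // above and uses a single i64 pair.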
3316 if (Len >= 4 && *Alignment == 4) 3317 VT = MVT::i32; 3318 else if (Len >= 2 && *Alignment == 2) 3319 VT = MVT::i16; 3320 else { 3321 VT = MVT::i8; 3322 } 3323 } 3324 3325 unsigned ResultReg = emitLoad(VT, VT, Src); 3326 if (!ResultReg) 3327 return false; 3328 3329 if (!emitStore(VT, ResultReg, Dest)) 3330 return false; 3331 3332 int64_t Size = VT.getSizeInBits() / 8; 3333 Len -= Size; 3334 UnscaledOffset += Size; 3335 3336 // We need to recompute the unscaled offset for each iteration. 3337 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3338 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3339 } 3340 3341 return true; 3342 } 3343 3344 /// Check if it is possible to fold the condition from the XALU intrinsic 3345 /// into the user. The condition code will only be updated on success. 3346 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3347 const Instruction *I, 3348 const Value *Cond) { 3349 if (!isa<ExtractValueInst>(Cond)) 3350 return false; 3351 3352 const auto *EV = cast<ExtractValueInst>(Cond); 3353 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3354 return false; 3355 3356 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3357 MVT RetVT; 3358 const Function *Callee = II->getCalledFunction(); 3359 Type *RetTy = 3360 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3361 if (!isTypeLegal(RetTy, RetVT)) 3362 return false; 3363 3364 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3365 return false; 3366 3367 const Value *LHS = II->getArgOperand(0); 3368 const Value *RHS = II->getArgOperand(1); 3369 3370 // Canonicalize immediate to the RHS. 3371 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3372 std::swap(LHS, RHS); 3373 3374 // Simplify multiplies. 3375 Intrinsic::ID IID = II->getIntrinsicID(); 3376 switch (IID) { 3377 default: 3378 break; 3379 case Intrinsic::smul_with_overflow: 3380 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3381 if (C->getValue() == 2) 3382 IID = Intrinsic::sadd_with_overflow; 3383 break; 3384 case Intrinsic::umul_with_overflow: 3385 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3386 if (C->getValue() == 2) 3387 IID = Intrinsic::uadd_with_overflow; 3388 break; 3389 } 3390 3391 AArch64CC::CondCode TmpCC; 3392 switch (IID) { 3393 default: 3394 return false; 3395 case Intrinsic::sadd_with_overflow: 3396 case Intrinsic::ssub_with_overflow: 3397 TmpCC = AArch64CC::VS; 3398 break; 3399 case Intrinsic::uadd_with_overflow: 3400 TmpCC = AArch64CC::HS; 3401 break; 3402 case Intrinsic::usub_with_overflow: 3403 TmpCC = AArch64CC::LO; 3404 break; 3405 case Intrinsic::smul_with_overflow: 3406 case Intrinsic::umul_with_overflow: 3407 TmpCC = AArch64CC::NE; 3408 break; 3409 } 3410 3411 // Check if both instructions are in the same basic block. 3412 if (!isValueAvailable(II)) 3413 return false; 3414 3415 // Make sure nothing is in the way 3416 BasicBlock::const_iterator Start(I); 3417 BasicBlock::const_iterator End(II); 3418 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3419 // We only expect extractvalue instructions between the intrinsic and the 3420 // instruction to be selected. 3421 if (!isa<ExtractValueInst>(Itr)) 3422 return false; 3423 3424 // Check that the extractvalue operand comes from the intrinsic. 
3425 const auto *EVI = cast<ExtractValueInst>(Itr); 3426 if (EVI->getAggregateOperand() != II) 3427 return false; 3428 } 3429 3430 CC = TmpCC; 3431 return true; 3432 } 3433 3434 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3435 // FIXME: Handle more intrinsics. 3436 switch (II->getIntrinsicID()) { 3437 default: return false; 3438 case Intrinsic::frameaddress: { 3439 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3440 MFI.setFrameAddressIsTaken(true); 3441 3442 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3443 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3444 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3445 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3446 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3447 // Recursively load frame address 3448 // ldr x0, [fp] 3449 // ldr x0, [x0] 3450 // ldr x0, [x0] 3451 // ... 3452 unsigned DestReg; 3453 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3454 while (Depth--) { 3455 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3456 SrcReg, 0); 3457 assert(DestReg && "Unexpected LDR instruction emission failure."); 3458 SrcReg = DestReg; 3459 } 3460 3461 updateValueMap(II, SrcReg); 3462 return true; 3463 } 3464 case Intrinsic::sponentry: { 3465 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3466 3467 // SP = FP + Fixed Object + 16 3468 int FI = MFI.CreateFixedObject(4, 0, false); 3469 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 3470 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3471 TII.get(AArch64::ADDXri), ResultReg) 3472 .addFrameIndex(FI) 3473 .addImm(0) 3474 .addImm(0); 3475 3476 updateValueMap(II, ResultReg); 3477 return true; 3478 } 3479 case Intrinsic::memcpy: 3480 case Intrinsic::memmove: { 3481 const auto *MTI = cast<MemTransferInst>(II); 3482 // Don't handle volatile. 3483 if (MTI->isVolatile()) 3484 return false; 3485 3486 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3487 // we would emit dead code because we don't currently handle memmoves. 3488 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3489 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3490 // Small memcpy's are common enough that we want to do them without a call 3491 // if possible. 3492 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3493 MaybeAlign Alignment; 3494 if (MTI->getDestAlign() || MTI->getSourceAlign()) 3495 Alignment = std::min(MTI->getDestAlign().valueOrOne(), 3496 MTI->getSourceAlign().valueOrOne()); 3497 if (isMemCpySmall(Len, Alignment)) { 3498 Address Dest, Src; 3499 if (!computeAddress(MTI->getRawDest(), Dest) || 3500 !computeAddress(MTI->getRawSource(), Src)) 3501 return false; 3502 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3503 return true; 3504 } 3505 } 3506 3507 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3508 return false; 3509 3510 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3511 // Fast instruction selection doesn't support the special 3512 // address spaces. 3513 return false; 3514 3515 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3516 return lowerCallTo(II, IntrMemName, II->arg_size() - 1); 3517 } 3518 case Intrinsic::memset: { 3519 const MemSetInst *MSI = cast<MemSetInst>(II); 3520 // Don't handle volatile. 
3521 if (MSI->isVolatile()) 3522 return false; 3523 3524 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3525 return false; 3526 3527 if (MSI->getDestAddressSpace() > 255) 3528 // Fast instruction selection doesn't support the special 3529 // address spaces. 3530 return false; 3531 3532 return lowerCallTo(II, "memset", II->arg_size() - 1); 3533 } 3534 case Intrinsic::sin: 3535 case Intrinsic::cos: 3536 case Intrinsic::pow: { 3537 MVT RetVT; 3538 if (!isTypeLegal(II->getType(), RetVT)) 3539 return false; 3540 3541 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3542 return false; 3543 3544 static const RTLIB::Libcall LibCallTable[3][2] = { 3545 { RTLIB::SIN_F32, RTLIB::SIN_F64 }, 3546 { RTLIB::COS_F32, RTLIB::COS_F64 }, 3547 { RTLIB::POW_F32, RTLIB::POW_F64 } 3548 }; 3549 RTLIB::Libcall LC; 3550 bool Is64Bit = RetVT == MVT::f64; 3551 switch (II->getIntrinsicID()) { 3552 default: 3553 llvm_unreachable("Unexpected intrinsic."); 3554 case Intrinsic::sin: 3555 LC = LibCallTable[0][Is64Bit]; 3556 break; 3557 case Intrinsic::cos: 3558 LC = LibCallTable[1][Is64Bit]; 3559 break; 3560 case Intrinsic::pow: 3561 LC = LibCallTable[2][Is64Bit]; 3562 break; 3563 } 3564 3565 ArgListTy Args; 3566 Args.reserve(II->arg_size()); 3567 3568 // Populate the argument list. 3569 for (auto &Arg : II->args()) { 3570 ArgListEntry Entry; 3571 Entry.Val = Arg; 3572 Entry.Ty = Arg->getType(); 3573 Args.push_back(Entry); 3574 } 3575 3576 CallLoweringInfo CLI; 3577 MCContext &Ctx = MF->getContext(); 3578 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3579 TLI.getLibcallName(LC), std::move(Args)); 3580 if (!lowerCallTo(CLI)) 3581 return false; 3582 updateValueMap(II, CLI.ResultReg); 3583 return true; 3584 } 3585 case Intrinsic::fabs: { 3586 MVT VT; 3587 if (!isTypeLegal(II->getType(), VT)) 3588 return false; 3589 3590 unsigned Opc; 3591 switch (VT.SimpleTy) { 3592 default: 3593 return false; 3594 case MVT::f32: 3595 Opc = AArch64::FABSSr; 3596 break; 3597 case MVT::f64: 3598 Opc = AArch64::FABSDr; 3599 break; 3600 } 3601 Register SrcReg = getRegForValue(II->getOperand(0)); 3602 if (!SrcReg) 3603 return false; 3604 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3605 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 3606 .addReg(SrcReg); 3607 updateValueMap(II, ResultReg); 3608 return true; 3609 } 3610 case Intrinsic::trap: 3611 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK)) 3612 .addImm(1); 3613 return true; 3614 case Intrinsic::debugtrap: 3615 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK)) 3616 .addImm(0xF000); 3617 return true; 3618 3619 case Intrinsic::sqrt: { 3620 Type *RetTy = II->getCalledFunction()->getReturnType(); 3621 3622 MVT VT; 3623 if (!isTypeLegal(RetTy, VT)) 3624 return false; 3625 3626 Register Op0Reg = getRegForValue(II->getOperand(0)); 3627 if (!Op0Reg) 3628 return false; 3629 3630 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg); 3631 if (!ResultReg) 3632 return false; 3633 3634 updateValueMap(II, ResultReg); 3635 return true; 3636 } 3637 case Intrinsic::sadd_with_overflow: 3638 case Intrinsic::uadd_with_overflow: 3639 case Intrinsic::ssub_with_overflow: 3640 case Intrinsic::usub_with_overflow: 3641 case Intrinsic::smul_with_overflow: 3642 case Intrinsic::umul_with_overflow: { 3643 // This implements the basic lowering of the xalu with overflow intrinsics. 
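    // The value result is produced by a flag-setting add/sub (or a multiply
    // followed by an explicit overflow check), and the i1 overflow result is
    // materialized with a CSINC on the inverted condition. Roughly, for
    // @llvm.sadd.with.overflow.i32 the emitted sequence looks like:
    //   adds w8, w0, w1
    //   cset w9, vs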
3644 const Function *Callee = II->getCalledFunction(); 3645 auto *Ty = cast<StructType>(Callee->getReturnType()); 3646 Type *RetTy = Ty->getTypeAtIndex(0U); 3647 3648 MVT VT; 3649 if (!isTypeLegal(RetTy, VT)) 3650 return false; 3651 3652 if (VT != MVT::i32 && VT != MVT::i64) 3653 return false; 3654 3655 const Value *LHS = II->getArgOperand(0); 3656 const Value *RHS = II->getArgOperand(1); 3657 // Canonicalize immediate to the RHS. 3658 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3659 std::swap(LHS, RHS); 3660 3661 // Simplify multiplies. 3662 Intrinsic::ID IID = II->getIntrinsicID(); 3663 switch (IID) { 3664 default: 3665 break; 3666 case Intrinsic::smul_with_overflow: 3667 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3668 if (C->getValue() == 2) { 3669 IID = Intrinsic::sadd_with_overflow; 3670 RHS = LHS; 3671 } 3672 break; 3673 case Intrinsic::umul_with_overflow: 3674 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3675 if (C->getValue() == 2) { 3676 IID = Intrinsic::uadd_with_overflow; 3677 RHS = LHS; 3678 } 3679 break; 3680 } 3681 3682 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3683 AArch64CC::CondCode CC = AArch64CC::Invalid; 3684 switch (IID) { 3685 default: llvm_unreachable("Unexpected intrinsic!"); 3686 case Intrinsic::sadd_with_overflow: 3687 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3688 CC = AArch64CC::VS; 3689 break; 3690 case Intrinsic::uadd_with_overflow: 3691 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3692 CC = AArch64CC::HS; 3693 break; 3694 case Intrinsic::ssub_with_overflow: 3695 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3696 CC = AArch64CC::VS; 3697 break; 3698 case Intrinsic::usub_with_overflow: 3699 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3700 CC = AArch64CC::LO; 3701 break; 3702 case Intrinsic::smul_with_overflow: { 3703 CC = AArch64CC::NE; 3704 Register LHSReg = getRegForValue(LHS); 3705 if (!LHSReg) 3706 return false; 3707 3708 Register RHSReg = getRegForValue(RHS); 3709 if (!RHSReg) 3710 return false; 3711 3712 if (VT == MVT::i32) { 3713 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg); 3714 Register MulSubReg = 3715 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3716 // cmp xreg, wreg, sxtw 3717 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg, 3718 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true, 3719 /*WantResult=*/false); 3720 MulReg = MulSubReg; 3721 } else { 3722 assert(VT == MVT::i64 && "Unexpected value type."); 3723 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3724 // reused in the next instruction. 
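        // The overflow check compares the high 64 bits of the 128-bit product
        // (SMULH) against the sign bits of the low half; they differ exactly
        // when the multiply overflowed. A sketch of the emitted sequence:
        //   mul   x8, x0, x1
        //   smulh x9, x0, x1
        //   cmp   x9, x8, asr #63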
3725 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3726 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg); 3727 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63, 3728 /*WantResult=*/false); 3729 } 3730 break; 3731 } 3732 case Intrinsic::umul_with_overflow: { 3733 CC = AArch64CC::NE; 3734 Register LHSReg = getRegForValue(LHS); 3735 if (!LHSReg) 3736 return false; 3737 3738 Register RHSReg = getRegForValue(RHS); 3739 if (!RHSReg) 3740 return false; 3741 3742 if (VT == MVT::i32) { 3743 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg); 3744 // tst xreg, #0xffffffff00000000 3745 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3746 TII.get(AArch64::ANDSXri), AArch64::XZR) 3747 .addReg(MulReg) 3748 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64)); 3749 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3750 } else { 3751 assert(VT == MVT::i64 && "Unexpected value type."); 3752 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3753 // reused in the next instruction. 3754 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3755 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg); 3756 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false); 3757 } 3758 break; 3759 } 3760 } 3761 3762 if (MulReg) { 3763 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3764 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3765 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3766 } 3767 3768 if (!ResultReg1) 3769 return false; 3770 3771 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3772 AArch64::WZR, AArch64::WZR, 3773 getInvertedCondCode(CC)); 3774 (void)ResultReg2; 3775 assert((ResultReg1 + 1) == ResultReg2 && 3776 "Nonconsecutive result registers."); 3777 updateValueMap(II, ResultReg1, 2); 3778 return true; 3779 } 3780 case Intrinsic::aarch64_crc32b: 3781 case Intrinsic::aarch64_crc32h: 3782 case Intrinsic::aarch64_crc32w: 3783 case Intrinsic::aarch64_crc32x: 3784 case Intrinsic::aarch64_crc32cb: 3785 case Intrinsic::aarch64_crc32ch: 3786 case Intrinsic::aarch64_crc32cw: 3787 case Intrinsic::aarch64_crc32cx: { 3788 if (!Subtarget->hasCRC()) 3789 return false; 3790 3791 unsigned Opc; 3792 switch (II->getIntrinsicID()) { 3793 default: 3794 llvm_unreachable("Unexpected intrinsic!"); 3795 case Intrinsic::aarch64_crc32b: 3796 Opc = AArch64::CRC32Brr; 3797 break; 3798 case Intrinsic::aarch64_crc32h: 3799 Opc = AArch64::CRC32Hrr; 3800 break; 3801 case Intrinsic::aarch64_crc32w: 3802 Opc = AArch64::CRC32Wrr; 3803 break; 3804 case Intrinsic::aarch64_crc32x: 3805 Opc = AArch64::CRC32Xrr; 3806 break; 3807 case Intrinsic::aarch64_crc32cb: 3808 Opc = AArch64::CRC32CBrr; 3809 break; 3810 case Intrinsic::aarch64_crc32ch: 3811 Opc = AArch64::CRC32CHrr; 3812 break; 3813 case Intrinsic::aarch64_crc32cw: 3814 Opc = AArch64::CRC32CWrr; 3815 break; 3816 case Intrinsic::aarch64_crc32cx: 3817 Opc = AArch64::CRC32CXrr; 3818 break; 3819 } 3820 3821 Register LHSReg = getRegForValue(II->getArgOperand(0)); 3822 Register RHSReg = getRegForValue(II->getArgOperand(1)); 3823 if (!LHSReg || !RHSReg) 3824 return false; 3825 3826 Register ResultReg = 3827 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg); 3828 updateValueMap(II, ResultReg); 3829 return true; 3830 } 3831 } 3832 return false; 3833 } 3834 3835 bool AArch64FastISel::selectRet(const Instruction *I) { 3836 const ReturnInst *Ret = cast<ReturnInst>(I); 3837 const Function &F = *I->getParent()->getParent(); 3838 3839 if (!FuncInfo.CanLowerReturn) 3840 return false; 
3841 3842 if (F.isVarArg()) 3843 return false; 3844 3845 if (TLI.supportSwiftError() && 3846 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 3847 return false; 3848 3849 if (TLI.supportSplitCSR(FuncInfo.MF)) 3850 return false; 3851 3852 // Build a list of return value registers. 3853 SmallVector<unsigned, 4> RetRegs; 3854 3855 if (Ret->getNumOperands() > 0) { 3856 CallingConv::ID CC = F.getCallingConv(); 3857 SmallVector<ISD::OutputArg, 4> Outs; 3858 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3859 3860 // Analyze operands of the call, assigning locations to each operand. 3861 SmallVector<CCValAssign, 16> ValLocs; 3862 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3863 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS 3864 : RetCC_AArch64_AAPCS; 3865 CCInfo.AnalyzeReturn(Outs, RetCC); 3866 3867 // Only handle a single return value for now. 3868 if (ValLocs.size() != 1) 3869 return false; 3870 3871 CCValAssign &VA = ValLocs[0]; 3872 const Value *RV = Ret->getOperand(0); 3873 3874 // Don't bother handling odd stuff for now. 3875 if ((VA.getLocInfo() != CCValAssign::Full) && 3876 (VA.getLocInfo() != CCValAssign::BCvt)) 3877 return false; 3878 3879 // Only handle register returns for now. 3880 if (!VA.isRegLoc()) 3881 return false; 3882 3883 Register Reg = getRegForValue(RV); 3884 if (Reg == 0) 3885 return false; 3886 3887 unsigned SrcReg = Reg + VA.getValNo(); 3888 Register DestReg = VA.getLocReg(); 3889 // Avoid a cross-class copy. This is very unlikely. 3890 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 3891 return false; 3892 3893 EVT RVEVT = TLI.getValueType(DL, RV->getType()); 3894 if (!RVEVT.isSimple()) 3895 return false; 3896 3897 // Vectors (of > 1 lane) in big endian need tricky handling. 3898 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() && 3899 !Subtarget->isLittleEndian()) 3900 return false; 3901 3902 MVT RVVT = RVEVT.getSimpleVT(); 3903 if (RVVT == MVT::f128) 3904 return false; 3905 3906 MVT DestVT = VA.getValVT(); 3907 // Special handling for extended integers. 3908 if (RVVT != DestVT) { 3909 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) 3910 return false; 3911 3912 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 3913 return false; 3914 3915 bool IsZExt = Outs[0].Flags.isZExt(); 3916 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); 3917 if (SrcReg == 0) 3918 return false; 3919 } 3920 3921 // "Callee" (i.e. value producer) zero extends pointers at function 3922 // boundary. 3923 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy()) 3924 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff); 3925 3926 // Make the copy. 3927 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3928 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); 3929 3930 // Add register to return instruction. 
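    // The register is attached to the RET below as an implicit use so that the
    // copy into the physical return register is not treated as dead.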
3931     RetRegs.push_back(VA.getLocReg());
3932   }
3933 
3934   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3935                                     TII.get(AArch64::RET_ReallyLR));
3936   for (unsigned RetReg : RetRegs)
3937     MIB.addReg(RetReg, RegState::Implicit);
3938   return true;
3939 }
3940 
3941 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3942   Type *DestTy = I->getType();
3943   Value *Op = I->getOperand(0);
3944   Type *SrcTy = Op->getType();
3945 
3946   EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3947   EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3948   if (!SrcEVT.isSimple())
3949     return false;
3950   if (!DestEVT.isSimple())
3951     return false;
3952 
3953   MVT SrcVT = SrcEVT.getSimpleVT();
3954   MVT DestVT = DestEVT.getSimpleVT();
3955 
3956   if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3957       SrcVT != MVT::i8)
3958     return false;
3959   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3960       DestVT != MVT::i1)
3961     return false;
3962 
3963   Register SrcReg = getRegForValue(Op);
3964   if (!SrcReg)
3965     return false;
3966 
3967   // If we're truncating from i64 to a smaller non-legal type then generate an
3968   // AND. Otherwise, we know the high bits are undefined and a truncate only
3969   // generates a COPY. We cannot mark the source register also as the result
3970   // register, because this can incorrectly transfer the kill flag onto the
3971   // source register.
3972   unsigned ResultReg;
3973   if (SrcVT == MVT::i64) {
3974     uint64_t Mask = 0;
3975     switch (DestVT.SimpleTy) {
3976     default:
3977       // Trunc i64 to i32 is handled by the target-independent fast-isel.
3978       return false;
3979     case MVT::i1:
3980       Mask = 0x1;
3981       break;
3982     case MVT::i8:
3983       Mask = 0xff;
3984       break;
3985     case MVT::i16:
3986       Mask = 0xffff;
3987       break;
3988     }
3989     // Issue an extract_subreg to get the lower 32-bits.
3990     Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3991                                                 AArch64::sub_32);
3992     // Create the AND instruction which performs the actual truncation.
3993     ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3994     assert(ResultReg && "Unexpected AND instruction emission failure.");
3995   } else {
3996     ResultReg = createResultReg(&AArch64::GPR32RegClass);
3997     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3998             TII.get(TargetOpcode::COPY), ResultReg)
3999         .addReg(SrcReg);
4000   }
4001 
4002   updateValueMap(I, ResultReg);
4003   return true;
4004 }
4005 
4006 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4007   assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4008           DestVT == MVT::i64) &&
4009          "Unexpected value type.");
4010   // Handle i8 and i16 as i32.
4011   if (DestVT == MVT::i8 || DestVT == MVT::i16)
4012     DestVT = MVT::i32;
4013 
4014   if (IsZExt) {
4015     unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4016     assert(ResultReg && "Unexpected AND instruction emission failure.");
4017     if (DestVT == MVT::i64) {
4018       // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the
4019       // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4020       Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4021       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4022               TII.get(AArch64::SUBREG_TO_REG), Reg64)
4023           .addImm(0)
4024           .addReg(ResultReg)
4025           .addImm(AArch64::sub_32);
4026       ResultReg = Reg64;
4027     }
4028     return ResultReg;
4029   } else {
4030     if (DestVT == MVT::i64) {
4031       // FIXME: We're SExt i1 to i64.
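      // Not handled yet; returning 0 makes the callers bail out, leaving the
      // extension to the target-independent selector or SelectionDAG.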
4032 return 0; 4033 } 4034 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 4035 0, 0); 4036 } 4037 } 4038 4039 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4040 unsigned Opc, ZReg; 4041 switch (RetVT.SimpleTy) { 4042 default: return 0; 4043 case MVT::i8: 4044 case MVT::i16: 4045 case MVT::i32: 4046 RetVT = MVT::i32; 4047 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 4048 case MVT::i64: 4049 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 4050 } 4051 4052 const TargetRegisterClass *RC = 4053 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4054 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg); 4055 } 4056 4057 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4058 if (RetVT != MVT::i64) 4059 return 0; 4060 4061 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, 4062 Op0, Op1, AArch64::XZR); 4063 } 4064 4065 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4066 if (RetVT != MVT::i64) 4067 return 0; 4068 4069 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, 4070 Op0, Op1, AArch64::XZR); 4071 } 4072 4073 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, 4074 unsigned Op1Reg) { 4075 unsigned Opc = 0; 4076 bool NeedTrunc = false; 4077 uint64_t Mask = 0; 4078 switch (RetVT.SimpleTy) { 4079 default: return 0; 4080 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; 4081 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; 4082 case MVT::i32: Opc = AArch64::LSLVWr; break; 4083 case MVT::i64: Opc = AArch64::LSLVXr; break; 4084 } 4085 4086 const TargetRegisterClass *RC = 4087 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4088 if (NeedTrunc) 4089 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4090 4091 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4092 if (NeedTrunc) 4093 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4094 return ResultReg; 4095 } 4096 4097 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4098 uint64_t Shift, bool IsZExt) { 4099 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4100 "Unexpected source/return type pair."); 4101 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4102 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4103 "Unexpected source value type."); 4104 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4105 RetVT == MVT::i64) && "Unexpected return value type."); 4106 4107 bool Is64Bit = (RetVT == MVT::i64); 4108 unsigned RegSize = Is64Bit ? 64 : 32; 4109 unsigned DstBits = RetVT.getSizeInBits(); 4110 unsigned SrcBits = SrcVT.getSizeInBits(); 4111 const TargetRegisterClass *RC = 4112 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4113 4114 // Just emit a copy for "zero" shifts. 4115 if (Shift == 0) { 4116 if (RetVT == SrcVT) { 4117 Register ResultReg = createResultReg(RC); 4118 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4119 TII.get(TargetOpcode::COPY), ResultReg) 4120 .addReg(Op0); 4121 return ResultReg; 4122 } else 4123 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4124 } 4125 4126 // Don't deal with undefined shifts. 4127 if (Shift >= DstBits) 4128 return 0; 4129 4130 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
4131 // {S|U}BFM Wd, Wn, #r, #s 4132 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 4133 4134 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4135 // %2 = shl i16 %1, 4 4136 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 4137 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 4138 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 4139 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 4140 4141 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4142 // %2 = shl i16 %1, 8 4143 // Wd<32+7-24,32-24> = Wn<7:0> 4144 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 4145 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 4146 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 4147 4148 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4149 // %2 = shl i16 %1, 12 4150 // Wd<32+3-20,32-20> = Wn<3:0> 4151 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 4152 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 4153 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 4154 4155 unsigned ImmR = RegSize - Shift; 4156 // Limit the width to the length of the source type. 4157 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 4158 static const unsigned OpcTable[2][2] = { 4159 {AArch64::SBFMWri, AArch64::SBFMXri}, 4160 {AArch64::UBFMWri, AArch64::UBFMXri} 4161 }; 4162 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4163 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4164 Register TmpReg = MRI.createVirtualRegister(RC); 4165 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4166 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4167 .addImm(0) 4168 .addReg(Op0) 4169 .addImm(AArch64::sub_32); 4170 Op0 = TmpReg; 4171 } 4172 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4173 } 4174 4175 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, 4176 unsigned Op1Reg) { 4177 unsigned Opc = 0; 4178 bool NeedTrunc = false; 4179 uint64_t Mask = 0; 4180 switch (RetVT.SimpleTy) { 4181 default: return 0; 4182 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4183 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4184 case MVT::i32: Opc = AArch64::LSRVWr; break; 4185 case MVT::i64: Opc = AArch64::LSRVXr; break; 4186 } 4187 4188 const TargetRegisterClass *RC = 4189 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4190 if (NeedTrunc) { 4191 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask); 4192 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4193 } 4194 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4195 if (NeedTrunc) 4196 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4197 return ResultReg; 4198 } 4199 4200 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4201 uint64_t Shift, bool IsZExt) { 4202 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4203 "Unexpected source/return type pair."); 4204 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4205 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4206 "Unexpected source value type."); 4207 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4208 RetVT == MVT::i64) && "Unexpected return value type."); 4209 4210 bool Is64Bit = (RetVT == MVT::i64); 4211 unsigned RegSize = Is64Bit ? 64 : 32; 4212 unsigned DstBits = RetVT.getSizeInBits(); 4213 unsigned SrcBits = SrcVT.getSizeInBits(); 4214 const TargetRegisterClass *RC = 4215 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4216 4217 // Just emit a copy for "zero" shifts. 
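  // (If RetVT differs from SrcVT, a zero shift still needs the extension,
  // which is handled by emitIntExt below.)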
4218 if (Shift == 0) { 4219 if (RetVT == SrcVT) { 4220 Register ResultReg = createResultReg(RC); 4221 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4222 TII.get(TargetOpcode::COPY), ResultReg) 4223 .addReg(Op0); 4224 return ResultReg; 4225 } else 4226 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4227 } 4228 4229 // Don't deal with undefined shifts. 4230 if (Shift >= DstBits) 4231 return 0; 4232 4233 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4234 // {S|U}BFM Wd, Wn, #r, #s 4235 // Wd<s-r:0> = Wn<s:r> when r <= s 4236 4237 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4238 // %2 = lshr i16 %1, 4 4239 // Wd<7-4:0> = Wn<7:4> 4240 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4241 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4242 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4243 4244 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4245 // %2 = lshr i16 %1, 8 4246 // Wd<7-7,0> = Wn<7:7> 4247 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4248 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4249 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4250 4251 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4252 // %2 = lshr i16 %1, 12 4253 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4254 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4255 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4256 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4257 4258 if (Shift >= SrcBits && IsZExt) 4259 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4260 4261 // It is not possible to fold a sign-extend into the LShr instruction. In this 4262 // case emit a sign-extend. 4263 if (!IsZExt) { 4264 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4265 if (!Op0) 4266 return 0; 4267 SrcVT = RetVT; 4268 SrcBits = SrcVT.getSizeInBits(); 4269 IsZExt = true; 4270 } 4271 4272 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4273 unsigned ImmS = SrcBits - 1; 4274 static const unsigned OpcTable[2][2] = { 4275 {AArch64::SBFMWri, AArch64::SBFMXri}, 4276 {AArch64::UBFMWri, AArch64::UBFMXri} 4277 }; 4278 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4279 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4280 Register TmpReg = MRI.createVirtualRegister(RC); 4281 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4282 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4283 .addImm(0) 4284 .addReg(Op0) 4285 .addImm(AArch64::sub_32); 4286 Op0 = TmpReg; 4287 } 4288 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4289 } 4290 4291 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, 4292 unsigned Op1Reg) { 4293 unsigned Opc = 0; 4294 bool NeedTrunc = false; 4295 uint64_t Mask = 0; 4296 switch (RetVT.SimpleTy) { 4297 default: return 0; 4298 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4299 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4300 case MVT::i32: Opc = AArch64::ASRVWr; break; 4301 case MVT::i64: Opc = AArch64::ASRVXr; break; 4302 } 4303 4304 const TargetRegisterClass *RC = 4305 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4306 if (NeedTrunc) { 4307 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); 4308 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4309 } 4310 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4311 if (NeedTrunc) 4312 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4313 return ResultReg; 4314 } 4315 4316 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4317 uint64_t Shift, bool IsZExt) { 4318 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4319 "Unexpected source/return type pair."); 4320 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4321 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4322 "Unexpected source value type."); 4323 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4324 RetVT == MVT::i64) && "Unexpected return value type."); 4325 4326 bool Is64Bit = (RetVT == MVT::i64); 4327 unsigned RegSize = Is64Bit ? 64 : 32; 4328 unsigned DstBits = RetVT.getSizeInBits(); 4329 unsigned SrcBits = SrcVT.getSizeInBits(); 4330 const TargetRegisterClass *RC = 4331 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4332 4333 // Just emit a copy for "zero" shifts. 4334 if (Shift == 0) { 4335 if (RetVT == SrcVT) { 4336 Register ResultReg = createResultReg(RC); 4337 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4338 TII.get(TargetOpcode::COPY), ResultReg) 4339 .addReg(Op0); 4340 return ResultReg; 4341 } else 4342 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4343 } 4344 4345 // Don't deal with undefined shifts. 4346 if (Shift >= DstBits) 4347 return 0; 4348 4349 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4350 // {S|U}BFM Wd, Wn, #r, #s 4351 // Wd<s-r:0> = Wn<s:r> when r <= s 4352 4353 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4354 // %2 = ashr i16 %1, 4 4355 // Wd<7-4:0> = Wn<7:4> 4356 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4357 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4358 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4359 4360 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4361 // %2 = ashr i16 %1, 8 4362 // Wd<7-7,0> = Wn<7:7> 4363 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4364 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4365 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4366 4367 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4368 // %2 = ashr i16 %1, 12 4369 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4370 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4371 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4372 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4373 4374 if (Shift >= SrcBits && IsZExt) 4375 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4376 4377 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4378 unsigned ImmS = SrcBits - 1; 4379 static const unsigned OpcTable[2][2] = { 4380 {AArch64::SBFMWri, AArch64::SBFMXri}, 4381 {AArch64::UBFMWri, AArch64::UBFMXri} 4382 }; 4383 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4384 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4385 Register TmpReg = MRI.createVirtualRegister(RC); 4386 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4387 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4388 .addImm(0) 4389 .addReg(Op0) 4390 .addImm(AArch64::sub_32); 4391 Op0 = TmpReg; 4392 } 4393 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4394 } 4395 4396 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4397 bool IsZExt) { 4398 
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4399 4400 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4401 // DestVT are odd things, so test to make sure that they are both types we can 4402 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4403 // bail out to SelectionDAG. 4404 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4405 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4406 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4407 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4408 return 0; 4409 4410 unsigned Opc; 4411 unsigned Imm = 0; 4412 4413 switch (SrcVT.SimpleTy) { 4414 default: 4415 return 0; 4416 case MVT::i1: 4417 return emiti1Ext(SrcReg, DestVT, IsZExt); 4418 case MVT::i8: 4419 if (DestVT == MVT::i64) 4420 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4421 else 4422 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4423 Imm = 7; 4424 break; 4425 case MVT::i16: 4426 if (DestVT == MVT::i64) 4427 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4428 else 4429 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4430 Imm = 15; 4431 break; 4432 case MVT::i32: 4433 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4434 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4435 Imm = 31; 4436 break; 4437 } 4438 4439 // Handle i8 and i16 as i32. 4440 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4441 DestVT = MVT::i32; 4442 else if (DestVT == MVT::i64) { 4443 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4444 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4445 TII.get(AArch64::SUBREG_TO_REG), Src64) 4446 .addImm(0) 4447 .addReg(SrcReg) 4448 .addImm(AArch64::sub_32); 4449 SrcReg = Src64; 4450 } 4451 4452 const TargetRegisterClass *RC = 4453 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4454 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm); 4455 } 4456 4457 static bool isZExtLoad(const MachineInstr *LI) { 4458 switch (LI->getOpcode()) { 4459 default: 4460 return false; 4461 case AArch64::LDURBBi: 4462 case AArch64::LDURHHi: 4463 case AArch64::LDURWi: 4464 case AArch64::LDRBBui: 4465 case AArch64::LDRHHui: 4466 case AArch64::LDRWui: 4467 case AArch64::LDRBBroX: 4468 case AArch64::LDRHHroX: 4469 case AArch64::LDRWroX: 4470 case AArch64::LDRBBroW: 4471 case AArch64::LDRHHroW: 4472 case AArch64::LDRWroW: 4473 return true; 4474 } 4475 } 4476 4477 static bool isSExtLoad(const MachineInstr *LI) { 4478 switch (LI->getOpcode()) { 4479 default: 4480 return false; 4481 case AArch64::LDURSBWi: 4482 case AArch64::LDURSHWi: 4483 case AArch64::LDURSBXi: 4484 case AArch64::LDURSHXi: 4485 case AArch64::LDURSWi: 4486 case AArch64::LDRSBWui: 4487 case AArch64::LDRSHWui: 4488 case AArch64::LDRSBXui: 4489 case AArch64::LDRSHXui: 4490 case AArch64::LDRSWui: 4491 case AArch64::LDRSBWroX: 4492 case AArch64::LDRSHWroX: 4493 case AArch64::LDRSBXroX: 4494 case AArch64::LDRSHXroX: 4495 case AArch64::LDRSWroX: 4496 case AArch64::LDRSBWroW: 4497 case AArch64::LDRSHWroW: 4498 case AArch64::LDRSBXroW: 4499 case AArch64::LDRSHXroW: 4500 case AArch64::LDRSWroW: 4501 return true; 4502 } 4503 } 4504 4505 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4506 MVT SrcVT) { 4507 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4508 if (!LI || !LI->hasOneUse()) 4509 return false; 4510 4511 // Check if the load instruction has already been selected. 
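  // If FastISel has not assigned a register to the load yet, there is no
  // machine instruction to reuse and we bail out.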
4512 Register Reg = lookUpRegForValue(LI); 4513 if (!Reg) 4514 return false; 4515 4516 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4517 if (!MI) 4518 return false; 4519 4520 // Check if the correct load instruction has been emitted - SelectionDAG might 4521 // have emitted a zero-extending load, but we need a sign-extending load. 4522 bool IsZExt = isa<ZExtInst>(I); 4523 const auto *LoadMI = MI; 4524 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4525 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4526 Register LoadReg = MI->getOperand(1).getReg(); 4527 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4528 assert(LoadMI && "Expected valid instruction"); 4529 } 4530 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4531 return false; 4532 4533 // Nothing to be done. 4534 if (RetVT != MVT::i64 || SrcVT > MVT::i32) { 4535 updateValueMap(I, Reg); 4536 return true; 4537 } 4538 4539 if (IsZExt) { 4540 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 4541 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4542 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4543 .addImm(0) 4544 .addReg(Reg, getKillRegState(true)) 4545 .addImm(AArch64::sub_32); 4546 Reg = Reg64; 4547 } else { 4548 assert((MI->getOpcode() == TargetOpcode::COPY && 4549 MI->getOperand(1).getSubReg() == AArch64::sub_32) && 4550 "Expected copy instruction"); 4551 Reg = MI->getOperand(1).getReg(); 4552 MachineBasicBlock::iterator I(MI); 4553 removeDeadCode(I, std::next(I)); 4554 } 4555 updateValueMap(I, Reg); 4556 return true; 4557 } 4558 4559 bool AArch64FastISel::selectIntExt(const Instruction *I) { 4560 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 4561 "Unexpected integer extend instruction."); 4562 MVT RetVT; 4563 MVT SrcVT; 4564 if (!isTypeSupported(I->getType(), RetVT)) 4565 return false; 4566 4567 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) 4568 return false; 4569 4570 // Try to optimize already sign-/zero-extended values from load instructions. 4571 if (optimizeIntExtLoad(I, RetVT, SrcVT)) 4572 return true; 4573 4574 Register SrcReg = getRegForValue(I->getOperand(0)); 4575 if (!SrcReg) 4576 return false; 4577 4578 // Try to optimize already sign-/zero-extended values from function arguments. 4579 bool IsZExt = isa<ZExtInst>(I); 4580 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) { 4581 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { 4582 if (RetVT == MVT::i64 && SrcVT != MVT::i64) { 4583 Register ResultReg = createResultReg(&AArch64::GPR64RegClass); 4584 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4585 TII.get(AArch64::SUBREG_TO_REG), ResultReg) 4586 .addImm(0) 4587 .addReg(SrcReg) 4588 .addImm(AArch64::sub_32); 4589 SrcReg = ResultReg; 4590 } 4591 4592 updateValueMap(I, SrcReg); 4593 return true; 4594 } 4595 } 4596 4597 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); 4598 if (!ResultReg) 4599 return false; 4600 4601 updateValueMap(I, ResultReg); 4602 return true; 4603 } 4604 4605 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { 4606 EVT DestEVT = TLI.getValueType(DL, I->getType(), true); 4607 if (!DestEVT.isSimple()) 4608 return false; 4609 4610 MVT DestVT = DestEVT.getSimpleVT(); 4611 if (DestVT != MVT::i64 && DestVT != MVT::i32) 4612 return false; 4613 4614 unsigned DivOpc; 4615 bool Is64bit = (DestVT == MVT::i64); 4616 switch (ISDOpcode) { 4617 default: 4618 return false; 4619 case ISD::SREM: 4620 DivOpc = Is64bit ? 
AArch64::SDIVXr : AArch64::SDIVWr; 4621 break; 4622 case ISD::UREM: 4623 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; 4624 break; 4625 } 4626 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4627 Register Src0Reg = getRegForValue(I->getOperand(0)); 4628 if (!Src0Reg) 4629 return false; 4630 4631 Register Src1Reg = getRegForValue(I->getOperand(1)); 4632 if (!Src1Reg) 4633 return false; 4634 4635 const TargetRegisterClass *RC = 4636 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4637 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg); 4638 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4639 // The remainder is computed as numerator - (quotient * denominator) using the 4640 // MSUB instruction. 4641 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg); 4642 updateValueMap(I, ResultReg); 4643 return true; 4644 } 4645 4646 bool AArch64FastISel::selectMul(const Instruction *I) { 4647 MVT VT; 4648 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4649 return false; 4650 4651 if (VT.isVector()) 4652 return selectBinaryOp(I, ISD::MUL); 4653 4654 const Value *Src0 = I->getOperand(0); 4655 const Value *Src1 = I->getOperand(1); 4656 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4657 if (C->getValue().isPowerOf2()) 4658 std::swap(Src0, Src1); 4659 4660 // Try to simplify to a shift instruction. 4661 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4662 if (C->getValue().isPowerOf2()) { 4663 uint64_t ShiftVal = C->getValue().logBase2(); 4664 MVT SrcVT = VT; 4665 bool IsZExt = true; 4666 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4667 if (!isIntExtFree(ZExt)) { 4668 MVT VT; 4669 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4670 SrcVT = VT; 4671 IsZExt = true; 4672 Src0 = ZExt->getOperand(0); 4673 } 4674 } 4675 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4676 if (!isIntExtFree(SExt)) { 4677 MVT VT; 4678 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4679 SrcVT = VT; 4680 IsZExt = false; 4681 Src0 = SExt->getOperand(0); 4682 } 4683 } 4684 } 4685 4686 Register Src0Reg = getRegForValue(Src0); 4687 if (!Src0Reg) 4688 return false; 4689 4690 unsigned ResultReg = 4691 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt); 4692 4693 if (ResultReg) { 4694 updateValueMap(I, ResultReg); 4695 return true; 4696 } 4697 } 4698 4699 Register Src0Reg = getRegForValue(I->getOperand(0)); 4700 if (!Src0Reg) 4701 return false; 4702 4703 Register Src1Reg = getRegForValue(I->getOperand(1)); 4704 if (!Src1Reg) 4705 return false; 4706 4707 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg); 4708 4709 if (!ResultReg) 4710 return false; 4711 4712 updateValueMap(I, ResultReg); 4713 return true; 4714 } 4715 4716 bool AArch64FastISel::selectShift(const Instruction *I) { 4717 MVT RetVT; 4718 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4719 return false; 4720 4721 if (RetVT.isVector()) 4722 return selectOperator(I, I->getOpcode()); 4723 4724 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4725 unsigned ResultReg = 0; 4726 uint64_t ShiftVal = C->getZExtValue(); 4727 MVT SrcVT = RetVT; 4728 bool IsZExt = I->getOpcode() != Instruction::AShr; 4729 const Value *Op0 = I->getOperand(0); 4730 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4731 if (!isIntExtFree(ZExt)) { 4732 MVT TmpVT; 4733 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4734 SrcVT = TmpVT; 4735 IsZExt = true; 
4736 Op0 = ZExt->getOperand(0); 4737 } 4738 } 4739 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4740 if (!isIntExtFree(SExt)) { 4741 MVT TmpVT; 4742 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4743 SrcVT = TmpVT; 4744 IsZExt = false; 4745 Op0 = SExt->getOperand(0); 4746 } 4747 } 4748 } 4749 4750 Register Op0Reg = getRegForValue(Op0); 4751 if (!Op0Reg) 4752 return false; 4753 4754 switch (I->getOpcode()) { 4755 default: llvm_unreachable("Unexpected instruction."); 4756 case Instruction::Shl: 4757 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4758 break; 4759 case Instruction::AShr: 4760 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4761 break; 4762 case Instruction::LShr: 4763 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4764 break; 4765 } 4766 if (!ResultReg) 4767 return false; 4768 4769 updateValueMap(I, ResultReg); 4770 return true; 4771 } 4772 4773 Register Op0Reg = getRegForValue(I->getOperand(0)); 4774 if (!Op0Reg) 4775 return false; 4776 4777 Register Op1Reg = getRegForValue(I->getOperand(1)); 4778 if (!Op1Reg) 4779 return false; 4780 4781 unsigned ResultReg = 0; 4782 switch (I->getOpcode()) { 4783 default: llvm_unreachable("Unexpected instruction."); 4784 case Instruction::Shl: 4785 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg); 4786 break; 4787 case Instruction::AShr: 4788 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg); 4789 break; 4790 case Instruction::LShr: 4791 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg); 4792 break; 4793 } 4794 4795 if (!ResultReg) 4796 return false; 4797 4798 updateValueMap(I, ResultReg); 4799 return true; 4800 } 4801 4802 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4803 MVT RetVT, SrcVT; 4804 4805 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4806 return false; 4807 if (!isTypeLegal(I->getType(), RetVT)) 4808 return false; 4809 4810 unsigned Opc; 4811 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4812 Opc = AArch64::FMOVWSr; 4813 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4814 Opc = AArch64::FMOVXDr; 4815 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4816 Opc = AArch64::FMOVSWr; 4817 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4818 Opc = AArch64::FMOVDXr; 4819 else 4820 return false; 4821 4822 const TargetRegisterClass *RC = nullptr; 4823 switch (RetVT.SimpleTy) { 4824 default: llvm_unreachable("Unexpected value type."); 4825 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4826 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4827 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4828 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4829 } 4830 Register Op0Reg = getRegForValue(I->getOperand(0)); 4831 if (!Op0Reg) 4832 return false; 4833 4834 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg); 4835 if (!ResultReg) 4836 return false; 4837 4838 updateValueMap(I, ResultReg); 4839 return true; 4840 } 4841 4842 bool AArch64FastISel::selectFRem(const Instruction *I) { 4843 MVT RetVT; 4844 if (!isTypeLegal(I->getType(), RetVT)) 4845 return false; 4846 4847 RTLIB::Libcall LC; 4848 switch (RetVT.SimpleTy) { 4849 default: 4850 return false; 4851 case MVT::f32: 4852 LC = RTLIB::REM_F32; 4853 break; 4854 case MVT::f64: 4855 LC = RTLIB::REM_F64; 4856 break; 4857 } 4858 4859 ArgListTy Args; 4860 Args.reserve(I->getNumOperands()); 4861 4862 // Populate the argument list. 
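  // Both frem operands become arguments to the fmodf/fmod libcall chosen
  // above.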
4863 for (auto &Arg : I->operands()) { 4864 ArgListEntry Entry; 4865 Entry.Val = Arg; 4866 Entry.Ty = Arg->getType(); 4867 Args.push_back(Entry); 4868 } 4869 4870 CallLoweringInfo CLI; 4871 MCContext &Ctx = MF->getContext(); 4872 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4873 TLI.getLibcallName(LC), std::move(Args)); 4874 if (!lowerCallTo(CLI)) 4875 return false; 4876 updateValueMap(I, CLI.ResultReg); 4877 return true; 4878 } 4879 4880 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4881 MVT VT; 4882 if (!isTypeLegal(I->getType(), VT)) 4883 return false; 4884 4885 if (!isa<ConstantInt>(I->getOperand(1))) 4886 return selectBinaryOp(I, ISD::SDIV); 4887 4888 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4889 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4890 !(C.isPowerOf2() || C.isNegatedPowerOf2())) 4891 return selectBinaryOp(I, ISD::SDIV); 4892 4893 unsigned Lg2 = C.countr_zero(); 4894 Register Src0Reg = getRegForValue(I->getOperand(0)); 4895 if (!Src0Reg) 4896 return false; 4897 4898 if (cast<BinaryOperator>(I)->isExact()) { 4899 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2); 4900 if (!ResultReg) 4901 return false; 4902 updateValueMap(I, ResultReg); 4903 return true; 4904 } 4905 4906 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4907 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne); 4908 if (!AddReg) 4909 return false; 4910 4911 // (Src0 < 0) ? Pow2 - 1 : 0; 4912 if (!emitICmp_ri(VT, Src0Reg, 0)) 4913 return false; 4914 4915 unsigned SelectOpc; 4916 const TargetRegisterClass *RC; 4917 if (VT == MVT::i64) { 4918 SelectOpc = AArch64::CSELXr; 4919 RC = &AArch64::GPR64RegClass; 4920 } else { 4921 SelectOpc = AArch64::CSELWr; 4922 RC = &AArch64::GPR32RegClass; 4923 } 4924 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg, 4925 AArch64CC::LT); 4926 if (!SelectReg) 4927 return false; 4928 4929 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4930 // negate the result. 4931 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4932 unsigned ResultReg; 4933 if (C.isNegative()) 4934 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg, 4935 AArch64_AM::ASR, Lg2); 4936 else 4937 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2); 4938 4939 if (!ResultReg) 4940 return false; 4941 4942 updateValueMap(I, ResultReg); 4943 return true; 4944 } 4945 4946 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4947 /// have to duplicate it for AArch64, because otherwise we would fail during the 4948 /// sign-extend emission. 4949 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4950 Register IdxN = getRegForValue(Idx); 4951 if (IdxN == 0) 4952 // Unhandled operand. Halt "fast" selection and bail. 4953 return 0; 4954 4955 // If the index is smaller or larger than intptr_t, truncate or extend it. 4956 MVT PtrVT = TLI.getPointerTy(DL); 4957 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); 4958 if (IdxVT.bitsLT(PtrVT)) { 4959 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false); 4960 } else if (IdxVT.bitsGT(PtrVT)) 4961 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64"); 4962 return IdxN; 4963 } 4964 4965 /// This is mostly a copy of the existing FastISel GEP code, but we have to 4966 /// duplicate it for AArch64, because otherwise we would bail out even for 4967 /// simple cases. 
This is because the standard fastEmit functions don't cover
4968 /// MUL at all and ADD is lowered very inefficiently.
4969 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4970   if (Subtarget->isTargetILP32())
4971     return false;
4972 
4973   Register N = getRegForValue(I->getOperand(0));
4974   if (!N)
4975     return false;
4976 
4977   // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4978   // into a single N = N + TotalOffset.
4979   uint64_t TotalOffs = 0;
4980   MVT VT = TLI.getPointerTy(DL);
4981   for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4982        GTI != E; ++GTI) {
4983     const Value *Idx = GTI.getOperand();
4984     if (auto *StTy = GTI.getStructTypeOrNull()) {
4985       unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4986       // N = N + Offset
4987       if (Field)
4988         TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4989     } else {
4990       Type *Ty = GTI.getIndexedType();
4991 
4992       // If this is a constant subscript, handle it quickly.
4993       if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4994         if (CI->isZero())
4995           continue;
4996         // N = N + Offset
4997         TotalOffs +=
4998             DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
4999         continue;
5000       }
5001       if (TotalOffs) {
5002         N = emitAdd_ri_(VT, N, TotalOffs);
5003         if (!N)
5004           return false;
5005         TotalOffs = 0;
5006       }
5007 
5008       // N = N + Idx * ElementSize;
5009       uint64_t ElementSize = DL.getTypeAllocSize(Ty);
5010       unsigned IdxN = getRegForGEPIndex(Idx);
5011       if (!IdxN)
5012         return false;
5013 
5014       if (ElementSize != 1) {
5015         unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5016         if (!C)
5017           return false;
5018         IdxN = emitMul_rr(VT, IdxN, C);
5019         if (!IdxN)
5020           return false;
5021       }
5022       N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5023       if (!N)
5024         return false;
5025     }
5026   }
5027   if (TotalOffs) {
5028     N = emitAdd_ri_(VT, N, TotalOffs);
5029     if (!N)
5030       return false;
5031   }
5032   updateValueMap(I, N);
5033   return true;
5034 }
5035 
5036 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5037   assert(TM.getOptLevel() == CodeGenOpt::None &&
5038          "cmpxchg survived AtomicExpand at optlevel > -O0");
5039 
5040   auto *RetPairTy = cast<StructType>(I->getType());
5041   Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5042   assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5043          "cmpxchg has a non-i1 status result");
5044 
5045   MVT VT;
5046   if (!isTypeLegal(RetTy, VT))
5047     return false;
5048 
5049   const TargetRegisterClass *ResRC;
5050   unsigned Opc, CmpOpc;
5051   // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5052   // extractvalue selection doesn't support that.
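  // CMP_SWAP_32/64 are pseudo instructions that are expanded later into an
  // exclusive load/store (LDAXR/STLXR) retry loop.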
5053 if (VT == MVT::i32) { 5054 Opc = AArch64::CMP_SWAP_32; 5055 CmpOpc = AArch64::SUBSWrs; 5056 ResRC = &AArch64::GPR32RegClass; 5057 } else if (VT == MVT::i64) { 5058 Opc = AArch64::CMP_SWAP_64; 5059 CmpOpc = AArch64::SUBSXrs; 5060 ResRC = &AArch64::GPR64RegClass; 5061 } else { 5062 return false; 5063 } 5064 5065 const MCInstrDesc &II = TII.get(Opc); 5066 5067 const Register AddrReg = constrainOperandRegClass( 5068 II, getRegForValue(I->getPointerOperand()), II.getNumDefs()); 5069 const Register DesiredReg = constrainOperandRegClass( 5070 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1); 5071 const Register NewReg = constrainOperandRegClass( 5072 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2); 5073 5074 const Register ResultReg1 = createResultReg(ResRC); 5075 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass); 5076 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass); 5077 5078 // FIXME: MachineMemOperand doesn't support cmpxchg yet. 5079 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 5080 .addDef(ResultReg1) 5081 .addDef(ScratchReg) 5082 .addUse(AddrReg) 5083 .addUse(DesiredReg) 5084 .addUse(NewReg); 5085 5086 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc)) 5087 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR) 5088 .addUse(ResultReg1) 5089 .addUse(DesiredReg) 5090 .addImm(0); 5091 5092 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr)) 5093 .addDef(ResultReg2) 5094 .addUse(AArch64::WZR) 5095 .addUse(AArch64::WZR) 5096 .addImm(AArch64CC::NE); 5097 5098 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); 5099 updateValueMap(I, ResultReg1, 2); 5100 return true; 5101 } 5102 5103 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 5104 if (TLI.fallBackToDAGISel(*I)) 5105 return false; 5106 switch (I->getOpcode()) { 5107 default: 5108 break; 5109 case Instruction::Add: 5110 case Instruction::Sub: 5111 return selectAddSub(I); 5112 case Instruction::Mul: 5113 return selectMul(I); 5114 case Instruction::SDiv: 5115 return selectSDiv(I); 5116 case Instruction::SRem: 5117 if (!selectBinaryOp(I, ISD::SREM)) 5118 return selectRem(I, ISD::SREM); 5119 return true; 5120 case Instruction::URem: 5121 if (!selectBinaryOp(I, ISD::UREM)) 5122 return selectRem(I, ISD::UREM); 5123 return true; 5124 case Instruction::Shl: 5125 case Instruction::LShr: 5126 case Instruction::AShr: 5127 return selectShift(I); 5128 case Instruction::And: 5129 case Instruction::Or: 5130 case Instruction::Xor: 5131 return selectLogicalOp(I); 5132 case Instruction::Br: 5133 return selectBranch(I); 5134 case Instruction::IndirectBr: 5135 return selectIndirectBr(I); 5136 case Instruction::BitCast: 5137 if (!FastISel::selectBitCast(I)) 5138 return selectBitCast(I); 5139 return true; 5140 case Instruction::FPToSI: 5141 if (!selectCast(I, ISD::FP_TO_SINT)) 5142 return selectFPToInt(I, /*Signed=*/true); 5143 return true; 5144 case Instruction::FPToUI: 5145 return selectFPToInt(I, /*Signed=*/false); 5146 case Instruction::ZExt: 5147 case Instruction::SExt: 5148 return selectIntExt(I); 5149 case Instruction::Trunc: 5150 if (!selectCast(I, ISD::TRUNCATE)) 5151 return selectTrunc(I); 5152 return true; 5153 case Instruction::FPExt: 5154 return selectFPExt(I); 5155 case Instruction::FPTrunc: 5156 return selectFPTrunc(I); 5157 case Instruction::SIToFP: 5158 if (!selectCast(I, ISD::SINT_TO_FP)) 5159 return selectIntToFP(I, /*Signed=*/true); 5160 return true; 5161 case Instruction::UIToFP: 
5162     return selectIntToFP(I, /*Signed=*/false);
5163   case Instruction::Load:
5164     return selectLoad(I);
5165   case Instruction::Store:
5166     return selectStore(I);
5167   case Instruction::FCmp:
5168   case Instruction::ICmp:
5169     return selectCmp(I);
5170   case Instruction::Select:
5171     return selectSelect(I);
5172   case Instruction::Ret:
5173     return selectRet(I);
5174   case Instruction::FRem:
5175     return selectFRem(I);
5176   case Instruction::GetElementPtr:
5177     return selectGetElementPtr(I);
5178   case Instruction::AtomicCmpXchg:
5179     return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5180   }
5181 
5182   // Fall back to target-independent instruction selection.
5183   return selectOperator(I, I->getOpcode());
5184 }
5185 
5186 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5187                                   const TargetLibraryInfo *LibInfo) {
5188 
5189   SMEAttrs CallerAttrs(*FuncInfo.Fn);
5190   if (CallerAttrs.hasZAState() ||
5191       (!CallerAttrs.hasStreamingInterface() && CallerAttrs.hasStreamingBody()))
5192     return nullptr;
5193   return new AArch64FastISel(FuncInfo, LibInfo);
5194 }
5195