//===- AArch64FastISel.cpp - AArch64 FastISel implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcallUtil.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg = 0;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    Address() { Base.Reg = 0; }

    void
    setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }

    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }

    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }

    unsigned getOffsetReg() const {
      return OffsetReg;
    }

    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }

    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }

    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);

  // Utility helper routines.
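  // (Like the selection routines above, these helpers report failure by
  //  returning false or a null register, in which case FastISel leaves the
  //  instruction for SelectionDAG to handle.)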
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags,
                            unsigned ScaleFactor, MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          MaybeAlign Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  unsigned getRegForGEPIndex(const Value *Idx);

  // Emit helper routines.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
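  // (Note: the _rr/_ri/_rs/_rx suffixes mirror the AArch64 instruction forms
  //  they emit: register/register, register/immediate, register with shifted
  //  register, and register with extended register, respectively.)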
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
                        MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            unsigned RHSReg, uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = false);

  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);

public:
  // Backend specific FastISel code.
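  // (These overrides are the hooks FastISel invokes when it needs a virtual
  //  register for an alloca, a constant, or a floating-point zero that has
  //  not been materialized yet.)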
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

/// Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    if (LI->hasOneUse())
      return true;

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
      return true;

  return false;
}

/// Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
static unsigned getImplicitScaleFactor(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return 0; // invalid
  case MVT::i1: // fall-through
  case MVT::i8:
    return 1;
  case MVT::i16:
    return 2;
  case MVT::i32: // fall-through
  case MVT::f32:
    return 4;
  case MVT::i64: // fall-through
  case MVT::f64:
    return 8;
  }
}

CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::GHC)
    return CC_AArch64_GHC;
  if (CC == CallingConv::CFGuard_Check)
    return CC_AArch64_Win64_CFGuard_Check;
  if (Subtarget->isTargetDarwin())
    return CC_AArch64_DarwinPCS;
  if (Subtarget->isTargetWindows())
    return CC_AArch64_Win64PCS;
  return CC_AArch64_AAPCS;
}

unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}

unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ?
      AArch64::XZR : AArch64::WZR;
  Register ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}

unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  int Imm =
      Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
  if (Imm != -1) {
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // For the large code model materialize the FP constant in code.
  if (TM.getCodeModel() == CodeModel::Large) {
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    Register TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(TmpReg, getKillRegState(true));

    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}

unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
    return 0;

  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    unsigned LdrOpc;
    if (Subtarget->isTargetILP32()) {
      ResultReg = createResultReg(&AArch64::GPR32RegClass);
      LdrOpc = AArch64::LDRWui;
    } else {
      ResultReg = createResultReg(&AArch64::GPR64RegClass);
      LdrOpc = AArch64::LDRXui;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                          AArch64II::MO_NC | OpFlags);
    if (!Subtarget->isTargetILP32())
      return ResultReg;

    // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
    // so we must extend the result on ILP32.
    Register Result64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::SUBREG_TO_REG))
        .addDef(Result64)
        .addImm(0)
        .addReg(ResultReg, RegState::Kill)
        .addImm(AArch64::sub_32);
    return Result64;
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    if (OpFlags & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      //
      // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
      // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
      // are not exactly 1:1 with FastISel so we cannot easily abstract this
      // out. At some point, it would be nice to find a way to not have this
      // duplicate code.
      unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
              DstReg)
          .addReg(ADRPReg)
          .addGlobalAddress(GV, /*Offset=*/0x100000000,
                            AArch64II::MO_PREL | AArch64II::MO_G3)
          .addImm(48);
      ADRPReg = DstReg;
    }

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0,
                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
        .addImm(0);
  }
  return ResultReg;
}

unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();
  // arm64_32 has 32-bit pointers held in 64-bit registers.
  // Because of that, 'null' pointers need to have a somewhat special
  // treatment.
  if (isa<ConstantPointerNull>(C)) {
    assert(VT == MVT::i64 && "Expected 64-bit pointers");
    return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
  }

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

  return 0;
}

unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return 0;

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
}

/// Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
        return true;
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
        return true;
  }
  return false;
}

// Computes the address to get to an object.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast:
    // Look through bitcasts.
    return computeAddress(U->getOperand(0), Addr, Ty);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // we can.
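    // (Illustrative IR, not from this file: for
    //    getelementptr { i32, [4 x i32] }, ptr %p, i64 0, i32 1, i64 2
    //  the struct field contributes 4 and the array index 2 * 4, so TmpOffset
    //  becomes 12 under a typical DataLayout, leaving only %p to be resolved
    //  by the recursive call below.)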
    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
         GTI != E; ++GTI) {
      const Value *Op = GTI.getOperand();
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = GTI.getSequentialElementStride(DL);
        while (true) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
  case Instruction::Sub: {
    // Subs of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }
    break;
  }
  case Instruction::Shl: {
    if (Addr.getOffsetReg())
      break;

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
    if (!CI)
      break;

    unsigned Val = CI->getZExtValue();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
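        // (Illustrative: for an index computed as
        //  shl (zext i32 %i to i64), 3, the extend and the shift both fold
        //  into a single UXTW #3 register-offset operand of the load/store.)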
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
            std::swap(LHS, RHS);

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Register Reg = getRegForValue(LHS);
            if (!Reg)
              return false;
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
            Addr.setOffsetReg(Reg);
            return true;
          }
      }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::Mul: {
    if (Addr.getOffsetReg())
      break;

    if (!isMulPowOf2(U))
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize power-of-2 value to the RHS.
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())
        std::swap(LHS, RHS);

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = LHS;
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
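        // (Same idea as in the Shl case above: a multiply by 2, 4, or 8 acts
        //  as an implicit shift of the offset register, so the shl example
        //  there applies here with mul in place of shl.)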
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::And: {
    if (Addr.getOffsetReg())
      break;

    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
        std::swap(LHS, RHS);

    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        Register Reg = getRegForValue(LHS);
        if (!Reg)
          return false;
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
        Addr.setOffsetReg(Reg);
        return true;
      }
    break;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())
      break;

    const Value *Src = nullptr;
    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    if (!Src)
      break;

    Addr.setShift(0);
    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  } // end switch

  if (Addr.isRegBase() && !Addr.getReg()) {
    Register Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setReg(Reg);
    return true;
  }

  if (!Addr.getOffsetReg()) {
    Register Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }

  return false;
}

bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  bool InMBB = true;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}

bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
    return false;

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

/// Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign- or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}

bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW )
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}

void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           MachineMemOperand::Flags Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ?
        1 : 0;
    Addr.setReg(
      constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    Addr.setOffsetReg(
      constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}

unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32: // fall-through
  case MVT::i64:
    break;
  }
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr )
          std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
                                WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    Register RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
                         SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
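  // (Illustrative: for add i64 %a, (mul i64 %b, 8) this path emits a single
  //  ADD Xd, Xa, Xb, LSL #3 via emitAddSub_rs, instead of a separate shift.)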
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      Register RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
                                ShiftVal, SetFlags, WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          Register RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
                                    ShiftVal, SetFlags, WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
}

unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
      RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
    return 0;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
      { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ?
        AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg);
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        uint64_t Imm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  unsigned ShiftImm;
  if (isUInt<12>(Imm))
    ShiftImm = 0;
  else if ((Imm & 0xfff000) == Imm) {
    ShiftImm = 12;
    Imm >>= 12;
  } else
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri,  AArch64::SUBXri  },
      { AArch64::ADDWri,  AArch64::ADDXri  }  },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addImm(Imm)
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
      { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ?
        AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getShifterImm(ShiftType, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
         RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  if (ShiftImm >= 4)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
      { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC = nullptr;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getArithExtendImm(ExtType, ShiftImm));
  return ResultReg;
}

bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
  Type *Ty = LHS->getType();
  EVT EVT = TLI.getValueType(DL, Ty, true);
  if (!EVT.isSimple())
    return false;
  MVT VT = EVT.getSimpleVT();

  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    return emitICmp(VT, LHS, RHS, IsZExt);
  case MVT::f32:
  case MVT::f64:
    return emitFCmp(VT, LHS, RHS);
  }
}

bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
                 IsZExt) != 0;
}

bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
}

bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  if (RetVT != MVT::f32 && RetVT != MVT::f64)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  bool UseImm = false;
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->isZero() && !CFP->isNegative())
      UseImm = true;

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;

  if (UseImm) {
    unsigned Opc = (RetVT == MVT::f64) ?
        AArch64::FCMPDri : AArch64::FCMPSri;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(LHSReg);
    return true;
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return false;

  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
      .addReg(LHSReg)
      .addReg(RHSReg);
  return true;
}

unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

/// This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
  unsigned ResultReg;
  if (Imm < 0)
    ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
  else
    ResultReg = emitAddSub_ri(true, VT, Op0, Imm);

  if (ResultReg)
    return ResultReg;

  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return 0;

  ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
  return ResultReg;
}

unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg, bool WantResult) {
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
                       /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
                       ShiftImm, /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
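  // (Illustrative: for and i64 %a, (mul i64 %b, 4) this path can emit
  //  AND Xd, Xa, Xb, LSL #2 via emitLogicalOp_rs.)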
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      Register RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        Register RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, uint64_t Imm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };
  const TargetRegisterClass *RC;
  unsigned Opc;
  unsigned RegSize;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32: {
    unsigned Idx = ISDOpc - ISD::AND;
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    RegSize = 32;
    break;
  }
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;
    RegSize = 64;
    break;
  }

  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return 0;

  Register ResultReg =
      fastEmitInst_ri(Opc, RC, LHSReg,
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, unsigned RHSReg,
                                           uint64_t ShiftImm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };

  // Don't deal with undefined shifts.
1717 if (ShiftImm >= RetVT.getSizeInBits()) 1718 return 0; 1719 1720 const TargetRegisterClass *RC; 1721 unsigned Opc; 1722 switch (RetVT.SimpleTy) { 1723 default: 1724 return 0; 1725 case MVT::i1: 1726 case MVT::i8: 1727 case MVT::i16: 1728 case MVT::i32: 1729 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1730 RC = &AArch64::GPR32RegClass; 1731 break; 1732 case MVT::i64: 1733 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1734 RC = &AArch64::GPR64RegClass; 1735 break; 1736 } 1737 Register ResultReg = 1738 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg, 1739 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1740 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1741 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1742 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1743 } 1744 return ResultReg; 1745 } 1746 1747 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, 1748 uint64_t Imm) { 1749 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm); 1750 } 1751 1752 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1753 bool WantZExt, MachineMemOperand *MMO) { 1754 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1755 return 0; 1756 1757 // Simplify this down to something we can handle. 1758 if (!simplifyAddress(Addr, VT)) 1759 return 0; 1760 1761 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1762 if (!ScaleFactor) 1763 llvm_unreachable("Unexpected value type."); 1764 1765 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1766 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1767 bool UseScaled = true; 1768 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1769 UseScaled = false; 1770 ScaleFactor = 1; 1771 } 1772 1773 static const unsigned GPOpcTable[2][8][4] = { 1774 // Sign-extend. 1775 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1776 AArch64::LDURXi }, 1777 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1778 AArch64::LDURXi }, 1779 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1780 AArch64::LDRXui }, 1781 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1782 AArch64::LDRXui }, 1783 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1784 AArch64::LDRXroX }, 1785 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1786 AArch64::LDRXroX }, 1787 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1788 AArch64::LDRXroW }, 1789 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1790 AArch64::LDRXroW } 1791 }, 1792 // Zero-extend. 
1793 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1794 AArch64::LDURXi }, 1795 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1796 AArch64::LDURXi }, 1797 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1798 AArch64::LDRXui }, 1799 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1800 AArch64::LDRXui }, 1801 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1802 AArch64::LDRXroX }, 1803 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1804 AArch64::LDRXroX }, 1805 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1806 AArch64::LDRXroW }, 1807 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1808 AArch64::LDRXroW } 1809 } 1810 }; 1811 1812 static const unsigned FPOpcTable[4][2] = { 1813 { AArch64::LDURSi, AArch64::LDURDi }, 1814 { AArch64::LDRSui, AArch64::LDRDui }, 1815 { AArch64::LDRSroX, AArch64::LDRDroX }, 1816 { AArch64::LDRSroW, AArch64::LDRDroW } 1817 }; 1818 1819 unsigned Opc; 1820 const TargetRegisterClass *RC; 1821 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1822 Addr.getOffsetReg(); 1823 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1824 if (Addr.getExtendType() == AArch64_AM::UXTW || 1825 Addr.getExtendType() == AArch64_AM::SXTW) 1826 Idx++; 1827 1828 bool IsRet64Bit = RetVT == MVT::i64; 1829 switch (VT.SimpleTy) { 1830 default: 1831 llvm_unreachable("Unexpected value type."); 1832 case MVT::i1: // Intentional fall-through. 1833 case MVT::i8: 1834 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1835 RC = (IsRet64Bit && !WantZExt) ? 1836 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1837 break; 1838 case MVT::i16: 1839 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1840 RC = (IsRet64Bit && !WantZExt) ? 1841 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1842 break; 1843 case MVT::i32: 1844 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1845 RC = (IsRet64Bit && !WantZExt) ? 1846 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1847 break; 1848 case MVT::i64: 1849 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1850 RC = &AArch64::GPR64RegClass; 1851 break; 1852 case MVT::f32: 1853 Opc = FPOpcTable[Idx][0]; 1854 RC = &AArch64::FPR32RegClass; 1855 break; 1856 case MVT::f64: 1857 Opc = FPOpcTable[Idx][1]; 1858 RC = &AArch64::FPR64RegClass; 1859 break; 1860 } 1861 1862 // Create the base instruction, then add the operands. 1863 Register ResultReg = createResultReg(RC); 1864 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1865 TII.get(Opc), ResultReg); 1866 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1867 1868 // Loading an i1 requires special handling. 1869 if (VT == MVT::i1) { 1870 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1); 1871 assert(ANDReg && "Unexpected AND instruction emission failure."); 1872 ResultReg = ANDReg; 1873 } 1874 1875 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1876 // the 32bit reg to a 64bit reg. 
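  // A 32-bit load on AArch64 already zeroes the upper 32 bits of the X
  // register, so no extra instruction is needed; the SUBREG_TO_REG below just
  // re-declares the 32-bit result as the sub_32 lane of a new 64-bit vreg.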
1877 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1878 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 1879 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1880 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1881 .addImm(0) 1882 .addReg(ResultReg, getKillRegState(true)) 1883 .addImm(AArch64::sub_32); 1884 ResultReg = Reg64; 1885 } 1886 return ResultReg; 1887 } 1888 1889 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1890 MVT VT; 1891 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1892 return false; 1893 1894 if (VT.isVector()) 1895 return selectOperator(I, I->getOpcode()); 1896 1897 unsigned ResultReg; 1898 switch (I->getOpcode()) { 1899 default: 1900 llvm_unreachable("Unexpected instruction."); 1901 case Instruction::Add: 1902 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1903 break; 1904 case Instruction::Sub: 1905 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1906 break; 1907 } 1908 if (!ResultReg) 1909 return false; 1910 1911 updateValueMap(I, ResultReg); 1912 return true; 1913 } 1914 1915 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1916 MVT VT; 1917 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1918 return false; 1919 1920 if (VT.isVector()) 1921 return selectOperator(I, I->getOpcode()); 1922 1923 unsigned ResultReg; 1924 switch (I->getOpcode()) { 1925 default: 1926 llvm_unreachable("Unexpected instruction."); 1927 case Instruction::And: 1928 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1929 break; 1930 case Instruction::Or: 1931 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1932 break; 1933 case Instruction::Xor: 1934 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1935 break; 1936 } 1937 if (!ResultReg) 1938 return false; 1939 1940 updateValueMap(I, ResultReg); 1941 return true; 1942 } 1943 1944 bool AArch64FastISel::selectLoad(const Instruction *I) { 1945 MVT VT; 1946 // Verify we have a legal type before going any further. Currently, we handle 1947 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1948 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 1949 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1950 cast<LoadInst>(I)->isAtomic()) 1951 return false; 1952 1953 const Value *SV = I->getOperand(0); 1954 if (TLI.supportSwiftError()) { 1955 // Swifterror values can come from either a function parameter with 1956 // swifterror attribute or an alloca with swifterror attribute. 1957 if (const Argument *Arg = dyn_cast<Argument>(SV)) { 1958 if (Arg->hasSwiftErrorAttr()) 1959 return false; 1960 } 1961 1962 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { 1963 if (Alloca->isSwiftError()) 1964 return false; 1965 } 1966 } 1967 1968 // See if we can handle this address. 1969 Address Addr; 1970 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 1971 return false; 1972 1973 // Fold the following sign-/zero-extend into the load instruction. 
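  // If the load's only user is a zext/sext to a supported type, widen the
  // load itself rather than emitting a separate extend. Illustrative example:
  //   %v = load i16, ptr %p
  //   %e = sext i16 %v to i64
  // selects a single LDRSHXui instead of an LDRHHui followed by a separate
  // sign extend.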
1974 bool WantZExt = true; 1975 MVT RetVT = VT; 1976 const Value *IntExtVal = nullptr; 1977 if (I->hasOneUse()) { 1978 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { 1979 if (isTypeSupported(ZE->getType(), RetVT)) 1980 IntExtVal = ZE; 1981 else 1982 RetVT = VT; 1983 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { 1984 if (isTypeSupported(SE->getType(), RetVT)) 1985 IntExtVal = SE; 1986 else 1987 RetVT = VT; 1988 WantZExt = false; 1989 } 1990 } 1991 1992 unsigned ResultReg = 1993 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); 1994 if (!ResultReg) 1995 return false; 1996 1997 // There are a few different cases we have to handle, because the load or the 1998 // sign-/zero-extend might not be selected by FastISel if we fall-back to 1999 // SelectionDAG. There is also an ordering issue when both instructions are in 2000 // different basic blocks. 2001 // 1.) The load instruction is selected by FastISel, but the integer extend 2002 // not. This usually happens when the integer extend is in a different 2003 // basic block and SelectionDAG took over for that basic block. 2004 // 2.) The load instruction is selected before the integer extend. This only 2005 // happens when the integer extend is in a different basic block. 2006 // 3.) The load instruction is selected by SelectionDAG and the integer extend 2007 // by FastISel. This happens if there are instructions between the load 2008 // and the integer extend that couldn't be selected by FastISel. 2009 if (IntExtVal) { 2010 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG 2011 // could select it. Emit a copy to subreg if necessary. FastISel will remove 2012 // it when it selects the integer extend. 2013 Register Reg = lookUpRegForValue(IntExtVal); 2014 auto *MI = MRI.getUniqueVRegDef(Reg); 2015 if (!MI) { 2016 if (RetVT == MVT::i64 && VT <= MVT::i32) { 2017 if (WantZExt) { 2018 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). 2019 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt)); 2020 ResultReg = std::prev(I)->getOperand(0).getReg(); 2021 removeDeadCode(I, std::next(I)); 2022 } else 2023 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, 2024 AArch64::sub_32); 2025 } 2026 updateValueMap(I, ResultReg); 2027 return true; 2028 } 2029 2030 // The integer extend has already been emitted - delete all the instructions 2031 // that have been emitted by the integer extend lowering code and use the 2032 // result from the load instruction directly. 
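    // Walk the chain of instructions emitted for the extend by following each
    // instruction's first register use operand, deleting them as we go; the
    // extend's value is then remapped to the already-extended load result.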
2033 while (MI) { 2034 Reg = 0; 2035 for (auto &Opnd : MI->uses()) { 2036 if (Opnd.isReg()) { 2037 Reg = Opnd.getReg(); 2038 break; 2039 } 2040 } 2041 MachineBasicBlock::iterator I(MI); 2042 removeDeadCode(I, std::next(I)); 2043 MI = nullptr; 2044 if (Reg) 2045 MI = MRI.getUniqueVRegDef(Reg); 2046 } 2047 updateValueMap(IntExtVal, ResultReg); 2048 return true; 2049 } 2050 2051 updateValueMap(I, ResultReg); 2052 return true; 2053 } 2054 2055 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, 2056 unsigned AddrReg, 2057 MachineMemOperand *MMO) { 2058 unsigned Opc; 2059 switch (VT.SimpleTy) { 2060 default: return false; 2061 case MVT::i8: Opc = AArch64::STLRB; break; 2062 case MVT::i16: Opc = AArch64::STLRH; break; 2063 case MVT::i32: Opc = AArch64::STLRW; break; 2064 case MVT::i64: Opc = AArch64::STLRX; break; 2065 } 2066 2067 const MCInstrDesc &II = TII.get(Opc); 2068 SrcReg = constrainOperandRegClass(II, SrcReg, 0); 2069 AddrReg = constrainOperandRegClass(II, AddrReg, 1); 2070 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 2071 .addReg(SrcReg) 2072 .addReg(AddrReg) 2073 .addMemOperand(MMO); 2074 return true; 2075 } 2076 2077 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 2078 MachineMemOperand *MMO) { 2079 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 2080 return false; 2081 2082 // Simplify this down to something we can handle. 2083 if (!simplifyAddress(Addr, VT)) 2084 return false; 2085 2086 unsigned ScaleFactor = getImplicitScaleFactor(VT); 2087 if (!ScaleFactor) 2088 llvm_unreachable("Unexpected value type."); 2089 2090 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 2091 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 2092 bool UseScaled = true; 2093 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 2094 UseScaled = false; 2095 ScaleFactor = 1; 2096 } 2097 2098 static const unsigned OpcTable[4][6] = { 2099 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 2100 AArch64::STURSi, AArch64::STURDi }, 2101 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 2102 AArch64::STRSui, AArch64::STRDui }, 2103 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 2104 AArch64::STRSroX, AArch64::STRDroX }, 2105 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 2106 AArch64::STRSroW, AArch64::STRDroW } 2107 }; 2108 2109 unsigned Opc; 2110 bool VTIsi1 = false; 2111 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 2112 Addr.getOffsetReg(); 2113 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 2114 if (Addr.getExtendType() == AArch64_AM::UXTW || 2115 Addr.getExtendType() == AArch64_AM::SXTW) 2116 Idx++; 2117 2118 switch (VT.SimpleTy) { 2119 default: llvm_unreachable("Unexpected value type."); 2120 case MVT::i1: VTIsi1 = true; [[fallthrough]]; 2121 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2122 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2123 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2124 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2125 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2126 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2127 } 2128 2129 // Storing an i1 requires special handling. 2130 if (VTIsi1 && SrcReg != AArch64::WZR) { 2131 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1); 2132 assert(ANDReg && "Unexpected AND instruction emission failure."); 2133 SrcReg = ANDReg; 2134 } 2135 // Create the base instruction, then add the operands. 
2136 const MCInstrDesc &II = TII.get(Opc); 2137 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2138 MachineInstrBuilder MIB = 2139 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg); 2140 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2141 2142 return true; 2143 } 2144 2145 bool AArch64FastISel::selectStore(const Instruction *I) { 2146 MVT VT; 2147 const Value *Op0 = I->getOperand(0); 2148 // Verify we have a legal type before going any further. Currently, we handle 2149 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2150 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2151 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) 2152 return false; 2153 2154 const Value *PtrV = I->getOperand(1); 2155 if (TLI.supportSwiftError()) { 2156 // Swifterror values can come from either a function parameter with 2157 // swifterror attribute or an alloca with swifterror attribute. 2158 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { 2159 if (Arg->hasSwiftErrorAttr()) 2160 return false; 2161 } 2162 2163 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { 2164 if (Alloca->isSwiftError()) 2165 return false; 2166 } 2167 } 2168 2169 // Get the value to be stored into a register. Use the zero register directly 2170 // when possible to avoid an unnecessary copy and a wasted register. 2171 unsigned SrcReg = 0; 2172 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2173 if (CI->isZero()) 2174 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2175 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2176 if (CF->isZero() && !CF->isNegative()) { 2177 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2178 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2179 } 2180 } 2181 2182 if (!SrcReg) 2183 SrcReg = getRegForValue(Op0); 2184 2185 if (!SrcReg) 2186 return false; 2187 2188 auto *SI = cast<StoreInst>(I); 2189 2190 // Try to emit a STLR for seq_cst/release. 2191 if (SI->isAtomic()) { 2192 AtomicOrdering Ord = SI->getOrdering(); 2193 // The non-atomic instructions are sufficient for relaxed stores. 2194 if (isReleaseOrStronger(Ord)) { 2195 // The STLR addressing mode only supports a base reg; pass that directly. 2196 Register AddrReg = getRegForValue(PtrV); 2197 return emitStoreRelease(VT, SrcReg, AddrReg, 2198 createMachineMemOperandFor(I)); 2199 } 2200 } 2201 2202 // See if we can handle this address. 2203 Address Addr; 2204 if (!computeAddress(PtrV, Addr, Op0->getType())) 2205 return false; 2206 2207 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2208 return false; 2209 return true; 2210 } 2211 2212 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2213 switch (Pred) { 2214 case CmpInst::FCMP_ONE: 2215 case CmpInst::FCMP_UEQ: 2216 default: 2217 // AL is our "false" for now. The other two need more compares. 
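    // FCMP_ONE ("ordered and not equal") and FCMP_UEQ ("unordered or equal")
    // each cover two disjoint AArch64 conditions (GT or MI, and VS or EQ,
    // respectively), so callers such as selectBranch, selectCmp and
    // selectSelect emit a second conditional instruction for them instead of
    // relying on this table.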
2218 return AArch64CC::AL; 2219 case CmpInst::ICMP_EQ: 2220 case CmpInst::FCMP_OEQ: 2221 return AArch64CC::EQ; 2222 case CmpInst::ICMP_SGT: 2223 case CmpInst::FCMP_OGT: 2224 return AArch64CC::GT; 2225 case CmpInst::ICMP_SGE: 2226 case CmpInst::FCMP_OGE: 2227 return AArch64CC::GE; 2228 case CmpInst::ICMP_UGT: 2229 case CmpInst::FCMP_UGT: 2230 return AArch64CC::HI; 2231 case CmpInst::FCMP_OLT: 2232 return AArch64CC::MI; 2233 case CmpInst::ICMP_ULE: 2234 case CmpInst::FCMP_OLE: 2235 return AArch64CC::LS; 2236 case CmpInst::FCMP_ORD: 2237 return AArch64CC::VC; 2238 case CmpInst::FCMP_UNO: 2239 return AArch64CC::VS; 2240 case CmpInst::FCMP_UGE: 2241 return AArch64CC::PL; 2242 case CmpInst::ICMP_SLT: 2243 case CmpInst::FCMP_ULT: 2244 return AArch64CC::LT; 2245 case CmpInst::ICMP_SLE: 2246 case CmpInst::FCMP_ULE: 2247 return AArch64CC::LE; 2248 case CmpInst::FCMP_UNE: 2249 case CmpInst::ICMP_NE: 2250 return AArch64CC::NE; 2251 case CmpInst::ICMP_UGE: 2252 return AArch64CC::HS; 2253 case CmpInst::ICMP_ULT: 2254 return AArch64CC::LO; 2255 } 2256 } 2257 2258 /// Try to emit a combined compare-and-branch instruction. 2259 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2260 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions 2261 // will not be produced, as they are conditional branch instructions that do 2262 // not set flags. 2263 if (FuncInfo.MF->getFunction().hasFnAttribute( 2264 Attribute::SpeculativeLoadHardening)) 2265 return false; 2266 2267 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2268 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2269 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2270 2271 const Value *LHS = CI->getOperand(0); 2272 const Value *RHS = CI->getOperand(1); 2273 2274 MVT VT; 2275 if (!isTypeSupported(LHS->getType(), VT)) 2276 return false; 2277 2278 unsigned BW = VT.getSizeInBits(); 2279 if (BW > 64) 2280 return false; 2281 2282 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2283 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2284 2285 // Try to take advantage of fallthrough opportunities. 
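  // If the true block is the layout successor, branch on the inverted
  // condition to the false block and fall through to the true block, e.g.
  // "cbz w8, .LBBfalse" instead of "cbnz w8, .LBBtrue" plus "b .LBBfalse".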
2286 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2287 std::swap(TBB, FBB); 2288 Predicate = CmpInst::getInversePredicate(Predicate); 2289 } 2290 2291 int TestBit = -1; 2292 bool IsCmpNE; 2293 switch (Predicate) { 2294 default: 2295 return false; 2296 case CmpInst::ICMP_EQ: 2297 case CmpInst::ICMP_NE: 2298 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2299 std::swap(LHS, RHS); 2300 2301 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2302 return false; 2303 2304 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2305 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2306 const Value *AndLHS = AI->getOperand(0); 2307 const Value *AndRHS = AI->getOperand(1); 2308 2309 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2310 if (C->getValue().isPowerOf2()) 2311 std::swap(AndLHS, AndRHS); 2312 2313 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2314 if (C->getValue().isPowerOf2()) { 2315 TestBit = C->getValue().logBase2(); 2316 LHS = AndLHS; 2317 } 2318 } 2319 2320 if (VT == MVT::i1) 2321 TestBit = 0; 2322 2323 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2324 break; 2325 case CmpInst::ICMP_SLT: 2326 case CmpInst::ICMP_SGE: 2327 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2328 return false; 2329 2330 TestBit = BW - 1; 2331 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2332 break; 2333 case CmpInst::ICMP_SGT: 2334 case CmpInst::ICMP_SLE: 2335 if (!isa<ConstantInt>(RHS)) 2336 return false; 2337 2338 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2339 return false; 2340 2341 TestBit = BW - 1; 2342 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2343 break; 2344 } // end switch 2345 2346 static const unsigned OpcTable[2][2][2] = { 2347 { {AArch64::CBZW, AArch64::CBZX }, 2348 {AArch64::CBNZW, AArch64::CBNZX} }, 2349 { {AArch64::TBZW, AArch64::TBZX }, 2350 {AArch64::TBNZW, AArch64::TBNZX} } 2351 }; 2352 2353 bool IsBitTest = TestBit != -1; 2354 bool Is64Bit = BW == 64; 2355 if (TestBit < 32 && TestBit >= 0) 2356 Is64Bit = false; 2357 2358 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2359 const MCInstrDesc &II = TII.get(Opc); 2360 2361 Register SrcReg = getRegForValue(LHS); 2362 if (!SrcReg) 2363 return false; 2364 2365 if (BW == 64 && !Is64Bit) 2366 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32); 2367 2368 if ((BW < 32) && !IsBitTest) 2369 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); 2370 2371 // Emit the combined compare and branch instruction. 2372 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2373 MachineInstrBuilder MIB = 2374 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)) 2375 .addReg(SrcReg); 2376 if (IsBitTest) 2377 MIB.addImm(TestBit); 2378 MIB.addMBB(TBB); 2379 2380 finishCondBranch(BI->getParent(), TBB, FBB); 2381 return true; 2382 } 2383 2384 bool AArch64FastISel::selectBranch(const Instruction *I) { 2385 const BranchInst *BI = cast<BranchInst>(I); 2386 if (BI->isUnconditional()) { 2387 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2388 fastEmitBranch(MSucc, BI->getDebugLoc()); 2389 return true; 2390 } 2391 2392 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2393 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2394 2395 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2396 if (CI->hasOneUse() && isValueAvailable(CI)) { 2397 // Try to optimize or fold the cmp. 
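      // When the compare's operands are identical, optimizeCmpPredicate may
      // simplify the predicate; if it folds all the way to FCMP_TRUE or
      // FCMP_FALSE, the conditional branch degenerates into an unconditional
      // branch to the corresponding successor below.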
2398 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2399 switch (Predicate) { 2400 default: 2401 break; 2402 case CmpInst::FCMP_FALSE: 2403 fastEmitBranch(FBB, MIMD.getDL()); 2404 return true; 2405 case CmpInst::FCMP_TRUE: 2406 fastEmitBranch(TBB, MIMD.getDL()); 2407 return true; 2408 } 2409 2410 // Try to emit a combined compare-and-branch first. 2411 if (emitCompareAndBranch(BI)) 2412 return true; 2413 2414 // Try to take advantage of fallthrough opportunities. 2415 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2416 std::swap(TBB, FBB); 2417 Predicate = CmpInst::getInversePredicate(Predicate); 2418 } 2419 2420 // Emit the cmp. 2421 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2422 return false; 2423 2424 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2425 // instruction. 2426 AArch64CC::CondCode CC = getCompareCC(Predicate); 2427 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2428 switch (Predicate) { 2429 default: 2430 break; 2431 case CmpInst::FCMP_UEQ: 2432 ExtraCC = AArch64CC::EQ; 2433 CC = AArch64CC::VS; 2434 break; 2435 case CmpInst::FCMP_ONE: 2436 ExtraCC = AArch64CC::MI; 2437 CC = AArch64CC::GT; 2438 break; 2439 } 2440 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2441 2442 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2443 if (ExtraCC != AArch64CC::AL) { 2444 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2445 .addImm(ExtraCC) 2446 .addMBB(TBB); 2447 } 2448 2449 // Emit the branch. 2450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2451 .addImm(CC) 2452 .addMBB(TBB); 2453 2454 finishCondBranch(BI->getParent(), TBB, FBB); 2455 return true; 2456 } 2457 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2458 uint64_t Imm = CI->getZExtValue(); 2459 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2460 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B)) 2461 .addMBB(Target); 2462 2463 // Obtain the branch probability and add the target to the successor list. 2464 if (FuncInfo.BPI) { 2465 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2466 BI->getParent(), Target->getBasicBlock()); 2467 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2468 } else 2469 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2470 return true; 2471 } else { 2472 AArch64CC::CondCode CC = AArch64CC::NE; 2473 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2474 // Fake request the condition, otherwise the intrinsic might be completely 2475 // optimized away. 2476 Register CondReg = getRegForValue(BI->getCondition()); 2477 if (!CondReg) 2478 return false; 2479 2480 // Emit the branch. 2481 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2482 .addImm(CC) 2483 .addMBB(TBB); 2484 2485 finishCondBranch(BI->getParent(), TBB, FBB); 2486 return true; 2487 } 2488 } 2489 2490 Register CondReg = getRegForValue(BI->getCondition()); 2491 if (CondReg == 0) 2492 return false; 2493 2494 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
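  // For example, a branch on an i1 held in w8 becomes "tbnz w8, #0, .LBBtrue",
  // or "tbz w8, #0, .LBBfalse" when the true block is the layout successor
  // and can be reached by falling through.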
2495 unsigned Opcode = AArch64::TBNZW; 2496 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2497 std::swap(TBB, FBB); 2498 Opcode = AArch64::TBZW; 2499 } 2500 2501 const MCInstrDesc &II = TII.get(Opcode); 2502 Register ConstrainedCondReg 2503 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2504 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 2505 .addReg(ConstrainedCondReg) 2506 .addImm(0) 2507 .addMBB(TBB); 2508 2509 finishCondBranch(BI->getParent(), TBB, FBB); 2510 return true; 2511 } 2512 2513 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2514 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2515 Register AddrReg = getRegForValue(BI->getOperand(0)); 2516 if (AddrReg == 0) 2517 return false; 2518 2519 // Authenticated indirectbr is not implemented yet. 2520 if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos")) 2521 return false; 2522 2523 // Emit the indirect branch. 2524 const MCInstrDesc &II = TII.get(AArch64::BR); 2525 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2526 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg); 2527 2528 // Make sure the CFG is up-to-date. 2529 for (const auto *Succ : BI->successors()) 2530 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); 2531 2532 return true; 2533 } 2534 2535 bool AArch64FastISel::selectCmp(const Instruction *I) { 2536 const CmpInst *CI = cast<CmpInst>(I); 2537 2538 // Vectors of i1 are weird: bail out. 2539 if (CI->getType()->isVectorTy()) 2540 return false; 2541 2542 // Try to optimize or fold the cmp. 2543 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2544 unsigned ResultReg = 0; 2545 switch (Predicate) { 2546 default: 2547 break; 2548 case CmpInst::FCMP_FALSE: 2549 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2550 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 2551 TII.get(TargetOpcode::COPY), ResultReg) 2552 .addReg(AArch64::WZR, getKillRegState(true)); 2553 break; 2554 case CmpInst::FCMP_TRUE: 2555 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2556 break; 2557 } 2558 2559 if (ResultReg) { 2560 updateValueMap(I, ResultReg); 2561 return true; 2562 } 2563 2564 // Emit the cmp. 2565 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2566 return false; 2567 2568 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2569 2570 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2571 // condition codes are inverted, because they are used by CSINC. 2572 static unsigned CondCodeTable[2][2] = { 2573 { AArch64CC::NE, AArch64CC::VC }, 2574 { AArch64CC::PL, AArch64CC::LE } 2575 }; 2576 unsigned *CondCodes = nullptr; 2577 switch (Predicate) { 2578 default: 2579 break; 2580 case CmpInst::FCMP_UEQ: 2581 CondCodes = &CondCodeTable[0][0]; 2582 break; 2583 case CmpInst::FCMP_ONE: 2584 CondCodes = &CondCodeTable[1][0]; 2585 break; 2586 } 2587 2588 if (CondCodes) { 2589 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2590 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2591 TmpReg1) 2592 .addReg(AArch64::WZR, getKillRegState(true)) 2593 .addReg(AArch64::WZR, getKillRegState(true)) 2594 .addImm(CondCodes[0]); 2595 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2596 ResultReg) 2597 .addReg(TmpReg1, getKillRegState(true)) 2598 .addReg(AArch64::WZR, getKillRegState(true)) 2599 .addImm(CondCodes[1]); 2600 2601 updateValueMap(I, ResultReg); 2602 return true; 2603 } 2604 2605 // Now set a register based on the comparison. 
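  // CSINC Wd, WZR, WZR, <inverted CC> yields 0 when the inverted condition
  // holds and WZR + 1 = 1 otherwise, i.e. 1 exactly when the original
  // condition is true; this is the "cset" alias, e.g. "cset w8, eq".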
2606 AArch64CC::CondCode CC = getCompareCC(Predicate); 2607 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2608 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2609 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2610 ResultReg) 2611 .addReg(AArch64::WZR, getKillRegState(true)) 2612 .addReg(AArch64::WZR, getKillRegState(true)) 2613 .addImm(invertedCC); 2614 2615 updateValueMap(I, ResultReg); 2616 return true; 2617 } 2618 2619 /// Optimize selects of i1 if one of the operands has a 'true' or 'false' 2620 /// value. 2621 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2622 if (!SI->getType()->isIntegerTy(1)) 2623 return false; 2624 2625 const Value *Src1Val, *Src2Val; 2626 unsigned Opc = 0; 2627 bool NeedExtraOp = false; 2628 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2629 if (CI->isOne()) { 2630 Src1Val = SI->getCondition(); 2631 Src2Val = SI->getFalseValue(); 2632 Opc = AArch64::ORRWrr; 2633 } else { 2634 assert(CI->isZero()); 2635 Src1Val = SI->getFalseValue(); 2636 Src2Val = SI->getCondition(); 2637 Opc = AArch64::BICWrr; 2638 } 2639 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2640 if (CI->isOne()) { 2641 Src1Val = SI->getCondition(); 2642 Src2Val = SI->getTrueValue(); 2643 Opc = AArch64::ORRWrr; 2644 NeedExtraOp = true; 2645 } else { 2646 assert(CI->isZero()); 2647 Src1Val = SI->getCondition(); 2648 Src2Val = SI->getTrueValue(); 2649 Opc = AArch64::ANDWrr; 2650 } 2651 } 2652 2653 if (!Opc) 2654 return false; 2655 2656 Register Src1Reg = getRegForValue(Src1Val); 2657 if (!Src1Reg) 2658 return false; 2659 2660 Register Src2Reg = getRegForValue(Src2Val); 2661 if (!Src2Reg) 2662 return false; 2663 2664 if (NeedExtraOp) 2665 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1); 2666 2667 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2668 Src2Reg); 2669 updateValueMap(SI, ResultReg); 2670 return true; 2671 } 2672 2673 bool AArch64FastISel::selectSelect(const Instruction *I) { 2674 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2675 MVT VT; 2676 if (!isTypeSupported(I->getType(), VT)) 2677 return false; 2678 2679 unsigned Opc; 2680 const TargetRegisterClass *RC; 2681 switch (VT.SimpleTy) { 2682 default: 2683 return false; 2684 case MVT::i1: 2685 case MVT::i8: 2686 case MVT::i16: 2687 case MVT::i32: 2688 Opc = AArch64::CSELWr; 2689 RC = &AArch64::GPR32RegClass; 2690 break; 2691 case MVT::i64: 2692 Opc = AArch64::CSELXr; 2693 RC = &AArch64::GPR64RegClass; 2694 break; 2695 case MVT::f32: 2696 Opc = AArch64::FCSELSrrr; 2697 RC = &AArch64::FPR32RegClass; 2698 break; 2699 case MVT::f64: 2700 Opc = AArch64::FCSELDrrr; 2701 RC = &AArch64::FPR64RegClass; 2702 break; 2703 } 2704 2705 const SelectInst *SI = cast<SelectInst>(I); 2706 const Value *Cond = SI->getCondition(); 2707 AArch64CC::CondCode CC = AArch64CC::NE; 2708 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2709 2710 if (optimizeSelect(SI)) 2711 return true; 2712 2713 // Try to pickup the flags, so we don't have to emit another compare. 2714 if (foldXALUIntrinsic(CC, I, Cond)) { 2715 // Fake request the condition to force emission of the XALU intrinsic. 2716 Register CondReg = getRegForValue(Cond); 2717 if (!CondReg) 2718 return false; 2719 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2720 isValueAvailable(Cond)) { 2721 const auto *Cmp = cast<CmpInst>(Cond); 2722 // Try to optimize or fold the cmp. 
2723 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2724 const Value *FoldSelect = nullptr; 2725 switch (Predicate) { 2726 default: 2727 break; 2728 case CmpInst::FCMP_FALSE: 2729 FoldSelect = SI->getFalseValue(); 2730 break; 2731 case CmpInst::FCMP_TRUE: 2732 FoldSelect = SI->getTrueValue(); 2733 break; 2734 } 2735 2736 if (FoldSelect) { 2737 Register SrcReg = getRegForValue(FoldSelect); 2738 if (!SrcReg) 2739 return false; 2740 2741 updateValueMap(I, SrcReg); 2742 return true; 2743 } 2744 2745 // Emit the cmp. 2746 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2747 return false; 2748 2749 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2750 CC = getCompareCC(Predicate); 2751 switch (Predicate) { 2752 default: 2753 break; 2754 case CmpInst::FCMP_UEQ: 2755 ExtraCC = AArch64CC::EQ; 2756 CC = AArch64CC::VS; 2757 break; 2758 case CmpInst::FCMP_ONE: 2759 ExtraCC = AArch64CC::MI; 2760 CC = AArch64CC::GT; 2761 break; 2762 } 2763 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2764 } else { 2765 Register CondReg = getRegForValue(Cond); 2766 if (!CondReg) 2767 return false; 2768 2769 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2770 CondReg = constrainOperandRegClass(II, CondReg, 1); 2771 2772 // Emit a TST instruction (ANDS wzr, reg, #imm). 2773 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, 2774 AArch64::WZR) 2775 .addReg(CondReg) 2776 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2777 } 2778 2779 Register Src1Reg = getRegForValue(SI->getTrueValue()); 2780 Register Src2Reg = getRegForValue(SI->getFalseValue()); 2781 2782 if (!Src1Reg || !Src2Reg) 2783 return false; 2784 2785 if (ExtraCC != AArch64CC::AL) 2786 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC); 2787 2788 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC); 2789 updateValueMap(I, ResultReg); 2790 return true; 2791 } 2792 2793 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2794 Value *V = I->getOperand(0); 2795 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2796 return false; 2797 2798 Register Op = getRegForValue(V); 2799 if (Op == 0) 2800 return false; 2801 2802 Register ResultReg = createResultReg(&AArch64::FPR64RegClass); 2803 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr), 2804 ResultReg).addReg(Op); 2805 updateValueMap(I, ResultReg); 2806 return true; 2807 } 2808 2809 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2810 Value *V = I->getOperand(0); 2811 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2812 return false; 2813 2814 Register Op = getRegForValue(V); 2815 if (Op == 0) 2816 return false; 2817 2818 Register ResultReg = createResultReg(&AArch64::FPR32RegClass); 2819 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr), 2820 ResultReg).addReg(Op); 2821 updateValueMap(I, ResultReg); 2822 return true; 2823 } 2824 2825 // FPToUI and FPToSI 2826 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2827 MVT DestVT; 2828 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2829 return false; 2830 2831 Register SrcReg = getRegForValue(I->getOperand(0)); 2832 if (SrcReg == 0) 2833 return false; 2834 2835 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2836 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16) 2837 return false; 2838 2839 unsigned Opc; 2840 if (SrcVT == MVT::f64) { 2841 if (Signed) 2842 Opc = (DestVT == MVT::i32) ? 
AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2843 else 2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2845 } else { 2846 if (Signed) 2847 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2848 else 2849 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2850 } 2851 Register ResultReg = createResultReg( 2852 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2853 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 2854 .addReg(SrcReg); 2855 updateValueMap(I, ResultReg); 2856 return true; 2857 } 2858 2859 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2860 MVT DestVT; 2861 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2862 return false; 2863 // Let regular ISEL handle FP16 2864 if (DestVT == MVT::f16 || DestVT == MVT::bf16) 2865 return false; 2866 2867 assert((DestVT == MVT::f32 || DestVT == MVT::f64) && 2868 "Unexpected value type."); 2869 2870 Register SrcReg = getRegForValue(I->getOperand(0)); 2871 if (!SrcReg) 2872 return false; 2873 2874 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2875 2876 // Handle sign-extension. 2877 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2878 SrcReg = 2879 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2880 if (!SrcReg) 2881 return false; 2882 } 2883 2884 unsigned Opc; 2885 if (SrcVT == MVT::i64) { 2886 if (Signed) 2887 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2888 else 2889 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2890 } else { 2891 if (Signed) 2892 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2893 else 2894 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2895 } 2896 2897 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg); 2898 updateValueMap(I, ResultReg); 2899 return true; 2900 } 2901 2902 bool AArch64FastISel::fastLowerArguments() { 2903 if (!FuncInfo.CanLowerReturn) 2904 return false; 2905 2906 const Function *F = FuncInfo.Fn; 2907 if (F->isVarArg()) 2908 return false; 2909 2910 CallingConv::ID CC = F->getCallingConv(); 2911 if (CC != CallingConv::C && CC != CallingConv::Swift) 2912 return false; 2913 2914 if (Subtarget->hasCustomCallingConv()) 2915 return false; 2916 2917 // Only handle simple cases of up to 8 GPR and FPR each. 
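  // AAPCS64 passes the first eight integer arguments in W0-W7/X0-X7 and the
  // first eight floating-point or vector arguments in the corresponding
  // H/S/D/Q registers; anything that would spill to the stack is left to
  // SelectionDAG.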
2918 unsigned GPRCnt = 0; 2919 unsigned FPRCnt = 0; 2920 for (auto const &Arg : F->args()) { 2921 if (Arg.hasAttribute(Attribute::ByVal) || 2922 Arg.hasAttribute(Attribute::InReg) || 2923 Arg.hasAttribute(Attribute::StructRet) || 2924 Arg.hasAttribute(Attribute::SwiftSelf) || 2925 Arg.hasAttribute(Attribute::SwiftAsync) || 2926 Arg.hasAttribute(Attribute::SwiftError) || 2927 Arg.hasAttribute(Attribute::Nest)) 2928 return false; 2929 2930 Type *ArgTy = Arg.getType(); 2931 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2932 return false; 2933 2934 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2935 if (!ArgVT.isSimple()) 2936 return false; 2937 2938 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2939 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2940 return false; 2941 2942 if (VT.isVector() && 2943 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2944 return false; 2945 2946 if (VT >= MVT::i1 && VT <= MVT::i64) 2947 ++GPRCnt; 2948 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2949 VT.is128BitVector()) 2950 ++FPRCnt; 2951 else 2952 return false; 2953 2954 if (GPRCnt > 8 || FPRCnt > 8) 2955 return false; 2956 } 2957 2958 static const MCPhysReg Registers[6][8] = { 2959 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 2960 AArch64::W5, AArch64::W6, AArch64::W7 }, 2961 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 2962 AArch64::X5, AArch64::X6, AArch64::X7 }, 2963 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 2964 AArch64::H5, AArch64::H6, AArch64::H7 }, 2965 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 2966 AArch64::S5, AArch64::S6, AArch64::S7 }, 2967 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 2968 AArch64::D5, AArch64::D6, AArch64::D7 }, 2969 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 2970 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 2971 }; 2972 2973 unsigned GPRIdx = 0; 2974 unsigned FPRIdx = 0; 2975 for (auto const &Arg : F->args()) { 2976 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 2977 unsigned SrcReg; 2978 const TargetRegisterClass *RC; 2979 if (VT >= MVT::i1 && VT <= MVT::i32) { 2980 SrcReg = Registers[0][GPRIdx++]; 2981 RC = &AArch64::GPR32RegClass; 2982 VT = MVT::i32; 2983 } else if (VT == MVT::i64) { 2984 SrcReg = Registers[1][GPRIdx++]; 2985 RC = &AArch64::GPR64RegClass; 2986 } else if (VT == MVT::f16 || VT == MVT::bf16) { 2987 SrcReg = Registers[2][FPRIdx++]; 2988 RC = &AArch64::FPR16RegClass; 2989 } else if (VT == MVT::f32) { 2990 SrcReg = Registers[3][FPRIdx++]; 2991 RC = &AArch64::FPR32RegClass; 2992 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 2993 SrcReg = Registers[4][FPRIdx++]; 2994 RC = &AArch64::FPR64RegClass; 2995 } else if (VT.is128BitVector()) { 2996 SrcReg = Registers[5][FPRIdx++]; 2997 RC = &AArch64::FPR128RegClass; 2998 } else 2999 llvm_unreachable("Unexpected value type."); 3000 3001 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 3002 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 3003 // Without this, EmitLiveInCopies may eliminate the livein if its only 3004 // use is a bitcast (which isn't turned into an instruction). 
3005 Register ResultReg = createResultReg(RC); 3006 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3007 TII.get(TargetOpcode::COPY), ResultReg) 3008 .addReg(DstReg, getKillRegState(true)); 3009 updateValueMap(&Arg, ResultReg); 3010 } 3011 return true; 3012 } 3013 3014 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 3015 SmallVectorImpl<MVT> &OutVTs, 3016 unsigned &NumBytes) { 3017 CallingConv::ID CC = CLI.CallConv; 3018 SmallVector<CCValAssign, 16> ArgLocs; 3019 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 3020 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 3021 3022 // Get a count of how many bytes are to be pushed on the stack. 3023 NumBytes = CCInfo.getStackSize(); 3024 3025 // Issue CALLSEQ_START 3026 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 3027 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown)) 3028 .addImm(NumBytes).addImm(0); 3029 3030 // Process the args. 3031 for (CCValAssign &VA : ArgLocs) { 3032 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 3033 MVT ArgVT = OutVTs[VA.getValNo()]; 3034 3035 Register ArgReg = getRegForValue(ArgVal); 3036 if (!ArgReg) 3037 return false; 3038 3039 // Handle arg promotion: SExt, ZExt, AExt. 3040 switch (VA.getLocInfo()) { 3041 case CCValAssign::Full: 3042 break; 3043 case CCValAssign::SExt: { 3044 MVT DestVT = VA.getLocVT(); 3045 MVT SrcVT = ArgVT; 3046 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 3047 if (!ArgReg) 3048 return false; 3049 break; 3050 } 3051 case CCValAssign::AExt: 3052 // Intentional fall-through. 3053 case CCValAssign::ZExt: { 3054 MVT DestVT = VA.getLocVT(); 3055 MVT SrcVT = ArgVT; 3056 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 3057 if (!ArgReg) 3058 return false; 3059 break; 3060 } 3061 default: 3062 llvm_unreachable("Unknown arg promotion!"); 3063 } 3064 3065 // Now copy/store arg to correct locations. 3066 if (VA.isRegLoc() && !VA.needsCustom()) { 3067 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3068 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 3069 CLI.OutRegs.push_back(VA.getLocReg()); 3070 } else if (VA.needsCustom()) { 3071 // FIXME: Handle custom args. 3072 return false; 3073 } else { 3074 assert(VA.isMemLoc() && "Assuming store on stack."); 3075 3076 // Don't emit stores for undef values. 3077 if (isa<UndefValue>(ArgVal)) 3078 continue; 3079 3080 // Need to store on the stack. 3081 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 3082 3083 unsigned BEAlign = 0; 3084 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3085 BEAlign = 8 - ArgSize; 3086 3087 Address Addr; 3088 Addr.setKind(Address::RegBase); 3089 Addr.setReg(AArch64::SP); 3090 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3091 3092 Align Alignment = DL.getABITypeAlign(ArgVal->getType()); 3093 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3094 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 3095 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3096 3097 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3098 return false; 3099 } 3100 } 3101 return true; 3102 } 3103 3104 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) { 3105 CallingConv::ID CC = CLI.CallConv; 3106 3107 // Issue CALLSEQ_END 3108 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3109 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp)) 3110 .addImm(NumBytes).addImm(0); 3111 3112 // Now the return values. 
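  // AnalyzeCallResult assigns each return value to its ABI location (e.g. an
  // i32 result comes back in W0 and an f64 in D0); each location is then
  // copied into the virtual registers reserved for the call result.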
3113 SmallVector<CCValAssign, 16> RVLocs; 3114 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3115 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC)); 3116 3117 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy); 3118 for (unsigned i = 0; i != RVLocs.size(); ++i) { 3119 CCValAssign &VA = RVLocs[i]; 3120 MVT CopyVT = VA.getValVT(); 3121 unsigned CopyReg = ResultReg + i; 3122 3123 // TODO: Handle big-endian results 3124 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3125 return false; 3126 3127 // Copy result out of their specified physreg. 3128 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), 3129 CopyReg) 3130 .addReg(VA.getLocReg()); 3131 CLI.InRegs.push_back(VA.getLocReg()); 3132 } 3133 3134 CLI.ResultReg = ResultReg; 3135 CLI.NumResultRegs = RVLocs.size(); 3136 3137 return true; 3138 } 3139 3140 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3141 CallingConv::ID CC = CLI.CallConv; 3142 bool IsTailCall = CLI.IsTailCall; 3143 bool IsVarArg = CLI.IsVarArg; 3144 const Value *Callee = CLI.Callee; 3145 MCSymbol *Symbol = CLI.Symbol; 3146 3147 if (!Callee && !Symbol) 3148 return false; 3149 3150 // Allow SelectionDAG isel to handle calls to functions like setjmp that need 3151 // a bti instruction following the call. 3152 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) && 3153 !Subtarget->noBTIAtReturnTwice() && 3154 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) 3155 return false; 3156 3157 // Allow SelectionDAG isel to handle indirect calls with KCFI checks. 3158 if (CLI.CB && CLI.CB->isIndirectCall() && 3159 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi)) 3160 return false; 3161 3162 // Allow SelectionDAG isel to handle tail calls. 3163 if (IsTailCall) 3164 return false; 3165 3166 // FIXME: we could and should support this, but for now correctness at -O0 is 3167 // more important. 3168 if (Subtarget->isTargetILP32()) 3169 return false; 3170 3171 CodeModel::Model CM = TM.getCodeModel(); 3172 // Only support the small-addressing and large code models. 3173 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) 3174 return false; 3175 3176 // FIXME: Add large code model support for ELF. 3177 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3178 return false; 3179 3180 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind 3181 // attribute. Check "RtLibUseGOT" instead. 3182 if (MF->getFunction().getParent()->getRtLibUseGOT()) 3183 return false; 3184 3185 // Let SDISel handle vararg functions. 3186 if (IsVarArg) 3187 return false; 3188 3189 if (Subtarget->isWindowsArm64EC()) 3190 return false; 3191 3192 for (auto Flag : CLI.OutFlags) 3193 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || 3194 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError()) 3195 return false; 3196 3197 // Set up the argument vectors. 3198 SmallVector<MVT, 16> OutVTs; 3199 OutVTs.reserve(CLI.OutVals.size()); 3200 3201 for (auto *Val : CLI.OutVals) { 3202 MVT VT; 3203 if (!isTypeLegal(Val->getType(), VT) && 3204 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3205 return false; 3206 3207 // We don't handle vector parameters yet. 
3208 if (VT.isVector() || VT.getSizeInBits() > 64) 3209 return false; 3210 3211 OutVTs.push_back(VT); 3212 } 3213 3214 Address Addr; 3215 if (Callee && !computeCallAddress(Callee, Addr)) 3216 return false; 3217 3218 // The weak function target may be zero; in that case we must use indirect 3219 // addressing via a stub on windows as it may be out of range for a 3220 // PC-relative jump. 3221 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() && 3222 Addr.getGlobalValue()->hasExternalWeakLinkage()) 3223 return false; 3224 3225 // Handle the arguments now that we've gotten them. 3226 unsigned NumBytes; 3227 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3228 return false; 3229 3230 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3231 if (RegInfo->isAnyArgRegReserved(*MF)) 3232 RegInfo->emitReservedArgRegCallError(*MF); 3233 3234 // Issue the call. 3235 MachineInstrBuilder MIB; 3236 if (Subtarget->useSmallAddressing()) { 3237 const MCInstrDesc &II = 3238 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL); 3239 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II); 3240 if (Symbol) 3241 MIB.addSym(Symbol, 0); 3242 else if (Addr.getGlobalValue()) 3243 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3244 else if (Addr.getReg()) { 3245 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3246 MIB.addReg(Reg); 3247 } else 3248 return false; 3249 } else { 3250 unsigned CallReg = 0; 3251 if (Symbol) { 3252 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3253 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP), 3254 ADRPReg) 3255 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3256 3257 CallReg = createResultReg(&AArch64::GPR64RegClass); 3258 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3259 TII.get(AArch64::LDRXui), CallReg) 3260 .addReg(ADRPReg) 3261 .addSym(Symbol, 3262 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3263 } else if (Addr.getGlobalValue()) 3264 CallReg = materializeGV(Addr.getGlobalValue()); 3265 else if (Addr.getReg()) 3266 CallReg = Addr.getReg(); 3267 3268 if (!CallReg) 3269 return false; 3270 3271 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF)); 3272 CallReg = constrainOperandRegClass(II, CallReg, 0); 3273 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg); 3274 } 3275 3276 // Add implicit physical register uses to the call. 3277 for (auto Reg : CLI.OutRegs) 3278 MIB.addReg(Reg, RegState::Implicit); 3279 3280 // Add a register mask with the call-preserved registers. 3281 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3282 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3283 3284 CLI.Call = MIB; 3285 3286 // Finish off the call including any return values. 3287 return finishCall(CLI, NumBytes); 3288 } 3289 3290 bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) { 3291 if (Alignment) 3292 return Len / Alignment->value() <= 4; 3293 else 3294 return Len < 32; 3295 } 3296 3297 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3298 uint64_t Len, MaybeAlign Alignment) { 3299 // Make sure we don't bloat code by inlining very large memcpy's. 
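  // "Small" (see isMemCpySmall above) means roughly at most four aligned
  // load/store pairs when the alignment is known (Len / Align <= 4), or
  // fewer than 32 bytes when it is not; e.g. a 32-byte copy with 8-byte
  // alignment is inlined, while a 64-byte one is not.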
3300 if (!isMemCpySmall(Len, Alignment)) 3301 return false; 3302 3303 int64_t UnscaledOffset = 0; 3304 Address OrigDest = Dest; 3305 Address OrigSrc = Src; 3306 3307 while (Len) { 3308 MVT VT; 3309 if (!Alignment || *Alignment >= 8) { 3310 if (Len >= 8) 3311 VT = MVT::i64; 3312 else if (Len >= 4) 3313 VT = MVT::i32; 3314 else if (Len >= 2) 3315 VT = MVT::i16; 3316 else { 3317 VT = MVT::i8; 3318 } 3319 } else { 3320 assert(Alignment && "Alignment is set in this branch"); 3321 // Bound based on alignment. 3322 if (Len >= 4 && *Alignment == 4) 3323 VT = MVT::i32; 3324 else if (Len >= 2 && *Alignment == 2) 3325 VT = MVT::i16; 3326 else { 3327 VT = MVT::i8; 3328 } 3329 } 3330 3331 unsigned ResultReg = emitLoad(VT, VT, Src); 3332 if (!ResultReg) 3333 return false; 3334 3335 if (!emitStore(VT, ResultReg, Dest)) 3336 return false; 3337 3338 int64_t Size = VT.getSizeInBits() / 8; 3339 Len -= Size; 3340 UnscaledOffset += Size; 3341 3342 // We need to recompute the unscaled offset for each iteration. 3343 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3344 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3345 } 3346 3347 return true; 3348 } 3349 3350 /// Check if it is possible to fold the condition from the XALU intrinsic 3351 /// into the user. The condition code will only be updated on success. 3352 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3353 const Instruction *I, 3354 const Value *Cond) { 3355 if (!isa<ExtractValueInst>(Cond)) 3356 return false; 3357 3358 const auto *EV = cast<ExtractValueInst>(Cond); 3359 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3360 return false; 3361 3362 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3363 MVT RetVT; 3364 const Function *Callee = II->getCalledFunction(); 3365 Type *RetTy = 3366 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3367 if (!isTypeLegal(RetTy, RetVT)) 3368 return false; 3369 3370 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3371 return false; 3372 3373 const Value *LHS = II->getArgOperand(0); 3374 const Value *RHS = II->getArgOperand(1); 3375 3376 // Canonicalize immediate to the RHS. 3377 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3378 std::swap(LHS, RHS); 3379 3380 // Simplify multiplies. 3381 Intrinsic::ID IID = II->getIntrinsicID(); 3382 switch (IID) { 3383 default: 3384 break; 3385 case Intrinsic::smul_with_overflow: 3386 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3387 if (C->getValue() == 2) 3388 IID = Intrinsic::sadd_with_overflow; 3389 break; 3390 case Intrinsic::umul_with_overflow: 3391 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3392 if (C->getValue() == 2) 3393 IID = Intrinsic::uadd_with_overflow; 3394 break; 3395 } 3396 3397 AArch64CC::CondCode TmpCC; 3398 switch (IID) { 3399 default: 3400 return false; 3401 case Intrinsic::sadd_with_overflow: 3402 case Intrinsic::ssub_with_overflow: 3403 TmpCC = AArch64CC::VS; 3404 break; 3405 case Intrinsic::uadd_with_overflow: 3406 TmpCC = AArch64CC::HS; 3407 break; 3408 case Intrinsic::usub_with_overflow: 3409 TmpCC = AArch64CC::LO; 3410 break; 3411 case Intrinsic::smul_with_overflow: 3412 case Intrinsic::umul_with_overflow: 3413 TmpCC = AArch64CC::NE; 3414 break; 3415 } 3416 3417 // Check if both instructions are in the same basic block. 
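  // The overflow result lives in NZCV, which is not tracked across basic
  // blocks here, and any unrelated instruction between the intrinsic and its
  // user could clobber the flags; so only fold when the intrinsic is in the
  // current block and nothing but extractvalues sits in between.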
3418 if (!isValueAvailable(II)) 3419 return false; 3420 3421 // Make sure nothing is in the way 3422 BasicBlock::const_iterator Start(I); 3423 BasicBlock::const_iterator End(II); 3424 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3425 // We only expect extractvalue instructions between the intrinsic and the 3426 // instruction to be selected. 3427 if (!isa<ExtractValueInst>(Itr)) 3428 return false; 3429 3430 // Check that the extractvalue operand comes from the intrinsic. 3431 const auto *EVI = cast<ExtractValueInst>(Itr); 3432 if (EVI->getAggregateOperand() != II) 3433 return false; 3434 } 3435 3436 CC = TmpCC; 3437 return true; 3438 } 3439 3440 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3441 // FIXME: Handle more intrinsics. 3442 switch (II->getIntrinsicID()) { 3443 default: return false; 3444 case Intrinsic::frameaddress: { 3445 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3446 MFI.setFrameAddressIsTaken(true); 3447 3448 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3449 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3450 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3451 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3452 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3453 // Recursively load frame address 3454 // ldr x0, [fp] 3455 // ldr x0, [x0] 3456 // ldr x0, [x0] 3457 // ... 3458 unsigned DestReg; 3459 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3460 while (Depth--) { 3461 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3462 SrcReg, 0); 3463 assert(DestReg && "Unexpected LDR instruction emission failure."); 3464 SrcReg = DestReg; 3465 } 3466 3467 updateValueMap(II, SrcReg); 3468 return true; 3469 } 3470 case Intrinsic::sponentry: { 3471 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3472 3473 // SP = FP + Fixed Object + 16 3474 int FI = MFI.CreateFixedObject(4, 0, false); 3475 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 3476 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3477 TII.get(AArch64::ADDXri), ResultReg) 3478 .addFrameIndex(FI) 3479 .addImm(0) 3480 .addImm(0); 3481 3482 updateValueMap(II, ResultReg); 3483 return true; 3484 } 3485 case Intrinsic::memcpy: 3486 case Intrinsic::memmove: { 3487 const auto *MTI = cast<MemTransferInst>(II); 3488 // Don't handle volatile. 3489 if (MTI->isVolatile()) 3490 return false; 3491 3492 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3493 // we would emit dead code because we don't currently handle memmoves. 3494 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3495 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3496 // Small memcpy's are common enough that we want to do them without a call 3497 // if possible. 
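      // For instance, a constant 12-byte memcpy with 4-byte alignment is
      // expanded by tryEmitSmallMemCpy into three i32 load/store pairs
      // instead of a libcall.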
3498 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3499 MaybeAlign Alignment; 3500 if (MTI->getDestAlign() || MTI->getSourceAlign()) 3501 Alignment = std::min(MTI->getDestAlign().valueOrOne(), 3502 MTI->getSourceAlign().valueOrOne()); 3503 if (isMemCpySmall(Len, Alignment)) { 3504 Address Dest, Src; 3505 if (!computeAddress(MTI->getRawDest(), Dest) || 3506 !computeAddress(MTI->getRawSource(), Src)) 3507 return false; 3508 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3509 return true; 3510 } 3511 } 3512 3513 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3514 return false; 3515 3516 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3517 // Fast instruction selection doesn't support the special 3518 // address spaces. 3519 return false; 3520 3521 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3522 return lowerCallTo(II, IntrMemName, II->arg_size() - 1); 3523 } 3524 case Intrinsic::memset: { 3525 const MemSetInst *MSI = cast<MemSetInst>(II); 3526 // Don't handle volatile. 3527 if (MSI->isVolatile()) 3528 return false; 3529 3530 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3531 return false; 3532 3533 if (MSI->getDestAddressSpace() > 255) 3534 // Fast instruction selection doesn't support the special 3535 // address spaces. 3536 return false; 3537 3538 return lowerCallTo(II, "memset", II->arg_size() - 1); 3539 } 3540 case Intrinsic::sin: 3541 case Intrinsic::cos: 3542 case Intrinsic::tan: 3543 case Intrinsic::pow: { 3544 MVT RetVT; 3545 if (!isTypeLegal(II->getType(), RetVT)) 3546 return false; 3547 3548 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3549 return false; 3550 3551 static const RTLIB::Libcall LibCallTable[4][2] = { 3552 {RTLIB::SIN_F32, RTLIB::SIN_F64}, 3553 {RTLIB::COS_F32, RTLIB::COS_F64}, 3554 {RTLIB::TAN_F32, RTLIB::TAN_F64}, 3555 {RTLIB::POW_F32, RTLIB::POW_F64}}; 3556 RTLIB::Libcall LC; 3557 bool Is64Bit = RetVT == MVT::f64; 3558 switch (II->getIntrinsicID()) { 3559 default: 3560 llvm_unreachable("Unexpected intrinsic."); 3561 case Intrinsic::sin: 3562 LC = LibCallTable[0][Is64Bit]; 3563 break; 3564 case Intrinsic::cos: 3565 LC = LibCallTable[1][Is64Bit]; 3566 break; 3567 case Intrinsic::tan: 3568 LC = LibCallTable[2][Is64Bit]; 3569 break; 3570 case Intrinsic::pow: 3571 LC = LibCallTable[3][Is64Bit]; 3572 break; 3573 } 3574 3575 ArgListTy Args; 3576 Args.reserve(II->arg_size()); 3577 3578 // Populate the argument list. 
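    // Forward the intrinsic's operands unchanged as libcall arguments; the
    // callee name and calling convention are looked up from TLI for the
    // RTLIB entry selected above.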
3579 for (auto &Arg : II->args()) { 3580 ArgListEntry Entry; 3581 Entry.Val = Arg; 3582 Entry.Ty = Arg->getType(); 3583 Args.push_back(Entry); 3584 } 3585 3586 CallLoweringInfo CLI; 3587 MCContext &Ctx = MF->getContext(); 3588 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3589 TLI.getLibcallName(LC), std::move(Args)); 3590 if (!lowerCallTo(CLI)) 3591 return false; 3592 updateValueMap(II, CLI.ResultReg); 3593 return true; 3594 } 3595 case Intrinsic::fabs: { 3596 MVT VT; 3597 if (!isTypeLegal(II->getType(), VT)) 3598 return false; 3599 3600 unsigned Opc; 3601 switch (VT.SimpleTy) { 3602 default: 3603 return false; 3604 case MVT::f32: 3605 Opc = AArch64::FABSSr; 3606 break; 3607 case MVT::f64: 3608 Opc = AArch64::FABSDr; 3609 break; 3610 } 3611 Register SrcReg = getRegForValue(II->getOperand(0)); 3612 if (!SrcReg) 3613 return false; 3614 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3615 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 3616 .addReg(SrcReg); 3617 updateValueMap(II, ResultReg); 3618 return true; 3619 } 3620 case Intrinsic::trap: 3621 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK)) 3622 .addImm(1); 3623 return true; 3624 case Intrinsic::debugtrap: 3625 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK)) 3626 .addImm(0xF000); 3627 return true; 3628 3629 case Intrinsic::sqrt: { 3630 Type *RetTy = II->getCalledFunction()->getReturnType(); 3631 3632 MVT VT; 3633 if (!isTypeLegal(RetTy, VT)) 3634 return false; 3635 3636 Register Op0Reg = getRegForValue(II->getOperand(0)); 3637 if (!Op0Reg) 3638 return false; 3639 3640 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg); 3641 if (!ResultReg) 3642 return false; 3643 3644 updateValueMap(II, ResultReg); 3645 return true; 3646 } 3647 case Intrinsic::sadd_with_overflow: 3648 case Intrinsic::uadd_with_overflow: 3649 case Intrinsic::ssub_with_overflow: 3650 case Intrinsic::usub_with_overflow: 3651 case Intrinsic::smul_with_overflow: 3652 case Intrinsic::umul_with_overflow: { 3653 // This implements the basic lowering of the xalu with overflow intrinsics. 3654 const Function *Callee = II->getCalledFunction(); 3655 auto *Ty = cast<StructType>(Callee->getReturnType()); 3656 Type *RetTy = Ty->getTypeAtIndex(0U); 3657 3658 MVT VT; 3659 if (!isTypeLegal(RetTy, VT)) 3660 return false; 3661 3662 if (VT != MVT::i32 && VT != MVT::i64) 3663 return false; 3664 3665 const Value *LHS = II->getArgOperand(0); 3666 const Value *RHS = II->getArgOperand(1); 3667 // Canonicalize immediate to the RHS. 3668 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3669 std::swap(LHS, RHS); 3670 3671 // Simplify multiplies. 
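    // A multiply by 2 overflows exactly when adding the value to itself
    // overflows, so {s,u}mul.with.overflow(x, 2) is rewritten below as
    // {s,u}add.with.overflow(x, x) and reuses the flag-setting add.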
3672 Intrinsic::ID IID = II->getIntrinsicID(); 3673 switch (IID) { 3674 default: 3675 break; 3676 case Intrinsic::smul_with_overflow: 3677 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3678 if (C->getValue() == 2) { 3679 IID = Intrinsic::sadd_with_overflow; 3680 RHS = LHS; 3681 } 3682 break; 3683 case Intrinsic::umul_with_overflow: 3684 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3685 if (C->getValue() == 2) { 3686 IID = Intrinsic::uadd_with_overflow; 3687 RHS = LHS; 3688 } 3689 break; 3690 } 3691 3692 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3693 AArch64CC::CondCode CC = AArch64CC::Invalid; 3694 switch (IID) { 3695 default: llvm_unreachable("Unexpected intrinsic!"); 3696 case Intrinsic::sadd_with_overflow: 3697 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3698 CC = AArch64CC::VS; 3699 break; 3700 case Intrinsic::uadd_with_overflow: 3701 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3702 CC = AArch64CC::HS; 3703 break; 3704 case Intrinsic::ssub_with_overflow: 3705 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3706 CC = AArch64CC::VS; 3707 break; 3708 case Intrinsic::usub_with_overflow: 3709 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3710 CC = AArch64CC::LO; 3711 break; 3712 case Intrinsic::smul_with_overflow: { 3713 CC = AArch64CC::NE; 3714 Register LHSReg = getRegForValue(LHS); 3715 if (!LHSReg) 3716 return false; 3717 3718 Register RHSReg = getRegForValue(RHS); 3719 if (!RHSReg) 3720 return false; 3721 3722 if (VT == MVT::i32) { 3723 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg); 3724 Register MulSubReg = 3725 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3726 // cmp xreg, wreg, sxtw 3727 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg, 3728 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true, 3729 /*WantResult=*/false); 3730 MulReg = MulSubReg; 3731 } else { 3732 assert(VT == MVT::i64 && "Unexpected value type."); 3733 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3734 // reused in the next instruction. 3735 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3736 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg); 3737 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63, 3738 /*WantResult=*/false); 3739 } 3740 break; 3741 } 3742 case Intrinsic::umul_with_overflow: { 3743 CC = AArch64CC::NE; 3744 Register LHSReg = getRegForValue(LHS); 3745 if (!LHSReg) 3746 return false; 3747 3748 Register RHSReg = getRegForValue(RHS); 3749 if (!RHSReg) 3750 return false; 3751 3752 if (VT == MVT::i32) { 3753 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg); 3754 // tst xreg, #0xffffffff00000000 3755 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3756 TII.get(AArch64::ANDSXri), AArch64::XZR) 3757 .addReg(MulReg) 3758 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64)); 3759 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3760 } else { 3761 assert(VT == MVT::i64 && "Unexpected value type."); 3762 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3763 // reused in the next instruction. 
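      // mul xreg, xreg, xreg
      // umulh xreg, xreg, xreg
      // cmp xzr, xreg
      // The unsigned multiply overflows iff the high 64 bits of the 128-bit
      // product are non-zero, which the compare exposes as NE.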
3764 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3765 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg); 3766 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false); 3767 } 3768 break; 3769 } 3770 } 3771 3772 if (MulReg) { 3773 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3774 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3775 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3776 } 3777 3778 if (!ResultReg1) 3779 return false; 3780 3781 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3782 AArch64::WZR, AArch64::WZR, 3783 getInvertedCondCode(CC)); 3784 (void)ResultReg2; 3785 assert((ResultReg1 + 1) == ResultReg2 && 3786 "Nonconsecutive result registers."); 3787 updateValueMap(II, ResultReg1, 2); 3788 return true; 3789 } 3790 case Intrinsic::aarch64_crc32b: 3791 case Intrinsic::aarch64_crc32h: 3792 case Intrinsic::aarch64_crc32w: 3793 case Intrinsic::aarch64_crc32x: 3794 case Intrinsic::aarch64_crc32cb: 3795 case Intrinsic::aarch64_crc32ch: 3796 case Intrinsic::aarch64_crc32cw: 3797 case Intrinsic::aarch64_crc32cx: { 3798 if (!Subtarget->hasCRC()) 3799 return false; 3800 3801 unsigned Opc; 3802 switch (II->getIntrinsicID()) { 3803 default: 3804 llvm_unreachable("Unexpected intrinsic!"); 3805 case Intrinsic::aarch64_crc32b: 3806 Opc = AArch64::CRC32Brr; 3807 break; 3808 case Intrinsic::aarch64_crc32h: 3809 Opc = AArch64::CRC32Hrr; 3810 break; 3811 case Intrinsic::aarch64_crc32w: 3812 Opc = AArch64::CRC32Wrr; 3813 break; 3814 case Intrinsic::aarch64_crc32x: 3815 Opc = AArch64::CRC32Xrr; 3816 break; 3817 case Intrinsic::aarch64_crc32cb: 3818 Opc = AArch64::CRC32CBrr; 3819 break; 3820 case Intrinsic::aarch64_crc32ch: 3821 Opc = AArch64::CRC32CHrr; 3822 break; 3823 case Intrinsic::aarch64_crc32cw: 3824 Opc = AArch64::CRC32CWrr; 3825 break; 3826 case Intrinsic::aarch64_crc32cx: 3827 Opc = AArch64::CRC32CXrr; 3828 break; 3829 } 3830 3831 Register LHSReg = getRegForValue(II->getArgOperand(0)); 3832 Register RHSReg = getRegForValue(II->getArgOperand(1)); 3833 if (!LHSReg || !RHSReg) 3834 return false; 3835 3836 Register ResultReg = 3837 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg); 3838 updateValueMap(II, ResultReg); 3839 return true; 3840 } 3841 } 3842 return false; 3843 } 3844 3845 bool AArch64FastISel::selectRet(const Instruction *I) { 3846 const ReturnInst *Ret = cast<ReturnInst>(I); 3847 const Function &F = *I->getParent()->getParent(); 3848 3849 if (!FuncInfo.CanLowerReturn) 3850 return false; 3851 3852 if (F.isVarArg()) 3853 return false; 3854 3855 if (TLI.supportSwiftError() && 3856 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 3857 return false; 3858 3859 if (TLI.supportSplitCSR(FuncInfo.MF)) 3860 return false; 3861 3862 // Build a list of return value registers. 3863 SmallVector<unsigned, 4> RetRegs; 3864 3865 if (Ret->getNumOperands() > 0) { 3866 CallingConv::ID CC = F.getCallingConv(); 3867 SmallVector<ISD::OutputArg, 4> Outs; 3868 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3869 3870 // Analyze operands of the call, assigning locations to each operand. 3871 SmallVector<CCValAssign, 16> ValLocs; 3872 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3873 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS); 3874 3875 // Only handle a single return value for now. 3876 if (ValLocs.size() != 1) 3877 return false; 3878 3879 CCValAssign &VA = ValLocs[0]; 3880 const Value *RV = Ret->getOperand(0); 3881 3882 // Don't bother handling odd stuff for now. 
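    // Only values returned unchanged (Full) or bit-converted (BCvt) in a
    // single register are handled here; anything else falls back to
    // SelectionDAG.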
3883 if ((VA.getLocInfo() != CCValAssign::Full) && 3884 (VA.getLocInfo() != CCValAssign::BCvt)) 3885 return false; 3886 3887 // Only handle register returns for now. 3888 if (!VA.isRegLoc()) 3889 return false; 3890 3891 Register Reg = getRegForValue(RV); 3892 if (Reg == 0) 3893 return false; 3894 3895 unsigned SrcReg = Reg + VA.getValNo(); 3896 Register DestReg = VA.getLocReg(); 3897 // Avoid a cross-class copy. This is very unlikely. 3898 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 3899 return false; 3900 3901 EVT RVEVT = TLI.getValueType(DL, RV->getType()); 3902 if (!RVEVT.isSimple()) 3903 return false; 3904 3905 // Vectors (of > 1 lane) in big endian need tricky handling. 3906 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() && 3907 !Subtarget->isLittleEndian()) 3908 return false; 3909 3910 MVT RVVT = RVEVT.getSimpleVT(); 3911 if (RVVT == MVT::f128) 3912 return false; 3913 3914 MVT DestVT = VA.getValVT(); 3915 // Special handling for extended integers. 3916 if (RVVT != DestVT) { 3917 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) 3918 return false; 3919 3920 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 3921 return false; 3922 3923 bool IsZExt = Outs[0].Flags.isZExt(); 3924 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); 3925 if (SrcReg == 0) 3926 return false; 3927 } 3928 3929 // "Callee" (i.e. value producer) zero extends pointers at function 3930 // boundary. 3931 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy()) 3932 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff); 3933 3934 // Make the copy. 3935 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3936 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); 3937 3938 // Add register to return instruction. 3939 RetRegs.push_back(VA.getLocReg()); 3940 } 3941 3942 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3943 TII.get(AArch64::RET_ReallyLR)); 3944 for (unsigned RetReg : RetRegs) 3945 MIB.addReg(RetReg, RegState::Implicit); 3946 return true; 3947 } 3948 3949 bool AArch64FastISel::selectTrunc(const Instruction *I) { 3950 Type *DestTy = I->getType(); 3951 Value *Op = I->getOperand(0); 3952 Type *SrcTy = Op->getType(); 3953 3954 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); 3955 EVT DestEVT = TLI.getValueType(DL, DestTy, true); 3956 if (!SrcEVT.isSimple()) 3957 return false; 3958 if (!DestEVT.isSimple()) 3959 return false; 3960 3961 MVT SrcVT = SrcEVT.getSimpleVT(); 3962 MVT DestVT = DestEVT.getSimpleVT(); 3963 3964 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && 3965 SrcVT != MVT::i8) 3966 return false; 3967 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && 3968 DestVT != MVT::i1) 3969 return false; 3970 3971 Register SrcReg = getRegForValue(Op); 3972 if (!SrcReg) 3973 return false; 3974 3975 // If we're truncating from i64 to a smaller non-legal type then generate an 3976 // AND. Otherwise, we know the high bits are undefined and a truncate only 3977 // generate a COPY. We cannot mark the source register also as result 3978 // register, because this can incorrectly transfer the kill flag onto the 3979 // source register. 3980 unsigned ResultReg; 3981 if (SrcVT == MVT::i64) { 3982 uint64_t Mask = 0; 3983 switch (DestVT.SimpleTy) { 3984 default: 3985 // Trunc i64 to i32 is handled by the target-independent fast-isel. 
3986 return false; 3987 case MVT::i1: 3988 Mask = 0x1; 3989 break; 3990 case MVT::i8: 3991 Mask = 0xff; 3992 break; 3993 case MVT::i16: 3994 Mask = 0xffff; 3995 break; 3996 } 3997 // Issue an extract_subreg to get the lower 32-bits. 3998 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, 3999 AArch64::sub_32); 4000 // Create the AND instruction which performs the actual truncation. 4001 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask); 4002 assert(ResultReg && "Unexpected AND instruction emission failure."); 4003 } else { 4004 ResultReg = createResultReg(&AArch64::GPR32RegClass); 4005 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4006 TII.get(TargetOpcode::COPY), ResultReg) 4007 .addReg(SrcReg); 4008 } 4009 4010 updateValueMap(I, ResultReg); 4011 return true; 4012 } 4013 4014 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { 4015 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || 4016 DestVT == MVT::i64) && 4017 "Unexpected value type."); 4018 // Handle i8 and i16 as i32. 4019 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4020 DestVT = MVT::i32; 4021 4022 if (IsZExt) { 4023 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1); 4024 assert(ResultReg && "Unexpected AND instruction emission failure."); 4025 if (DestVT == MVT::i64) { 4026 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the 4027 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. 4028 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4029 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4030 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4031 .addImm(0) 4032 .addReg(ResultReg) 4033 .addImm(AArch64::sub_32); 4034 ResultReg = Reg64; 4035 } 4036 return ResultReg; 4037 } else { 4038 if (DestVT == MVT::i64) { 4039 // FIXME: We're SExt i1 to i64. 4040 return 0; 4041 } 4042 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 4043 0, 0); 4044 } 4045 } 4046 4047 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4048 unsigned Opc, ZReg; 4049 switch (RetVT.SimpleTy) { 4050 default: return 0; 4051 case MVT::i8: 4052 case MVT::i16: 4053 case MVT::i32: 4054 RetVT = MVT::i32; 4055 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 4056 case MVT::i64: 4057 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 4058 } 4059 4060 const TargetRegisterClass *RC = 4061 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4062 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg); 4063 } 4064 4065 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4066 if (RetVT != MVT::i64) 4067 return 0; 4068 4069 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, 4070 Op0, Op1, AArch64::XZR); 4071 } 4072 4073 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4074 if (RetVT != MVT::i64) 4075 return 0; 4076 4077 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, 4078 Op0, Op1, AArch64::XZR); 4079 } 4080 4081 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, 4082 unsigned Op1Reg) { 4083 unsigned Opc = 0; 4084 bool NeedTrunc = false; 4085 uint64_t Mask = 0; 4086 switch (RetVT.SimpleTy) { 4087 default: return 0; 4088 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; 4089 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; 4090 case MVT::i32: Opc = AArch64::LSLVWr; break; 4091 case MVT::i64: Opc = AArch64::LSLVXr; break; 4092 } 4093 4094 const TargetRegisterClass *RC = 4095 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4096 if (NeedTrunc) 4097 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4098 4099 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4100 if (NeedTrunc) 4101 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4102 return ResultReg; 4103 } 4104 4105 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4106 uint64_t Shift, bool IsZExt) { 4107 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4108 "Unexpected source/return type pair."); 4109 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4110 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4111 "Unexpected source value type."); 4112 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4113 RetVT == MVT::i64) && "Unexpected return value type."); 4114 4115 bool Is64Bit = (RetVT == MVT::i64); 4116 unsigned RegSize = Is64Bit ? 64 : 32; 4117 unsigned DstBits = RetVT.getSizeInBits(); 4118 unsigned SrcBits = SrcVT.getSizeInBits(); 4119 const TargetRegisterClass *RC = 4120 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4121 4122 // Just emit a copy for "zero" shifts. 4123 if (Shift == 0) { 4124 if (RetVT == SrcVT) { 4125 Register ResultReg = createResultReg(RC); 4126 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4127 TII.get(TargetOpcode::COPY), ResultReg) 4128 .addReg(Op0); 4129 return ResultReg; 4130 } else 4131 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4132 } 4133 4134 // Don't deal with undefined shifts. 4135 if (Shift >= DstBits) 4136 return 0; 4137 4138 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
4139 // {S|U}BFM Wd, Wn, #r, #s 4140 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 4141 4142 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4143 // %2 = shl i16 %1, 4 4144 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 4145 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 4146 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 4147 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 4148 4149 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4150 // %2 = shl i16 %1, 8 4151 // Wd<32+7-24,32-24> = Wn<7:0> 4152 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 4153 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 4154 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 4155 4156 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4157 // %2 = shl i16 %1, 12 4158 // Wd<32+3-20,32-20> = Wn<3:0> 4159 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 4160 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 4161 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 4162 4163 unsigned ImmR = RegSize - Shift; 4164 // Limit the width to the length of the source type. 4165 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 4166 static const unsigned OpcTable[2][2] = { 4167 {AArch64::SBFMWri, AArch64::SBFMXri}, 4168 {AArch64::UBFMWri, AArch64::UBFMXri} 4169 }; 4170 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4171 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4172 Register TmpReg = MRI.createVirtualRegister(RC); 4173 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4174 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4175 .addImm(0) 4176 .addReg(Op0) 4177 .addImm(AArch64::sub_32); 4178 Op0 = TmpReg; 4179 } 4180 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4181 } 4182 4183 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, 4184 unsigned Op1Reg) { 4185 unsigned Opc = 0; 4186 bool NeedTrunc = false; 4187 uint64_t Mask = 0; 4188 switch (RetVT.SimpleTy) { 4189 default: return 0; 4190 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4191 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4192 case MVT::i32: Opc = AArch64::LSRVWr; break; 4193 case MVT::i64: Opc = AArch64::LSRVXr; break; 4194 } 4195 4196 const TargetRegisterClass *RC = 4197 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4198 if (NeedTrunc) { 4199 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask); 4200 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4201 } 4202 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4203 if (NeedTrunc) 4204 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4205 return ResultReg; 4206 } 4207 4208 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4209 uint64_t Shift, bool IsZExt) { 4210 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4211 "Unexpected source/return type pair."); 4212 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4213 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4214 "Unexpected source value type."); 4215 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4216 RetVT == MVT::i64) && "Unexpected return value type."); 4217 4218 bool Is64Bit = (RetVT == MVT::i64); 4219 unsigned RegSize = Is64Bit ? 64 : 32; 4220 unsigned DstBits = RetVT.getSizeInBits(); 4221 unsigned SrcBits = SrcVT.getSizeInBits(); 4222 const TargetRegisterClass *RC = 4223 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4224 4225 // Just emit a copy for "zero" shifts. 
4226 if (Shift == 0) { 4227 if (RetVT == SrcVT) { 4228 Register ResultReg = createResultReg(RC); 4229 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4230 TII.get(TargetOpcode::COPY), ResultReg) 4231 .addReg(Op0); 4232 return ResultReg; 4233 } else 4234 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4235 } 4236 4237 // Don't deal with undefined shifts. 4238 if (Shift >= DstBits) 4239 return 0; 4240 4241 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4242 // {S|U}BFM Wd, Wn, #r, #s 4243 // Wd<s-r:0> = Wn<s:r> when r <= s 4244 4245 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4246 // %2 = lshr i16 %1, 4 4247 // Wd<7-4:0> = Wn<7:4> 4248 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4249 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4250 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4251 4252 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4253 // %2 = lshr i16 %1, 8 4254 // Wd<7-7,0> = Wn<7:7> 4255 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4256 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4257 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4258 4259 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4260 // %2 = lshr i16 %1, 12 4261 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4262 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4263 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4264 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4265 4266 if (Shift >= SrcBits && IsZExt) 4267 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4268 4269 // It is not possible to fold a sign-extend into the LShr instruction. In this 4270 // case emit a sign-extend. 4271 if (!IsZExt) { 4272 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4273 if (!Op0) 4274 return 0; 4275 SrcVT = RetVT; 4276 SrcBits = SrcVT.getSizeInBits(); 4277 IsZExt = true; 4278 } 4279 4280 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4281 unsigned ImmS = SrcBits - 1; 4282 static const unsigned OpcTable[2][2] = { 4283 {AArch64::SBFMWri, AArch64::SBFMXri}, 4284 {AArch64::UBFMWri, AArch64::UBFMXri} 4285 }; 4286 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4287 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4288 Register TmpReg = MRI.createVirtualRegister(RC); 4289 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4290 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4291 .addImm(0) 4292 .addReg(Op0) 4293 .addImm(AArch64::sub_32); 4294 Op0 = TmpReg; 4295 } 4296 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4297 } 4298 4299 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, 4300 unsigned Op1Reg) { 4301 unsigned Opc = 0; 4302 bool NeedTrunc = false; 4303 uint64_t Mask = 0; 4304 switch (RetVT.SimpleTy) { 4305 default: return 0; 4306 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4307 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4308 case MVT::i32: Opc = AArch64::ASRVWr; break; 4309 case MVT::i64: Opc = AArch64::ASRVXr; break; 4310 } 4311 4312 const TargetRegisterClass *RC = 4313 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4314 if (NeedTrunc) { 4315 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); 4316 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4317 } 4318 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4319 if (NeedTrunc) 4320 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4321 return ResultReg; 4322 } 4323 4324 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4325 uint64_t Shift, bool IsZExt) { 4326 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4327 "Unexpected source/return type pair."); 4328 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4329 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4330 "Unexpected source value type."); 4331 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4332 RetVT == MVT::i64) && "Unexpected return value type."); 4333 4334 bool Is64Bit = (RetVT == MVT::i64); 4335 unsigned RegSize = Is64Bit ? 64 : 32; 4336 unsigned DstBits = RetVT.getSizeInBits(); 4337 unsigned SrcBits = SrcVT.getSizeInBits(); 4338 const TargetRegisterClass *RC = 4339 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4340 4341 // Just emit a copy for "zero" shifts. 4342 if (Shift == 0) { 4343 if (RetVT == SrcVT) { 4344 Register ResultReg = createResultReg(RC); 4345 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4346 TII.get(TargetOpcode::COPY), ResultReg) 4347 .addReg(Op0); 4348 return ResultReg; 4349 } else 4350 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4351 } 4352 4353 // Don't deal with undefined shifts. 4354 if (Shift >= DstBits) 4355 return 0; 4356 4357 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4358 // {S|U}BFM Wd, Wn, #r, #s 4359 // Wd<s-r:0> = Wn<s:r> when r <= s 4360 4361 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4362 // %2 = ashr i16 %1, 4 4363 // Wd<7-4:0> = Wn<7:4> 4364 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4365 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4366 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4367 4368 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4369 // %2 = ashr i16 %1, 8 4370 // Wd<7-7,0> = Wn<7:7> 4371 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4372 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4373 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4374 4375 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4376 // %2 = ashr i16 %1, 12 4377 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4378 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4379 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4380 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4381 4382 if (Shift >= SrcBits && IsZExt) 4383 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4384 4385 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4386 unsigned ImmS = SrcBits - 1; 4387 static const unsigned OpcTable[2][2] = { 4388 {AArch64::SBFMWri, AArch64::SBFMXri}, 4389 {AArch64::UBFMWri, AArch64::UBFMXri} 4390 }; 4391 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4392 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4393 Register TmpReg = MRI.createVirtualRegister(RC); 4394 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4395 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4396 .addImm(0) 4397 .addReg(Op0) 4398 .addImm(AArch64::sub_32); 4399 Op0 = TmpReg; 4400 } 4401 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4402 } 4403 4404 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4405 bool IsZExt) { 4406 
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4407 4408 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4409 // DestVT are odd things, so test to make sure that they are both types we can 4410 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4411 // bail out to SelectionDAG. 4412 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4413 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4414 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4415 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4416 return 0; 4417 4418 unsigned Opc; 4419 unsigned Imm = 0; 4420 4421 switch (SrcVT.SimpleTy) { 4422 default: 4423 return 0; 4424 case MVT::i1: 4425 return emiti1Ext(SrcReg, DestVT, IsZExt); 4426 case MVT::i8: 4427 if (DestVT == MVT::i64) 4428 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4429 else 4430 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4431 Imm = 7; 4432 break; 4433 case MVT::i16: 4434 if (DestVT == MVT::i64) 4435 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4436 else 4437 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4438 Imm = 15; 4439 break; 4440 case MVT::i32: 4441 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4442 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4443 Imm = 31; 4444 break; 4445 } 4446 4447 // Handle i8 and i16 as i32. 4448 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4449 DestVT = MVT::i32; 4450 else if (DestVT == MVT::i64) { 4451 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4452 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4453 TII.get(AArch64::SUBREG_TO_REG), Src64) 4454 .addImm(0) 4455 .addReg(SrcReg) 4456 .addImm(AArch64::sub_32); 4457 SrcReg = Src64; 4458 } 4459 4460 const TargetRegisterClass *RC = 4461 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4462 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm); 4463 } 4464 4465 static bool isZExtLoad(const MachineInstr *LI) { 4466 switch (LI->getOpcode()) { 4467 default: 4468 return false; 4469 case AArch64::LDURBBi: 4470 case AArch64::LDURHHi: 4471 case AArch64::LDURWi: 4472 case AArch64::LDRBBui: 4473 case AArch64::LDRHHui: 4474 case AArch64::LDRWui: 4475 case AArch64::LDRBBroX: 4476 case AArch64::LDRHHroX: 4477 case AArch64::LDRWroX: 4478 case AArch64::LDRBBroW: 4479 case AArch64::LDRHHroW: 4480 case AArch64::LDRWroW: 4481 return true; 4482 } 4483 } 4484 4485 static bool isSExtLoad(const MachineInstr *LI) { 4486 switch (LI->getOpcode()) { 4487 default: 4488 return false; 4489 case AArch64::LDURSBWi: 4490 case AArch64::LDURSHWi: 4491 case AArch64::LDURSBXi: 4492 case AArch64::LDURSHXi: 4493 case AArch64::LDURSWi: 4494 case AArch64::LDRSBWui: 4495 case AArch64::LDRSHWui: 4496 case AArch64::LDRSBXui: 4497 case AArch64::LDRSHXui: 4498 case AArch64::LDRSWui: 4499 case AArch64::LDRSBWroX: 4500 case AArch64::LDRSHWroX: 4501 case AArch64::LDRSBXroX: 4502 case AArch64::LDRSHXroX: 4503 case AArch64::LDRSWroX: 4504 case AArch64::LDRSBWroW: 4505 case AArch64::LDRSHWroW: 4506 case AArch64::LDRSBXroW: 4507 case AArch64::LDRSHXroW: 4508 case AArch64::LDRSWroW: 4509 return true; 4510 } 4511 } 4512 4513 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4514 MVT SrcVT) { 4515 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4516 if (!LI || !LI->hasOneUse()) 4517 return false; 4518 4519 // Check if the load instruction has already been selected. 
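  // If no virtual register has been assigned to the load yet, it has not been
  // selected and there is no machine instruction to inspect, so the extend
  // cannot be folded into it.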
4520 Register Reg = lookUpRegForValue(LI); 4521 if (!Reg) 4522 return false; 4523 4524 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4525 if (!MI) 4526 return false; 4527 4528 // Check if the correct load instruction has been emitted - SelectionDAG might 4529 // have emitted a zero-extending load, but we need a sign-extending load. 4530 bool IsZExt = isa<ZExtInst>(I); 4531 const auto *LoadMI = MI; 4532 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4533 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4534 Register LoadReg = MI->getOperand(1).getReg(); 4535 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4536 assert(LoadMI && "Expected valid instruction"); 4537 } 4538 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4539 return false; 4540 4541 // Nothing to be done. 4542 if (RetVT != MVT::i64 || SrcVT > MVT::i32) { 4543 updateValueMap(I, Reg); 4544 return true; 4545 } 4546 4547 if (IsZExt) { 4548 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 4549 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4550 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4551 .addImm(0) 4552 .addReg(Reg, getKillRegState(true)) 4553 .addImm(AArch64::sub_32); 4554 Reg = Reg64; 4555 } else { 4556 assert((MI->getOpcode() == TargetOpcode::COPY && 4557 MI->getOperand(1).getSubReg() == AArch64::sub_32) && 4558 "Expected copy instruction"); 4559 Reg = MI->getOperand(1).getReg(); 4560 MachineBasicBlock::iterator I(MI); 4561 removeDeadCode(I, std::next(I)); 4562 } 4563 updateValueMap(I, Reg); 4564 return true; 4565 } 4566 4567 bool AArch64FastISel::selectIntExt(const Instruction *I) { 4568 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 4569 "Unexpected integer extend instruction."); 4570 MVT RetVT; 4571 MVT SrcVT; 4572 if (!isTypeSupported(I->getType(), RetVT)) 4573 return false; 4574 4575 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) 4576 return false; 4577 4578 // Try to optimize already sign-/zero-extended values from load instructions. 4579 if (optimizeIntExtLoad(I, RetVT, SrcVT)) 4580 return true; 4581 4582 Register SrcReg = getRegForValue(I->getOperand(0)); 4583 if (!SrcReg) 4584 return false; 4585 4586 // Try to optimize already sign-/zero-extended values from function arguments. 4587 bool IsZExt = isa<ZExtInst>(I); 4588 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) { 4589 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { 4590 if (RetVT == MVT::i64 && SrcVT != MVT::i64) { 4591 Register ResultReg = createResultReg(&AArch64::GPR64RegClass); 4592 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4593 TII.get(AArch64::SUBREG_TO_REG), ResultReg) 4594 .addImm(0) 4595 .addReg(SrcReg) 4596 .addImm(AArch64::sub_32); 4597 SrcReg = ResultReg; 4598 } 4599 4600 updateValueMap(I, SrcReg); 4601 return true; 4602 } 4603 } 4604 4605 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); 4606 if (!ResultReg) 4607 return false; 4608 4609 updateValueMap(I, ResultReg); 4610 return true; 4611 } 4612 4613 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { 4614 EVT DestEVT = TLI.getValueType(DL, I->getType(), true); 4615 if (!DestEVT.isSimple()) 4616 return false; 4617 4618 MVT DestVT = DestEVT.getSimpleVT(); 4619 if (DestVT != MVT::i64 && DestVT != MVT::i32) 4620 return false; 4621 4622 unsigned DivOpc; 4623 bool Is64bit = (DestVT == MVT::i64); 4624 switch (ISDOpcode) { 4625 default: 4626 return false; 4627 case ISD::SREM: 4628 DivOpc = Is64bit ? 
AArch64::SDIVXr : AArch64::SDIVWr; 4629 break; 4630 case ISD::UREM: 4631 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; 4632 break; 4633 } 4634 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4635 Register Src0Reg = getRegForValue(I->getOperand(0)); 4636 if (!Src0Reg) 4637 return false; 4638 4639 Register Src1Reg = getRegForValue(I->getOperand(1)); 4640 if (!Src1Reg) 4641 return false; 4642 4643 const TargetRegisterClass *RC = 4644 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4645 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg); 4646 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4647 // The remainder is computed as numerator - (quotient * denominator) using the 4648 // MSUB instruction. 4649 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg); 4650 updateValueMap(I, ResultReg); 4651 return true; 4652 } 4653 4654 bool AArch64FastISel::selectMul(const Instruction *I) { 4655 MVT VT; 4656 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4657 return false; 4658 4659 if (VT.isVector()) 4660 return selectBinaryOp(I, ISD::MUL); 4661 4662 const Value *Src0 = I->getOperand(0); 4663 const Value *Src1 = I->getOperand(1); 4664 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4665 if (C->getValue().isPowerOf2()) 4666 std::swap(Src0, Src1); 4667 4668 // Try to simplify to a shift instruction. 4669 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4670 if (C->getValue().isPowerOf2()) { 4671 uint64_t ShiftVal = C->getValue().logBase2(); 4672 MVT SrcVT = VT; 4673 bool IsZExt = true; 4674 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4675 if (!isIntExtFree(ZExt)) { 4676 MVT VT; 4677 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4678 SrcVT = VT; 4679 IsZExt = true; 4680 Src0 = ZExt->getOperand(0); 4681 } 4682 } 4683 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4684 if (!isIntExtFree(SExt)) { 4685 MVT VT; 4686 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4687 SrcVT = VT; 4688 IsZExt = false; 4689 Src0 = SExt->getOperand(0); 4690 } 4691 } 4692 } 4693 4694 Register Src0Reg = getRegForValue(Src0); 4695 if (!Src0Reg) 4696 return false; 4697 4698 unsigned ResultReg = 4699 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt); 4700 4701 if (ResultReg) { 4702 updateValueMap(I, ResultReg); 4703 return true; 4704 } 4705 } 4706 4707 Register Src0Reg = getRegForValue(I->getOperand(0)); 4708 if (!Src0Reg) 4709 return false; 4710 4711 Register Src1Reg = getRegForValue(I->getOperand(1)); 4712 if (!Src1Reg) 4713 return false; 4714 4715 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg); 4716 4717 if (!ResultReg) 4718 return false; 4719 4720 updateValueMap(I, ResultReg); 4721 return true; 4722 } 4723 4724 bool AArch64FastISel::selectShift(const Instruction *I) { 4725 MVT RetVT; 4726 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4727 return false; 4728 4729 if (RetVT.isVector()) 4730 return selectOperator(I, I->getOpcode()); 4731 4732 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4733 unsigned ResultReg = 0; 4734 uint64_t ShiftVal = C->getZExtValue(); 4735 MVT SrcVT = RetVT; 4736 bool IsZExt = I->getOpcode() != Instruction::AShr; 4737 const Value *Op0 = I->getOperand(0); 4738 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4739 if (!isIntExtFree(ZExt)) { 4740 MVT TmpVT; 4741 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4742 SrcVT = TmpVT; 4743 IsZExt = true; 
4744 Op0 = ZExt->getOperand(0); 4745 } 4746 } 4747 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4748 if (!isIntExtFree(SExt)) { 4749 MVT TmpVT; 4750 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4751 SrcVT = TmpVT; 4752 IsZExt = false; 4753 Op0 = SExt->getOperand(0); 4754 } 4755 } 4756 } 4757 4758 Register Op0Reg = getRegForValue(Op0); 4759 if (!Op0Reg) 4760 return false; 4761 4762 switch (I->getOpcode()) { 4763 default: llvm_unreachable("Unexpected instruction."); 4764 case Instruction::Shl: 4765 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4766 break; 4767 case Instruction::AShr: 4768 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4769 break; 4770 case Instruction::LShr: 4771 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4772 break; 4773 } 4774 if (!ResultReg) 4775 return false; 4776 4777 updateValueMap(I, ResultReg); 4778 return true; 4779 } 4780 4781 Register Op0Reg = getRegForValue(I->getOperand(0)); 4782 if (!Op0Reg) 4783 return false; 4784 4785 Register Op1Reg = getRegForValue(I->getOperand(1)); 4786 if (!Op1Reg) 4787 return false; 4788 4789 unsigned ResultReg = 0; 4790 switch (I->getOpcode()) { 4791 default: llvm_unreachable("Unexpected instruction."); 4792 case Instruction::Shl: 4793 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg); 4794 break; 4795 case Instruction::AShr: 4796 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg); 4797 break; 4798 case Instruction::LShr: 4799 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg); 4800 break; 4801 } 4802 4803 if (!ResultReg) 4804 return false; 4805 4806 updateValueMap(I, ResultReg); 4807 return true; 4808 } 4809 4810 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4811 MVT RetVT, SrcVT; 4812 4813 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4814 return false; 4815 if (!isTypeLegal(I->getType(), RetVT)) 4816 return false; 4817 4818 unsigned Opc; 4819 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4820 Opc = AArch64::FMOVWSr; 4821 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4822 Opc = AArch64::FMOVXDr; 4823 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4824 Opc = AArch64::FMOVSWr; 4825 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4826 Opc = AArch64::FMOVDXr; 4827 else 4828 return false; 4829 4830 const TargetRegisterClass *RC = nullptr; 4831 switch (RetVT.SimpleTy) { 4832 default: llvm_unreachable("Unexpected value type."); 4833 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4834 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4835 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4836 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4837 } 4838 Register Op0Reg = getRegForValue(I->getOperand(0)); 4839 if (!Op0Reg) 4840 return false; 4841 4842 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg); 4843 if (!ResultReg) 4844 return false; 4845 4846 updateValueMap(I, ResultReg); 4847 return true; 4848 } 4849 4850 bool AArch64FastISel::selectFRem(const Instruction *I) { 4851 MVT RetVT; 4852 if (!isTypeLegal(I->getType(), RetVT)) 4853 return false; 4854 4855 RTLIB::Libcall LC; 4856 switch (RetVT.SimpleTy) { 4857 default: 4858 return false; 4859 case MVT::f32: 4860 LC = RTLIB::REM_F32; 4861 break; 4862 case MVT::f64: 4863 LC = RTLIB::REM_F64; 4864 break; 4865 } 4866 4867 ArgListTy Args; 4868 Args.reserve(I->getNumOperands()); 4869 4870 // Populate the argument list. 
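  // Both frem operands are forwarded unchanged to the libcall; with the
  // default libcall names, RTLIB::REM_F32/REM_F64 resolve to fmodf/fmod.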
4871 for (auto &Arg : I->operands()) { 4872 ArgListEntry Entry; 4873 Entry.Val = Arg; 4874 Entry.Ty = Arg->getType(); 4875 Args.push_back(Entry); 4876 } 4877 4878 CallLoweringInfo CLI; 4879 MCContext &Ctx = MF->getContext(); 4880 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4881 TLI.getLibcallName(LC), std::move(Args)); 4882 if (!lowerCallTo(CLI)) 4883 return false; 4884 updateValueMap(I, CLI.ResultReg); 4885 return true; 4886 } 4887 4888 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4889 MVT VT; 4890 if (!isTypeLegal(I->getType(), VT)) 4891 return false; 4892 4893 if (!isa<ConstantInt>(I->getOperand(1))) 4894 return selectBinaryOp(I, ISD::SDIV); 4895 4896 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4897 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4898 !(C.isPowerOf2() || C.isNegatedPowerOf2())) 4899 return selectBinaryOp(I, ISD::SDIV); 4900 4901 unsigned Lg2 = C.countr_zero(); 4902 Register Src0Reg = getRegForValue(I->getOperand(0)); 4903 if (!Src0Reg) 4904 return false; 4905 4906 if (cast<BinaryOperator>(I)->isExact()) { 4907 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2); 4908 if (!ResultReg) 4909 return false; 4910 updateValueMap(I, ResultReg); 4911 return true; 4912 } 4913 4914 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4915 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne); 4916 if (!AddReg) 4917 return false; 4918 4919 // (Src0 < 0) ? Pow2 - 1 : 0; 4920 if (!emitICmp_ri(VT, Src0Reg, 0)) 4921 return false; 4922 4923 unsigned SelectOpc; 4924 const TargetRegisterClass *RC; 4925 if (VT == MVT::i64) { 4926 SelectOpc = AArch64::CSELXr; 4927 RC = &AArch64::GPR64RegClass; 4928 } else { 4929 SelectOpc = AArch64::CSELWr; 4930 RC = &AArch64::GPR32RegClass; 4931 } 4932 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg, 4933 AArch64CC::LT); 4934 if (!SelectReg) 4935 return false; 4936 4937 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4938 // negate the result. 4939 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4940 unsigned ResultReg; 4941 if (C.isNegative()) 4942 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg, 4943 AArch64_AM::ASR, Lg2); 4944 else 4945 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2); 4946 4947 if (!ResultReg) 4948 return false; 4949 4950 updateValueMap(I, ResultReg); 4951 return true; 4952 } 4953 4954 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4955 /// have to duplicate it for AArch64, because otherwise we would fail during the 4956 /// sign-extend emission. 4957 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4958 Register IdxN = getRegForValue(Idx); 4959 if (IdxN == 0) 4960 // Unhandled operand. Halt "fast" selection and bail. 4961 return 0; 4962 4963 // If the index is smaller or larger than intptr_t, truncate or extend it. 4964 MVT PtrVT = TLI.getPointerTy(DL); 4965 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); 4966 if (IdxVT.bitsLT(PtrVT)) { 4967 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false); 4968 } else if (IdxVT.bitsGT(PtrVT)) 4969 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64"); 4970 return IdxN; 4971 } 4972 4973 /// This is mostly a copy of the existing FastISel GEP code, but we have to 4974 /// duplicate it for AArch64, because otherwise we would bail out even for 4975 /// simple cases. 
This is because the standard fastEmit functions don't cover
4976 /// MUL at all and ADD is lowered very inefficiently.
4977 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4978 if (Subtarget->isTargetILP32())
4979 return false;
4980 
4981 Register N = getRegForValue(I->getOperand(0));
4982 if (!N)
4983 return false;
4984 
4985 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4986 // into a single N = N + TotalOffset.
4987 uint64_t TotalOffs = 0;
4988 MVT VT = TLI.getPointerTy(DL);
4989 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4990 GTI != E; ++GTI) {
4991 const Value *Idx = GTI.getOperand();
4992 if (auto *StTy = GTI.getStructTypeOrNull()) {
4993 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4994 // N = N + Offset
4995 if (Field)
4996 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4997 } else {
4998 // If this is a constant subscript, handle it quickly.
4999 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
5000 if (CI->isZero())
5001 continue;
5002 // N = N + Offset
5003 TotalOffs += GTI.getSequentialElementStride(DL) *
5004 cast<ConstantInt>(CI)->getSExtValue();
5005 continue;
5006 }
5007 if (TotalOffs) {
5008 N = emitAdd_ri_(VT, N, TotalOffs);
5009 if (!N)
5010 return false;
5011 TotalOffs = 0;
5012 }
5013 
5014 // N = N + Idx * ElementSize;
5015 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5016 unsigned IdxN = getRegForGEPIndex(Idx);
5017 if (!IdxN)
5018 return false;
5019 
5020 if (ElementSize != 1) {
5021 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5022 if (!C)
5023 return false;
5024 IdxN = emitMul_rr(VT, IdxN, C);
5025 if (!IdxN)
5026 return false;
5027 }
5028 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5029 if (!N)
5030 return false;
5031 }
5032 }
5033 if (TotalOffs) {
5034 N = emitAdd_ri_(VT, N, TotalOffs);
5035 if (!N)
5036 return false;
5037 }
5038 updateValueMap(I, N);
5039 return true;
5040 }
5041 
5042 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5043 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5044 "cmpxchg survived AtomicExpand at optlevel > -O0");
5045 
5046 auto *RetPairTy = cast<StructType>(I->getType());
5047 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5048 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5049 "cmpxchg has a non-i1 status result");
5050 
5051 MVT VT;
5052 if (!isTypeLegal(RetTy, VT))
5053 return false;
5054 
5055 const TargetRegisterClass *ResRC;
5056 unsigned Opc, CmpOpc;
5057 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5058 // extractvalue selection doesn't support that.
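  // The CMP_SWAP_32/64 pseudo (expanded later in the backend) produces the
  // value loaded from memory; the SUBS/CSINC pair emitted below materializes
  // the i1 "success" component of cmpxchg's struct result.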
5059 if (VT == MVT::i32) { 5060 Opc = AArch64::CMP_SWAP_32; 5061 CmpOpc = AArch64::SUBSWrs; 5062 ResRC = &AArch64::GPR32RegClass; 5063 } else if (VT == MVT::i64) { 5064 Opc = AArch64::CMP_SWAP_64; 5065 CmpOpc = AArch64::SUBSXrs; 5066 ResRC = &AArch64::GPR64RegClass; 5067 } else { 5068 return false; 5069 } 5070 5071 const MCInstrDesc &II = TII.get(Opc); 5072 5073 const Register AddrReg = constrainOperandRegClass( 5074 II, getRegForValue(I->getPointerOperand()), II.getNumDefs()); 5075 const Register DesiredReg = constrainOperandRegClass( 5076 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1); 5077 const Register NewReg = constrainOperandRegClass( 5078 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2); 5079 5080 const Register ResultReg1 = createResultReg(ResRC); 5081 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass); 5082 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass); 5083 5084 // FIXME: MachineMemOperand doesn't support cmpxchg yet. 5085 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 5086 .addDef(ResultReg1) 5087 .addDef(ScratchReg) 5088 .addUse(AddrReg) 5089 .addUse(DesiredReg) 5090 .addUse(NewReg); 5091 5092 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc)) 5093 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR) 5094 .addUse(ResultReg1) 5095 .addUse(DesiredReg) 5096 .addImm(0); 5097 5098 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr)) 5099 .addDef(ResultReg2) 5100 .addUse(AArch64::WZR) 5101 .addUse(AArch64::WZR) 5102 .addImm(AArch64CC::NE); 5103 5104 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); 5105 updateValueMap(I, ResultReg1, 2); 5106 return true; 5107 } 5108 5109 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 5110 if (TLI.fallBackToDAGISel(*I)) 5111 return false; 5112 switch (I->getOpcode()) { 5113 default: 5114 break; 5115 case Instruction::Add: 5116 case Instruction::Sub: 5117 return selectAddSub(I); 5118 case Instruction::Mul: 5119 return selectMul(I); 5120 case Instruction::SDiv: 5121 return selectSDiv(I); 5122 case Instruction::SRem: 5123 if (!selectBinaryOp(I, ISD::SREM)) 5124 return selectRem(I, ISD::SREM); 5125 return true; 5126 case Instruction::URem: 5127 if (!selectBinaryOp(I, ISD::UREM)) 5128 return selectRem(I, ISD::UREM); 5129 return true; 5130 case Instruction::Shl: 5131 case Instruction::LShr: 5132 case Instruction::AShr: 5133 return selectShift(I); 5134 case Instruction::And: 5135 case Instruction::Or: 5136 case Instruction::Xor: 5137 return selectLogicalOp(I); 5138 case Instruction::Br: 5139 return selectBranch(I); 5140 case Instruction::IndirectBr: 5141 return selectIndirectBr(I); 5142 case Instruction::BitCast: 5143 if (!FastISel::selectBitCast(I)) 5144 return selectBitCast(I); 5145 return true; 5146 case Instruction::FPToSI: 5147 if (!selectCast(I, ISD::FP_TO_SINT)) 5148 return selectFPToInt(I, /*Signed=*/true); 5149 return true; 5150 case Instruction::FPToUI: 5151 return selectFPToInt(I, /*Signed=*/false); 5152 case Instruction::ZExt: 5153 case Instruction::SExt: 5154 return selectIntExt(I); 5155 case Instruction::Trunc: 5156 if (!selectCast(I, ISD::TRUNCATE)) 5157 return selectTrunc(I); 5158 return true; 5159 case Instruction::FPExt: 5160 return selectFPExt(I); 5161 case Instruction::FPTrunc: 5162 return selectFPTrunc(I); 5163 case Instruction::SIToFP: 5164 if (!selectCast(I, ISD::SINT_TO_FP)) 5165 return selectIntToFP(I, /*Signed=*/true); 5166 return true; 5167 case Instruction::UIToFP: 
5168 return selectIntToFP(I, /*Signed=*/false);
5169 case Instruction::Load:
5170 return selectLoad(I);
5171 case Instruction::Store:
5172 return selectStore(I);
5173 case Instruction::FCmp:
5174 case Instruction::ICmp:
5175 return selectCmp(I);
5176 case Instruction::Select:
5177 return selectSelect(I);
5178 case Instruction::Ret:
5179 return selectRet(I);
5180 case Instruction::FRem:
5181 return selectFRem(I);
5182 case Instruction::GetElementPtr:
5183 return selectGetElementPtr(I);
5184 case Instruction::AtomicCmpXchg:
5185 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5186 }
5187 
5188 // Fall back to target-independent instruction selection.
5189 return selectOperator(I, I->getOpcode());
5190 }
5191 
5192 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5193 const TargetLibraryInfo *LibInfo) {
5194 
// Don't use FastISel for functions with ZA/ZT0 state or a streaming (or
// streaming-compatible) interface or body; returning nullptr makes the
// caller fall back to SelectionDAG instruction selection.
5195 SMEAttrs CallerAttrs(*FuncInfo.Fn);
5196 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() ||
5197 CallerAttrs.hasStreamingInterfaceOrBody() ||
5198 CallerAttrs.hasStreamingCompatibleInterface())
5199 return nullptr;
5200 return new AArch64FastISel(FuncInfo, LibInfo);
5201 }
5202