//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    union {
      unsigned Reg;
      int FI;
    } Base;
    Register OffsetReg;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    void setReg(Register Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg.id();
    }

    Register getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }

    void setOffsetReg(Register Reg) { OffsetReg = Reg; }

    Register getOffsetReg() const { return OffsetReg; }

    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }

    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }

    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags,
                            unsigned ScaleFactor, MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          MaybeAlign Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  Register getRegForGEPIndex(const Value *Idx);

  // Emit helper routines.
  Register emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  Register emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
                         Register RHSReg, bool SetFlags = false,
                         bool WantResult = true);
  Register emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg, uint64_t Imm,
                         bool SetFlags = false, bool WantResult = true);
  Register emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
                         Register RHSReg, AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  Register emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
                         Register RHSReg, AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  Register emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, Register SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  bool emitStoreRelease(MVT VT, Register SrcReg, Register AddrReg,
                        MachineMemOperand *MMO = nullptr);
  Register emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, bool isZExt);
  Register emiti1Ext(Register SrcReg, MVT DestVT, bool isZExt);
  Register emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  Register emitAdd_ri_(MVT VT, Register Op0, int64_t Imm);
  Register emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  Register emitSubs_rr(MVT RetVT, Register LHSReg, Register RHSReg,
                       bool WantResult = true);
  Register emitSubs_rs(MVT RetVT, Register LHSReg, Register RHSReg,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  Register emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  Register emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, Register LHSReg,
                            uint64_t Imm);
  Register emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, Register LHSReg,
                            Register RHSReg, uint64_t ShiftImm);
  Register emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm);
  Register emitMul_rr(MVT RetVT, Register Op0, Register Op1);
  Register emitSMULL_rr(MVT RetVT, Register Op0, Register Op1);
  Register emitUMULL_rr(MVT RetVT, Register Op0, Register Op1);
  Register emitLSL_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
  Register emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  Register emitLSR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
  Register emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  Register emitASR_rr(MVT RetVT, Register Op0Reg, Register Op1Reg);
  Register emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0Reg, uint64_t Imm,
                      bool IsZExt = false);

  Register materializeInt(const ConstantInt *CI, MVT VT);
  Register materializeFP(const ConstantFP *CFP, MVT VT);
  Register materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  Register fastMaterializeAlloca(const AllocaInst *AI) override;
  Register fastMaterializeConstant(const Constant *C) override;
  Register fastMaterializeFloatZero(const ConstantFP *CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

/// Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    if (LI->hasOneUse())
      return true;

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
      return true;

  return false;
}

/// Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
static unsigned getImplicitScaleFactor(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return 0; // invalid
  case MVT::i1: // fall-through
  case MVT::i8:
    return 1;
  case MVT::i16:
    return 2;
  case MVT::i32: // fall-through
  case MVT::f32:
    return 4;
  case MVT::i64: // fall-through
  case MVT::f64:
    return 8;
  }
}

CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::GHC)
    return CC_AArch64_GHC;
  if (CC == CallingConv::CFGuard_Check)
    return CC_AArch64_Win64_CFGuard_Check;
  if (Subtarget->isTargetDarwin())
    return CC_AArch64_DarwinPCS;
  if (Subtarget->isTargetWindows())
    return CC_AArch64_Win64PCS;
  return CC_AArch64_AAPCS;
}

Register AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  auto SI = FuncInfo.StaticAllocaMap.find(AI);
  if (SI == FuncInfo.StaticAllocaMap.end())
    return Register();

  Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
          ResultReg)
      .addFrameIndex(SI->second)
      .addImm(0)
      .addImm(0);
  return ResultReg;
}

Register AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return Register();

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  Register ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}

Register AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return Register();

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  int Imm =
      Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
  if (Imm != -1) {
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // For the large code model materialize the FP constant in code.
  if (TM.getCodeModel() == CodeModel::Large) {
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    Register TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(TmpReg, getKillRegState(true));

    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}

Register AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return Register();

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
    return Register();

  if (FuncInfo.MF->getInfo<AArch64FunctionInfo>()->hasELFSignedGOT())
    return Register();

  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
  if (!DestEVT.isSimple())
    return Register();

  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  Register ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    unsigned LdrOpc;
    if (Subtarget->isTargetILP32()) {
      ResultReg = createResultReg(&AArch64::GPR32RegClass);
      LdrOpc = AArch64::LDRWui;
    } else {
      ResultReg = createResultReg(&AArch64::GPR64RegClass);
      LdrOpc = AArch64::LDRXui;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                          AArch64II::MO_NC | OpFlags);
    if (!Subtarget->isTargetILP32())
      return ResultReg;

    // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
    // so we must extend the result on ILP32.
    Register Result64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::SUBREG_TO_REG))
        .addDef(Result64)
        .addImm(0)
        .addReg(ResultReg, RegState::Kill)
        .addImm(AArch64::sub_32);
    return Result64;
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    if (OpFlags & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      //
      // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
      // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
      // are not exactly 1:1 with FastISel so we cannot easily abstract this
      // out. At some point, it would be nice to find a way to not have this
      // duplicate code.
      Register DstReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
              DstReg)
          .addReg(ADRPReg)
          .addGlobalAddress(GV, /*Offset=*/0x100000000,
                            AArch64II::MO_PREL | AArch64II::MO_G3)
          .addImm(48);
      ADRPReg = DstReg;
    }

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0,
                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
        .addImm(0);
  }
  return ResultReg;
}

Register AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return Register();
  MVT VT = CEVT.getSimpleVT();
  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
  // 'null' pointers need to have a somewhat special treatment.
  if (isa<ConstantPointerNull>(C)) {
    assert(VT == MVT::i64 && "Expected 64-bit pointers");
    return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
  }

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

  return Register();
}

Register AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return Register();

  if (VT != MVT::f32 && VT != MVT::f64)
    return Register();

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
}

/// Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
        return true;
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
        return true;
  }
  return false;
}

// Computes the address to get to an object.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast:
    // Look through bitcasts.
    return computeAddress(U->getOperand(0), Addr, Ty);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
         GTI != E; ++GTI) {
      const Value *Op = GTI.getOperand();
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = GTI.getSequentialElementStride(DL);
        while (true) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
  case Instruction::Sub: {
    // Subs of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }
    break;
  }
  case Instruction::Shl: {
    if (Addr.getOffsetReg())
      break;

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
    if (!CI)
      break;

    unsigned Val = CI->getZExtValue();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
            std::swap(LHS, RHS);

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Register Reg = getRegForValue(LHS);
            if (!Reg)
              return false;
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
            Addr.setOffsetReg(Reg);
            return true;
          }
      }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::Mul: {
    if (Addr.getOffsetReg())
      break;

    if (!isMulPowOf2(U))
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize power-of-2 value to the RHS.
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())
        std::swap(LHS, RHS);

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = LHS;
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::And: {
    if (Addr.getOffsetReg())
      break;

    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
        std::swap(LHS, RHS);

    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        Register Reg = getRegForValue(LHS);
        if (!Reg)
          return false;
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
        Addr.setOffsetReg(Reg);
        return true;
      }
    break;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())
      break;

    const Value *Src = nullptr;
    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    if (!Src)
      break;

    Addr.setShift(0);
    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  } // end switch

  if (Addr.isRegBase() && !Addr.getReg()) {
    Register Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setReg(Reg);
    return true;
  }

  if (!Addr.getOffsetReg()) {
    Register Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }

  return false;
}

bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  bool InMBB = true;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg().isValid();
  }

  return false;
}

bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
    return false;

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

/// Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign- or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.getMBB(I->getParent()) == FuncInfo.MBB;
}

bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    Register ResultReg;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    Register ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}

void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           MachineMemOperand::Flags Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    Addr.setReg(
      constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    Addr.setOffsetReg(
      constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}

Register AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return Register();
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32: // fall-through
  case MVT::i64:
    break;
  }
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr)
          std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return Register();

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  Register ResultReg;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
                                WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    Register RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return Register();
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
                         SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      Register RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return Register();
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
                                ShiftVal, SetFlags, WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          Register RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return Register();
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
                                    ShiftVal, SetFlags, WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return Register();

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
}

Register AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, Register LHSReg,
                                        Register RHSReg, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
      RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
    return Register();

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return Register();

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
      { AArch64::ADDWrr,  AArch64::ADDXrr  } },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  Register ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg);
  return ResultReg;
}

Register AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, Register LHSReg,
                                        uint64_t Imm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return Register();

  unsigned ShiftImm;
  if (isUInt<12>(Imm))
    ShiftImm = 0;
  else if ((Imm & 0xfff000) == Imm) {
    ShiftImm = 12;
    Imm >>= 12;
  } else
    return Register();

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri,  AArch64::SUBXri  },
      { AArch64::ADDWri,  AArch64::ADDXri  } },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  Register ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addImm(Imm)
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
  return ResultReg;
}

Register AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, Register LHSReg,
                                        Register RHSReg,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return Register();

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return Register();

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
      { AArch64::ADDWrs,  AArch64::ADDXrs  } },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  Register ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getShifterImm(ShiftType, ShiftImm));
  return ResultReg;
}

Register AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, Register LHSReg,
                                        Register RHSReg,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
         RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return Register();

  if (ShiftImm >= 4)
    return Register();

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
      { AArch64::ADDWrx,  AArch64::ADDXrx  } },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC = nullptr;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  Register ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getArithExtendImm(ExtType, ShiftImm));
  return ResultReg;
}

bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
  Type *Ty = LHS->getType();
  EVT EVT = TLI.getValueType(DL, Ty, true);
  if (!EVT.isSimple())
    return false;
  MVT VT = EVT.getSimpleVT();

  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    return emitICmp(VT, LHS, RHS, IsZExt);
  case MVT::f32:
  case MVT::f64:
    return emitFCmp(VT, LHS, RHS);
  }
}

bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
                 IsZExt)
      .isValid();
}

bool AArch64FastISel::emitICmp_ri(MVT RetVT, Register LHSReg, uint64_t Imm) {
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
                       /*SetFlags=*/true, /*WantResult=*/false)
      .isValid();
}

bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  if (RetVT != MVT::f32 && RetVT != MVT::f64)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  bool UseImm = false;
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->isZero() && !CFP->isNegative())
      UseImm = true;

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;

  if (UseImm) {
    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(LHSReg);
    return true;
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return false;

  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
      .addReg(LHSReg)
      .addReg(RHSReg);
  return true;
}

Register AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

/// This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
Register AArch64FastISel::emitAdd_ri_(MVT VT, Register Op0, int64_t Imm) {
  Register ResultReg;
  if (Imm < 0)
    ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
  else
    ResultReg = emitAddSub_ri(true, VT, Op0, Imm);

  if (ResultReg)
    return ResultReg;

  Register CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return Register();

  ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
  return ResultReg;
}

Register AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

Register AArch64FastISel::emitSubs_rr(MVT RetVT, Register LHSReg,
                                      Register RHSReg, bool WantResult) {
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
                       /*SetFlags=*/true, WantResult);
}

Register AArch64FastISel::emitSubs_rs(MVT RetVT, Register LHSReg,
                                      Register RHSReg,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
                       ShiftImm, /*SetFlags=*/true, WantResult);
}

Register AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return Register();

  Register ResultReg;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      Register RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return Register();
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        Register RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return Register();
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return Register();

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}

Register AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           Register LHSReg, uint64_t Imm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };
  const TargetRegisterClass *RC;
  unsigned Opc;
  unsigned RegSize;
  switch (RetVT.SimpleTy) {
  default:
    return Register();
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32: {
    unsigned Idx = ISDOpc - ISD::AND;
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    RegSize = 32;
    break;
  }
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;
    RegSize = 64;
    break;
  }

  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return Register();

  Register ResultReg =
      fastEmitInst_ri(Opc, RC, LHSReg,
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
0xff : 0xffff; 1697 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1698 } 1699 return ResultReg; 1700 } 1701 1702 Register AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, 1703 Register LHSReg, Register RHSReg, 1704 uint64_t ShiftImm) { 1705 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), 1706 "ISD nodes are not consecutive!"); 1707 static const unsigned OpcTable[3][2] = { 1708 { AArch64::ANDWrs, AArch64::ANDXrs }, 1709 { AArch64::ORRWrs, AArch64::ORRXrs }, 1710 { AArch64::EORWrs, AArch64::EORXrs } 1711 }; 1712 1713 // Don't deal with undefined shifts. 1714 if (ShiftImm >= RetVT.getSizeInBits()) 1715 return Register(); 1716 1717 const TargetRegisterClass *RC; 1718 unsigned Opc; 1719 switch (RetVT.SimpleTy) { 1720 default: 1721 return Register(); 1722 case MVT::i1: 1723 case MVT::i8: 1724 case MVT::i16: 1725 case MVT::i32: 1726 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1727 RC = &AArch64::GPR32RegClass; 1728 break; 1729 case MVT::i64: 1730 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1731 RC = &AArch64::GPR64RegClass; 1732 break; 1733 } 1734 Register ResultReg = 1735 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg, 1736 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1737 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1738 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1739 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1740 } 1741 return ResultReg; 1742 } 1743 1744 Register AArch64FastISel::emitAnd_ri(MVT RetVT, Register LHSReg, uint64_t Imm) { 1745 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm); 1746 } 1747 1748 Register AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1749 bool WantZExt, MachineMemOperand *MMO) { 1750 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1751 return Register(); 1752 1753 // Simplify this down to something we can handle. 1754 if (!simplifyAddress(Addr, VT)) 1755 return Register(); 1756 1757 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1758 if (!ScaleFactor) 1759 llvm_unreachable("Unexpected value type."); 1760 1761 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1762 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1763 bool UseScaled = true; 1764 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1765 UseScaled = false; 1766 ScaleFactor = 1; 1767 } 1768 1769 static const unsigned GPOpcTable[2][8][4] = { 1770 // Sign-extend. 1771 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1772 AArch64::LDURXi }, 1773 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1774 AArch64::LDURXi }, 1775 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1776 AArch64::LDRXui }, 1777 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1778 AArch64::LDRXui }, 1779 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1780 AArch64::LDRXroX }, 1781 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1782 AArch64::LDRXroX }, 1783 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1784 AArch64::LDRXroW }, 1785 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1786 AArch64::LDRXroW } 1787 }, 1788 // Zero-extend. 
1789 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1790 AArch64::LDURXi }, 1791 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1792 AArch64::LDURXi }, 1793 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1794 AArch64::LDRXui }, 1795 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1796 AArch64::LDRXui }, 1797 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1798 AArch64::LDRXroX }, 1799 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1800 AArch64::LDRXroX }, 1801 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1802 AArch64::LDRXroW }, 1803 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1804 AArch64::LDRXroW } 1805 } 1806 }; 1807 1808 static const unsigned FPOpcTable[4][2] = { 1809 { AArch64::LDURSi, AArch64::LDURDi }, 1810 { AArch64::LDRSui, AArch64::LDRDui }, 1811 { AArch64::LDRSroX, AArch64::LDRDroX }, 1812 { AArch64::LDRSroW, AArch64::LDRDroW } 1813 }; 1814 1815 unsigned Opc; 1816 const TargetRegisterClass *RC; 1817 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1818 Addr.getOffsetReg(); 1819 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1820 if (Addr.getExtendType() == AArch64_AM::UXTW || 1821 Addr.getExtendType() == AArch64_AM::SXTW) 1822 Idx++; 1823 1824 bool IsRet64Bit = RetVT == MVT::i64; 1825 switch (VT.SimpleTy) { 1826 default: 1827 llvm_unreachable("Unexpected value type."); 1828 case MVT::i1: // Intentional fall-through. 1829 case MVT::i8: 1830 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1831 RC = (IsRet64Bit && !WantZExt) ? 1832 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1833 break; 1834 case MVT::i16: 1835 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1836 RC = (IsRet64Bit && !WantZExt) ? 1837 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1838 break; 1839 case MVT::i32: 1840 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1841 RC = (IsRet64Bit && !WantZExt) ? 1842 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1843 break; 1844 case MVT::i64: 1845 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1846 RC = &AArch64::GPR64RegClass; 1847 break; 1848 case MVT::f32: 1849 Opc = FPOpcTable[Idx][0]; 1850 RC = &AArch64::FPR32RegClass; 1851 break; 1852 case MVT::f64: 1853 Opc = FPOpcTable[Idx][1]; 1854 RC = &AArch64::FPR64RegClass; 1855 break; 1856 } 1857 1858 // Create the base instruction, then add the operands. 1859 Register ResultReg = createResultReg(RC); 1860 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1861 TII.get(Opc), ResultReg); 1862 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1863 1864 // Loading an i1 requires special handling. 1865 if (VT == MVT::i1) { 1866 Register ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1); 1867 assert(ANDReg && "Unexpected AND instruction emission failure."); 1868 ResultReg = ANDReg; 1869 } 1870 1871 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1872 // the 32bit reg to a 64bit reg. 
1873 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1874 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 1875 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1876 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1877 .addImm(0) 1878 .addReg(ResultReg, getKillRegState(true)) 1879 .addImm(AArch64::sub_32); 1880 ResultReg = Reg64; 1881 } 1882 return ResultReg; 1883 } 1884 1885 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1886 MVT VT; 1887 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1888 return false; 1889 1890 if (VT.isVector()) 1891 return selectOperator(I, I->getOpcode()); 1892 1893 Register ResultReg; 1894 switch (I->getOpcode()) { 1895 default: 1896 llvm_unreachable("Unexpected instruction."); 1897 case Instruction::Add: 1898 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1899 break; 1900 case Instruction::Sub: 1901 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1902 break; 1903 } 1904 if (!ResultReg) 1905 return false; 1906 1907 updateValueMap(I, ResultReg); 1908 return true; 1909 } 1910 1911 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1912 MVT VT; 1913 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1914 return false; 1915 1916 if (VT.isVector()) 1917 return selectOperator(I, I->getOpcode()); 1918 1919 Register ResultReg; 1920 switch (I->getOpcode()) { 1921 default: 1922 llvm_unreachable("Unexpected instruction."); 1923 case Instruction::And: 1924 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1925 break; 1926 case Instruction::Or: 1927 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1928 break; 1929 case Instruction::Xor: 1930 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1931 break; 1932 } 1933 if (!ResultReg) 1934 return false; 1935 1936 updateValueMap(I, ResultReg); 1937 return true; 1938 } 1939 1940 bool AArch64FastISel::selectLoad(const Instruction *I) { 1941 MVT VT; 1942 // Verify we have a legal type before going any further. Currently, we handle 1943 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1944 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 1945 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1946 cast<LoadInst>(I)->isAtomic()) 1947 return false; 1948 1949 const Value *SV = I->getOperand(0); 1950 if (TLI.supportSwiftError()) { 1951 // Swifterror values can come from either a function parameter with 1952 // swifterror attribute or an alloca with swifterror attribute. 1953 if (const Argument *Arg = dyn_cast<Argument>(SV)) { 1954 if (Arg->hasSwiftErrorAttr()) 1955 return false; 1956 } 1957 1958 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { 1959 if (Alloca->isSwiftError()) 1960 return false; 1961 } 1962 } 1963 1964 // See if we can handle this address. 1965 Address Addr; 1966 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 1967 return false; 1968 1969 // Fold the following sign-/zero-extend into the load instruction. 
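// If the load's only user is a single zext/sext, perform the load directly in
// the wider type so no separate extend is needed, e.g. an i8 load feeding a
// zext to i32 becomes a single "ldrb" (which already zero-extends).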
1970 bool WantZExt = true; 1971 MVT RetVT = VT; 1972 const Value *IntExtVal = nullptr; 1973 if (I->hasOneUse()) { 1974 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { 1975 if (isTypeSupported(ZE->getType(), RetVT)) 1976 IntExtVal = ZE; 1977 else 1978 RetVT = VT; 1979 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { 1980 if (isTypeSupported(SE->getType(), RetVT)) 1981 IntExtVal = SE; 1982 else 1983 RetVT = VT; 1984 WantZExt = false; 1985 } 1986 } 1987 1988 Register ResultReg = 1989 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); 1990 if (!ResultReg) 1991 return false; 1992 1993 // There are a few different cases we have to handle, because the load or the 1994 // sign-/zero-extend might not be selected by FastISel if we fall-back to 1995 // SelectionDAG. There is also an ordering issue when both instructions are in 1996 // different basic blocks. 1997 // 1.) The load instruction is selected by FastISel, but the integer extend 1998 // not. This usually happens when the integer extend is in a different 1999 // basic block and SelectionDAG took over for that basic block. 2000 // 2.) The load instruction is selected before the integer extend. This only 2001 // happens when the integer extend is in a different basic block. 2002 // 3.) The load instruction is selected by SelectionDAG and the integer extend 2003 // by FastISel. This happens if there are instructions between the load 2004 // and the integer extend that couldn't be selected by FastISel. 2005 if (IntExtVal) { 2006 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG 2007 // could select it. Emit a copy to subreg if necessary. FastISel will remove 2008 // it when it selects the integer extend. 2009 Register Reg = lookUpRegForValue(IntExtVal); 2010 auto *MI = MRI.getUniqueVRegDef(Reg); 2011 if (!MI) { 2012 if (RetVT == MVT::i64 && VT <= MVT::i32) { 2013 if (WantZExt) { 2014 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). 2015 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt)); 2016 ResultReg = std::prev(I)->getOperand(0).getReg(); 2017 removeDeadCode(I, std::next(I)); 2018 } else 2019 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, 2020 AArch64::sub_32); 2021 } 2022 updateValueMap(I, ResultReg); 2023 return true; 2024 } 2025 2026 // The integer extend has already been emitted - delete all the instructions 2027 // that have been emitted by the integer extend lowering code and use the 2028 // result from the load instruction directly. 
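// Each iteration below removes one dead instruction and then moves on to the
// unique definition of that instruction's first register input, until the
// whole chain emitted for the extend is gone.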
2029 while (MI) { 2030 Reg = 0; 2031 for (auto &Opnd : MI->uses()) { 2032 if (Opnd.isReg()) { 2033 Reg = Opnd.getReg(); 2034 break; 2035 } 2036 } 2037 MachineBasicBlock::iterator I(MI); 2038 removeDeadCode(I, std::next(I)); 2039 MI = nullptr; 2040 if (Reg) 2041 MI = MRI.getUniqueVRegDef(Reg); 2042 } 2043 updateValueMap(IntExtVal, ResultReg); 2044 return true; 2045 } 2046 2047 updateValueMap(I, ResultReg); 2048 return true; 2049 } 2050 2051 bool AArch64FastISel::emitStoreRelease(MVT VT, Register SrcReg, 2052 Register AddrReg, 2053 MachineMemOperand *MMO) { 2054 unsigned Opc; 2055 switch (VT.SimpleTy) { 2056 default: return false; 2057 case MVT::i8: Opc = AArch64::STLRB; break; 2058 case MVT::i16: Opc = AArch64::STLRH; break; 2059 case MVT::i32: Opc = AArch64::STLRW; break; 2060 case MVT::i64: Opc = AArch64::STLRX; break; 2061 } 2062 2063 const MCInstrDesc &II = TII.get(Opc); 2064 SrcReg = constrainOperandRegClass(II, SrcReg, 0); 2065 AddrReg = constrainOperandRegClass(II, AddrReg, 1); 2066 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 2067 .addReg(SrcReg) 2068 .addReg(AddrReg) 2069 .addMemOperand(MMO); 2070 return true; 2071 } 2072 2073 bool AArch64FastISel::emitStore(MVT VT, Register SrcReg, Address Addr, 2074 MachineMemOperand *MMO) { 2075 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 2076 return false; 2077 2078 // Simplify this down to something we can handle. 2079 if (!simplifyAddress(Addr, VT)) 2080 return false; 2081 2082 unsigned ScaleFactor = getImplicitScaleFactor(VT); 2083 if (!ScaleFactor) 2084 llvm_unreachable("Unexpected value type."); 2085 2086 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 2087 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 2088 bool UseScaled = true; 2089 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 2090 UseScaled = false; 2091 ScaleFactor = 1; 2092 } 2093 2094 static const unsigned OpcTable[4][6] = { 2095 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 2096 AArch64::STURSi, AArch64::STURDi }, 2097 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 2098 AArch64::STRSui, AArch64::STRDui }, 2099 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 2100 AArch64::STRSroX, AArch64::STRDroX }, 2101 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 2102 AArch64::STRSroW, AArch64::STRDroW } 2103 }; 2104 2105 unsigned Opc; 2106 bool VTIsi1 = false; 2107 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 2108 Addr.getOffsetReg(); 2109 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 2110 if (Addr.getExtendType() == AArch64_AM::UXTW || 2111 Addr.getExtendType() == AArch64_AM::SXTW) 2112 Idx++; 2113 2114 switch (VT.SimpleTy) { 2115 default: llvm_unreachable("Unexpected value type."); 2116 case MVT::i1: VTIsi1 = true; [[fallthrough]]; 2117 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2118 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2119 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2120 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2121 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2122 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2123 } 2124 2125 // Storing an i1 requires special handling. 2126 if (VTIsi1 && SrcReg != AArch64::WZR) { 2127 Register ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1); 2128 assert(ANDReg && "Unexpected AND instruction emission failure."); 2129 SrcReg = ANDReg; 2130 } 2131 // Create the base instruction, then add the operands. 
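// constrainOperandRegClass ensures SrcReg is in a register class the chosen
// store opcode accepts, inserting a COPY into a fresh vreg if it is not.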
2132 const MCInstrDesc &II = TII.get(Opc); 2133 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2134 MachineInstrBuilder MIB = 2135 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg); 2136 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2137 2138 return true; 2139 } 2140 2141 bool AArch64FastISel::selectStore(const Instruction *I) { 2142 MVT VT; 2143 const Value *Op0 = I->getOperand(0); 2144 // Verify we have a legal type before going any further. Currently, we handle 2145 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2146 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2147 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) 2148 return false; 2149 2150 const Value *PtrV = I->getOperand(1); 2151 if (TLI.supportSwiftError()) { 2152 // Swifterror values can come from either a function parameter with 2153 // swifterror attribute or an alloca with swifterror attribute. 2154 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { 2155 if (Arg->hasSwiftErrorAttr()) 2156 return false; 2157 } 2158 2159 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { 2160 if (Alloca->isSwiftError()) 2161 return false; 2162 } 2163 } 2164 2165 // Get the value to be stored into a register. Use the zero register directly 2166 // when possible to avoid an unnecessary copy and a wasted register. 2167 Register SrcReg; 2168 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2169 if (CI->isZero()) 2170 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2171 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2172 if (CF->isZero() && !CF->isNegative()) { 2173 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2174 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2175 } 2176 } 2177 2178 if (!SrcReg) 2179 SrcReg = getRegForValue(Op0); 2180 2181 if (!SrcReg) 2182 return false; 2183 2184 auto *SI = cast<StoreInst>(I); 2185 2186 // Try to emit a STLR for seq_cst/release. 2187 if (SI->isAtomic()) { 2188 AtomicOrdering Ord = SI->getOrdering(); 2189 // The non-atomic instructions are sufficient for relaxed stores. 2190 if (isReleaseOrStronger(Ord)) { 2191 // The STLR addressing mode only supports a base reg; pass that directly. 2192 Register AddrReg = getRegForValue(PtrV); 2193 if (!AddrReg) 2194 return false; 2195 return emitStoreRelease(VT, SrcReg, AddrReg, 2196 createMachineMemOperandFor(I)); 2197 } 2198 } 2199 2200 // See if we can handle this address. 2201 Address Addr; 2202 if (!computeAddress(PtrV, Addr, Op0->getType())) 2203 return false; 2204 2205 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2206 return false; 2207 return true; 2208 } 2209 2210 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2211 switch (Pred) { 2212 case CmpInst::FCMP_ONE: 2213 case CmpInst::FCMP_UEQ: 2214 default: 2215 // AL is our "false" for now. The other two need more compares. 
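// (FCMP_ONE and FCMP_UEQ are handled by emitting a second conditional
// instruction instead; see selectBranch and selectCmp.)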
2216 return AArch64CC::AL; 2217 case CmpInst::ICMP_EQ: 2218 case CmpInst::FCMP_OEQ: 2219 return AArch64CC::EQ; 2220 case CmpInst::ICMP_SGT: 2221 case CmpInst::FCMP_OGT: 2222 return AArch64CC::GT; 2223 case CmpInst::ICMP_SGE: 2224 case CmpInst::FCMP_OGE: 2225 return AArch64CC::GE; 2226 case CmpInst::ICMP_UGT: 2227 case CmpInst::FCMP_UGT: 2228 return AArch64CC::HI; 2229 case CmpInst::FCMP_OLT: 2230 return AArch64CC::MI; 2231 case CmpInst::ICMP_ULE: 2232 case CmpInst::FCMP_OLE: 2233 return AArch64CC::LS; 2234 case CmpInst::FCMP_ORD: 2235 return AArch64CC::VC; 2236 case CmpInst::FCMP_UNO: 2237 return AArch64CC::VS; 2238 case CmpInst::FCMP_UGE: 2239 return AArch64CC::PL; 2240 case CmpInst::ICMP_SLT: 2241 case CmpInst::FCMP_ULT: 2242 return AArch64CC::LT; 2243 case CmpInst::ICMP_SLE: 2244 case CmpInst::FCMP_ULE: 2245 return AArch64CC::LE; 2246 case CmpInst::FCMP_UNE: 2247 case CmpInst::ICMP_NE: 2248 return AArch64CC::NE; 2249 case CmpInst::ICMP_UGE: 2250 return AArch64CC::HS; 2251 case CmpInst::ICMP_ULT: 2252 return AArch64CC::LO; 2253 } 2254 } 2255 2256 /// Try to emit a combined compare-and-branch instruction. 2257 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2258 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions 2259 // will not be produced, as they are conditional branch instructions that do 2260 // not set flags. 2261 if (FuncInfo.MF->getFunction().hasFnAttribute( 2262 Attribute::SpeculativeLoadHardening)) 2263 return false; 2264 2265 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2266 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2267 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2268 2269 const Value *LHS = CI->getOperand(0); 2270 const Value *RHS = CI->getOperand(1); 2271 2272 MVT VT; 2273 if (!isTypeSupported(LHS->getType(), VT)) 2274 return false; 2275 2276 unsigned BW = VT.getSizeInBits(); 2277 if (BW > 64) 2278 return false; 2279 2280 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0)); 2281 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1)); 2282 2283 // Try to take advantage of fallthrough opportunities. 
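// If the true block is the layout successor, swap the successors and invert
// the predicate so the emitted branch targets the false block and the true
// case simply falls through.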
2284 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2285 std::swap(TBB, FBB); 2286 Predicate = CmpInst::getInversePredicate(Predicate); 2287 } 2288 2289 int TestBit = -1; 2290 bool IsCmpNE; 2291 switch (Predicate) { 2292 default: 2293 return false; 2294 case CmpInst::ICMP_EQ: 2295 case CmpInst::ICMP_NE: 2296 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2297 std::swap(LHS, RHS); 2298 2299 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2300 return false; 2301 2302 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2303 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2304 const Value *AndLHS = AI->getOperand(0); 2305 const Value *AndRHS = AI->getOperand(1); 2306 2307 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2308 if (C->getValue().isPowerOf2()) 2309 std::swap(AndLHS, AndRHS); 2310 2311 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2312 if (C->getValue().isPowerOf2()) { 2313 TestBit = C->getValue().logBase2(); 2314 LHS = AndLHS; 2315 } 2316 } 2317 2318 if (VT == MVT::i1) 2319 TestBit = 0; 2320 2321 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2322 break; 2323 case CmpInst::ICMP_SLT: 2324 case CmpInst::ICMP_SGE: 2325 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2326 return false; 2327 2328 TestBit = BW - 1; 2329 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2330 break; 2331 case CmpInst::ICMP_SGT: 2332 case CmpInst::ICMP_SLE: 2333 if (!isa<ConstantInt>(RHS)) 2334 return false; 2335 2336 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2337 return false; 2338 2339 TestBit = BW - 1; 2340 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2341 break; 2342 } // end switch 2343 2344 static const unsigned OpcTable[2][2][2] = { 2345 { {AArch64::CBZW, AArch64::CBZX }, 2346 {AArch64::CBNZW, AArch64::CBNZX} }, 2347 { {AArch64::TBZW, AArch64::TBZX }, 2348 {AArch64::TBNZW, AArch64::TBNZX} } 2349 }; 2350 2351 bool IsBitTest = TestBit != -1; 2352 bool Is64Bit = BW == 64; 2353 if (TestBit < 32 && TestBit >= 0) 2354 Is64Bit = false; 2355 2356 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2357 const MCInstrDesc &II = TII.get(Opc); 2358 2359 Register SrcReg = getRegForValue(LHS); 2360 if (!SrcReg) 2361 return false; 2362 2363 if (BW == 64 && !Is64Bit) 2364 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32); 2365 2366 if ((BW < 32) && !IsBitTest) 2367 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); 2368 2369 // Emit the combined compare and branch instruction. 2370 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2371 MachineInstrBuilder MIB = 2372 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)) 2373 .addReg(SrcReg); 2374 if (IsBitTest) 2375 MIB.addImm(TestBit); 2376 MIB.addMBB(TBB); 2377 2378 finishCondBranch(BI->getParent(), TBB, FBB); 2379 return true; 2380 } 2381 2382 bool AArch64FastISel::selectBranch(const Instruction *I) { 2383 const BranchInst *BI = cast<BranchInst>(I); 2384 if (BI->isUnconditional()) { 2385 MachineBasicBlock *MSucc = FuncInfo.getMBB(BI->getSuccessor(0)); 2386 fastEmitBranch(MSucc, BI->getDebugLoc()); 2387 return true; 2388 } 2389 2390 MachineBasicBlock *TBB = FuncInfo.getMBB(BI->getSuccessor(0)); 2391 MachineBasicBlock *FBB = FuncInfo.getMBB(BI->getSuccessor(1)); 2392 2393 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2394 if (CI->hasOneUse() && isValueAvailable(CI)) { 2395 // Try to optimize or fold the cmp. 
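// If the predicate folds to FCMP_FALSE/FCMP_TRUE, the conditional branch
// degenerates into an unconditional branch to the corresponding successor.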
2396 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2397 switch (Predicate) { 2398 default: 2399 break; 2400 case CmpInst::FCMP_FALSE: 2401 fastEmitBranch(FBB, MIMD.getDL()); 2402 return true; 2403 case CmpInst::FCMP_TRUE: 2404 fastEmitBranch(TBB, MIMD.getDL()); 2405 return true; 2406 } 2407 2408 // Try to emit a combined compare-and-branch first. 2409 if (emitCompareAndBranch(BI)) 2410 return true; 2411 2412 // Try to take advantage of fallthrough opportunities. 2413 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2414 std::swap(TBB, FBB); 2415 Predicate = CmpInst::getInversePredicate(Predicate); 2416 } 2417 2418 // Emit the cmp. 2419 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2420 return false; 2421 2422 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2423 // instruction. 2424 AArch64CC::CondCode CC = getCompareCC(Predicate); 2425 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2426 switch (Predicate) { 2427 default: 2428 break; 2429 case CmpInst::FCMP_UEQ: 2430 ExtraCC = AArch64CC::EQ; 2431 CC = AArch64CC::VS; 2432 break; 2433 case CmpInst::FCMP_ONE: 2434 ExtraCC = AArch64CC::MI; 2435 CC = AArch64CC::GT; 2436 break; 2437 } 2438 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2439 2440 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2441 if (ExtraCC != AArch64CC::AL) { 2442 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2443 .addImm(ExtraCC) 2444 .addMBB(TBB); 2445 } 2446 2447 // Emit the branch. 2448 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2449 .addImm(CC) 2450 .addMBB(TBB); 2451 2452 finishCondBranch(BI->getParent(), TBB, FBB); 2453 return true; 2454 } 2455 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2456 uint64_t Imm = CI->getZExtValue(); 2457 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2458 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B)) 2459 .addMBB(Target); 2460 2461 // Obtain the branch probability and add the target to the successor list. 2462 if (FuncInfo.BPI) { 2463 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2464 BI->getParent(), Target->getBasicBlock()); 2465 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2466 } else 2467 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2468 return true; 2469 } else { 2470 AArch64CC::CondCode CC = AArch64CC::NE; 2471 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2472 // Fake request the condition, otherwise the intrinsic might be completely 2473 // optimized away. 2474 Register CondReg = getRegForValue(BI->getCondition()); 2475 if (!CondReg) 2476 return false; 2477 2478 // Emit the branch. 2479 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2480 .addImm(CC) 2481 .addMBB(TBB); 2482 2483 finishCondBranch(BI->getParent(), TBB, FBB); 2484 return true; 2485 } 2486 } 2487 2488 Register CondReg = getRegForValue(BI->getCondition()); 2489 if (!CondReg) 2490 return false; 2491 2492 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
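// For example, a branch on an i1 %cond becomes "tbnz w0, #0, <true-block>"
// (or "tbz" to the false block when the true block is the layout successor).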
2493 unsigned Opcode = AArch64::TBNZW; 2494 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2495 std::swap(TBB, FBB); 2496 Opcode = AArch64::TBZW; 2497 } 2498 2499 const MCInstrDesc &II = TII.get(Opcode); 2500 Register ConstrainedCondReg 2501 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2502 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 2503 .addReg(ConstrainedCondReg) 2504 .addImm(0) 2505 .addMBB(TBB); 2506 2507 finishCondBranch(BI->getParent(), TBB, FBB); 2508 return true; 2509 } 2510 2511 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2512 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2513 Register AddrReg = getRegForValue(BI->getOperand(0)); 2514 if (!AddrReg) 2515 return false; 2516 2517 // Authenticated indirectbr is not implemented yet. 2518 if (FuncInfo.MF->getFunction().hasFnAttribute("ptrauth-indirect-gotos")) 2519 return false; 2520 2521 // Emit the indirect branch. 2522 const MCInstrDesc &II = TII.get(AArch64::BR); 2523 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2524 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg); 2525 2526 // Make sure the CFG is up-to-date. 2527 for (const auto *Succ : BI->successors()) 2528 FuncInfo.MBB->addSuccessor(FuncInfo.getMBB(Succ)); 2529 2530 return true; 2531 } 2532 2533 bool AArch64FastISel::selectCmp(const Instruction *I) { 2534 const CmpInst *CI = cast<CmpInst>(I); 2535 2536 // Vectors of i1 are weird: bail out. 2537 if (CI->getType()->isVectorTy()) 2538 return false; 2539 2540 // Try to optimize or fold the cmp. 2541 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2542 Register ResultReg; 2543 switch (Predicate) { 2544 default: 2545 break; 2546 case CmpInst::FCMP_FALSE: 2547 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2548 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 2549 TII.get(TargetOpcode::COPY), ResultReg) 2550 .addReg(AArch64::WZR, getKillRegState(true)); 2551 break; 2552 case CmpInst::FCMP_TRUE: 2553 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2554 break; 2555 } 2556 2557 if (ResultReg) { 2558 updateValueMap(I, ResultReg); 2559 return true; 2560 } 2561 2562 // Emit the cmp. 2563 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2564 return false; 2565 2566 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2567 2568 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2569 // condition codes are inverted, because they are used by CSINC. 2570 static unsigned CondCodeTable[2][2] = { 2571 { AArch64CC::NE, AArch64CC::VC }, 2572 { AArch64CC::PL, AArch64CC::LE } 2573 }; 2574 unsigned *CondCodes = nullptr; 2575 switch (Predicate) { 2576 default: 2577 break; 2578 case CmpInst::FCMP_UEQ: 2579 CondCodes = &CondCodeTable[0][0]; 2580 break; 2581 case CmpInst::FCMP_ONE: 2582 CondCodes = &CondCodeTable[1][0]; 2583 break; 2584 } 2585 2586 if (CondCodes) { 2587 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2588 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2589 TmpReg1) 2590 .addReg(AArch64::WZR, getKillRegState(true)) 2591 .addReg(AArch64::WZR, getKillRegState(true)) 2592 .addImm(CondCodes[0]); 2593 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2594 ResultReg) 2595 .addReg(TmpReg1, getKillRegState(true)) 2596 .addReg(AArch64::WZR, getKillRegState(true)) 2597 .addImm(CondCodes[1]); 2598 2599 updateValueMap(I, ResultReg); 2600 return true; 2601 } 2602 2603 // Now set a register based on the comparison. 
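// CSINC Wd, WZR, WZR, <cond> produces 0 when <cond> holds and 1 otherwise, so
// the boolean result is materialized with the inverted condition:
//   csinc w0, wzr, wzr, <inv(cc)>   // w0 = (cc) ? 1 : 0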
2604 AArch64CC::CondCode CC = getCompareCC(Predicate); 2605 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2606 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2607 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2608 ResultReg) 2609 .addReg(AArch64::WZR, getKillRegState(true)) 2610 .addReg(AArch64::WZR, getKillRegState(true)) 2611 .addImm(invertedCC); 2612 2613 updateValueMap(I, ResultReg); 2614 return true; 2615 } 2616 2617 /// Optimize selects of i1 if one of the operands has a 'true' or 'false' 2618 /// value. 2619 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2620 if (!SI->getType()->isIntegerTy(1)) 2621 return false; 2622 2623 const Value *Src1Val, *Src2Val; 2624 unsigned Opc = 0; 2625 bool NeedExtraOp = false; 2626 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2627 if (CI->isOne()) { 2628 Src1Val = SI->getCondition(); 2629 Src2Val = SI->getFalseValue(); 2630 Opc = AArch64::ORRWrr; 2631 } else { 2632 assert(CI->isZero()); 2633 Src1Val = SI->getFalseValue(); 2634 Src2Val = SI->getCondition(); 2635 Opc = AArch64::BICWrr; 2636 } 2637 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2638 if (CI->isOne()) { 2639 Src1Val = SI->getCondition(); 2640 Src2Val = SI->getTrueValue(); 2641 Opc = AArch64::ORRWrr; 2642 NeedExtraOp = true; 2643 } else { 2644 assert(CI->isZero()); 2645 Src1Val = SI->getCondition(); 2646 Src2Val = SI->getTrueValue(); 2647 Opc = AArch64::ANDWrr; 2648 } 2649 } 2650 2651 if (!Opc) 2652 return false; 2653 2654 Register Src1Reg = getRegForValue(Src1Val); 2655 if (!Src1Reg) 2656 return false; 2657 2658 Register Src2Reg = getRegForValue(Src2Val); 2659 if (!Src2Reg) 2660 return false; 2661 2662 if (NeedExtraOp) 2663 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1); 2664 2665 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2666 Src2Reg); 2667 updateValueMap(SI, ResultReg); 2668 return true; 2669 } 2670 2671 bool AArch64FastISel::selectSelect(const Instruction *I) { 2672 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2673 MVT VT; 2674 if (!isTypeSupported(I->getType(), VT)) 2675 return false; 2676 2677 unsigned Opc; 2678 const TargetRegisterClass *RC; 2679 switch (VT.SimpleTy) { 2680 default: 2681 return false; 2682 case MVT::i1: 2683 case MVT::i8: 2684 case MVT::i16: 2685 case MVT::i32: 2686 Opc = AArch64::CSELWr; 2687 RC = &AArch64::GPR32RegClass; 2688 break; 2689 case MVT::i64: 2690 Opc = AArch64::CSELXr; 2691 RC = &AArch64::GPR64RegClass; 2692 break; 2693 case MVT::f32: 2694 Opc = AArch64::FCSELSrrr; 2695 RC = &AArch64::FPR32RegClass; 2696 break; 2697 case MVT::f64: 2698 Opc = AArch64::FCSELDrrr; 2699 RC = &AArch64::FPR64RegClass; 2700 break; 2701 } 2702 2703 const SelectInst *SI = cast<SelectInst>(I); 2704 const Value *Cond = SI->getCondition(); 2705 AArch64CC::CondCode CC = AArch64CC::NE; 2706 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2707 2708 if (optimizeSelect(SI)) 2709 return true; 2710 2711 // Try to pickup the flags, so we don't have to emit another compare. 2712 if (foldXALUIntrinsic(CC, I, Cond)) { 2713 // Fake request the condition to force emission of the XALU intrinsic. 2714 Register CondReg = getRegForValue(Cond); 2715 if (!CondReg) 2716 return false; 2717 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2718 isValueAvailable(Cond)) { 2719 const auto *Cmp = cast<CmpInst>(Cond); 2720 // Try to optimize or fold the cmp. 
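// If the condition folds to FCMP_FALSE/FCMP_TRUE, the select simply forwards
// the corresponding operand and no CSEL is emitted at all.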
2721 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2722 const Value *FoldSelect = nullptr; 2723 switch (Predicate) { 2724 default: 2725 break; 2726 case CmpInst::FCMP_FALSE: 2727 FoldSelect = SI->getFalseValue(); 2728 break; 2729 case CmpInst::FCMP_TRUE: 2730 FoldSelect = SI->getTrueValue(); 2731 break; 2732 } 2733 2734 if (FoldSelect) { 2735 Register SrcReg = getRegForValue(FoldSelect); 2736 if (!SrcReg) 2737 return false; 2738 2739 updateValueMap(I, SrcReg); 2740 return true; 2741 } 2742 2743 // Emit the cmp. 2744 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2745 return false; 2746 2747 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2748 CC = getCompareCC(Predicate); 2749 switch (Predicate) { 2750 default: 2751 break; 2752 case CmpInst::FCMP_UEQ: 2753 ExtraCC = AArch64CC::EQ; 2754 CC = AArch64CC::VS; 2755 break; 2756 case CmpInst::FCMP_ONE: 2757 ExtraCC = AArch64CC::MI; 2758 CC = AArch64CC::GT; 2759 break; 2760 } 2761 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2762 } else { 2763 Register CondReg = getRegForValue(Cond); 2764 if (!CondReg) 2765 return false; 2766 2767 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2768 CondReg = constrainOperandRegClass(II, CondReg, 1); 2769 2770 // Emit a TST instruction (ANDS wzr, reg, #imm). 2771 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, 2772 AArch64::WZR) 2773 .addReg(CondReg) 2774 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2775 } 2776 2777 Register Src1Reg = getRegForValue(SI->getTrueValue()); 2778 Register Src2Reg = getRegForValue(SI->getFalseValue()); 2779 2780 if (!Src1Reg || !Src2Reg) 2781 return false; 2782 2783 if (ExtraCC != AArch64CC::AL) 2784 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC); 2785 2786 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC); 2787 updateValueMap(I, ResultReg); 2788 return true; 2789 } 2790 2791 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2792 Value *V = I->getOperand(0); 2793 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2794 return false; 2795 2796 Register Op = getRegForValue(V); 2797 if (Op == 0) 2798 return false; 2799 2800 Register ResultReg = createResultReg(&AArch64::FPR64RegClass); 2801 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr), 2802 ResultReg).addReg(Op); 2803 updateValueMap(I, ResultReg); 2804 return true; 2805 } 2806 2807 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2808 Value *V = I->getOperand(0); 2809 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2810 return false; 2811 2812 Register Op = getRegForValue(V); 2813 if (Op == 0) 2814 return false; 2815 2816 Register ResultReg = createResultReg(&AArch64::FPR32RegClass); 2817 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr), 2818 ResultReg).addReg(Op); 2819 updateValueMap(I, ResultReg); 2820 return true; 2821 } 2822 2823 // FPToUI and FPToSI 2824 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2825 MVT DestVT; 2826 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2827 return false; 2828 2829 Register SrcReg = getRegForValue(I->getOperand(0)); 2830 if (!SrcReg) 2831 return false; 2832 2833 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2834 if (SrcVT == MVT::f128 || SrcVT == MVT::f16 || SrcVT == MVT::bf16) 2835 return false; 2836 2837 unsigned Opc; 2838 if (SrcVT == MVT::f64) { 2839 if (Signed) 2840 Opc = (DestVT == MVT::i32) ? 
AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2841 else 2842 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2843 } else { 2844 if (Signed) 2845 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2846 else 2847 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2848 } 2849 Register ResultReg = createResultReg( 2850 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2851 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 2852 .addReg(SrcReg); 2853 updateValueMap(I, ResultReg); 2854 return true; 2855 } 2856 2857 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2858 MVT DestVT; 2859 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2860 return false; 2861 // Let regular ISEL handle FP16 2862 if (DestVT == MVT::f16 || DestVT == MVT::bf16) 2863 return false; 2864 2865 assert((DestVT == MVT::f32 || DestVT == MVT::f64) && 2866 "Unexpected value type."); 2867 2868 Register SrcReg = getRegForValue(I->getOperand(0)); 2869 if (!SrcReg) 2870 return false; 2871 2872 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2873 2874 // Handle sign-extension. 2875 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2876 SrcReg = 2877 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2878 if (!SrcReg) 2879 return false; 2880 } 2881 2882 unsigned Opc; 2883 if (SrcVT == MVT::i64) { 2884 if (Signed) 2885 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2886 else 2887 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2888 } else { 2889 if (Signed) 2890 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2891 else 2892 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2893 } 2894 2895 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg); 2896 updateValueMap(I, ResultReg); 2897 return true; 2898 } 2899 2900 bool AArch64FastISel::fastLowerArguments() { 2901 if (!FuncInfo.CanLowerReturn) 2902 return false; 2903 2904 const Function *F = FuncInfo.Fn; 2905 if (F->isVarArg()) 2906 return false; 2907 2908 CallingConv::ID CC = F->getCallingConv(); 2909 if (CC != CallingConv::C && CC != CallingConv::Swift) 2910 return false; 2911 2912 if (Subtarget->hasCustomCallingConv()) 2913 return false; 2914 2915 // Only handle simple cases of up to 8 GPR and FPR each. 
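// This mirrors the AAPCS64 assignment of the first eight integer arguments to
// W0-W7/X0-X7 and the first eight FP/vector arguments to the V0-V7 bank (see
// the Registers table below); anything else falls back to SelectionDAG.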
2916 unsigned GPRCnt = 0; 2917 unsigned FPRCnt = 0; 2918 for (auto const &Arg : F->args()) { 2919 if (Arg.hasAttribute(Attribute::ByVal) || 2920 Arg.hasAttribute(Attribute::InReg) || 2921 Arg.hasAttribute(Attribute::StructRet) || 2922 Arg.hasAttribute(Attribute::SwiftSelf) || 2923 Arg.hasAttribute(Attribute::SwiftAsync) || 2924 Arg.hasAttribute(Attribute::SwiftError) || 2925 Arg.hasAttribute(Attribute::Nest)) 2926 return false; 2927 2928 Type *ArgTy = Arg.getType(); 2929 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2930 return false; 2931 2932 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2933 if (!ArgVT.isSimple()) 2934 return false; 2935 2936 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2937 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2938 return false; 2939 2940 if (VT.isVector() && 2941 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2942 return false; 2943 2944 if (VT >= MVT::i1 && VT <= MVT::i64) 2945 ++GPRCnt; 2946 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2947 VT.is128BitVector()) 2948 ++FPRCnt; 2949 else 2950 return false; 2951 2952 if (GPRCnt > 8 || FPRCnt > 8) 2953 return false; 2954 } 2955 2956 static const MCPhysReg Registers[6][8] = { 2957 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 2958 AArch64::W5, AArch64::W6, AArch64::W7 }, 2959 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 2960 AArch64::X5, AArch64::X6, AArch64::X7 }, 2961 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 2962 AArch64::H5, AArch64::H6, AArch64::H7 }, 2963 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 2964 AArch64::S5, AArch64::S6, AArch64::S7 }, 2965 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 2966 AArch64::D5, AArch64::D6, AArch64::D7 }, 2967 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 2968 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 2969 }; 2970 2971 unsigned GPRIdx = 0; 2972 unsigned FPRIdx = 0; 2973 for (auto const &Arg : F->args()) { 2974 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 2975 unsigned SrcReg; 2976 const TargetRegisterClass *RC; 2977 if (VT >= MVT::i1 && VT <= MVT::i32) { 2978 SrcReg = Registers[0][GPRIdx++]; 2979 RC = &AArch64::GPR32RegClass; 2980 VT = MVT::i32; 2981 } else if (VT == MVT::i64) { 2982 SrcReg = Registers[1][GPRIdx++]; 2983 RC = &AArch64::GPR64RegClass; 2984 } else if (VT == MVT::f16 || VT == MVT::bf16) { 2985 SrcReg = Registers[2][FPRIdx++]; 2986 RC = &AArch64::FPR16RegClass; 2987 } else if (VT == MVT::f32) { 2988 SrcReg = Registers[3][FPRIdx++]; 2989 RC = &AArch64::FPR32RegClass; 2990 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 2991 SrcReg = Registers[4][FPRIdx++]; 2992 RC = &AArch64::FPR64RegClass; 2993 } else if (VT.is128BitVector()) { 2994 SrcReg = Registers[5][FPRIdx++]; 2995 RC = &AArch64::FPR128RegClass; 2996 } else 2997 llvm_unreachable("Unexpected value type."); 2998 2999 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 3000 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 3001 // Without this, EmitLiveInCopies may eliminate the livein if its only 3002 // use is a bitcast (which isn't turned into an instruction). 
3003 Register ResultReg = createResultReg(RC); 3004 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3005 TII.get(TargetOpcode::COPY), ResultReg) 3006 .addReg(DstReg, getKillRegState(true)); 3007 updateValueMap(&Arg, ResultReg); 3008 } 3009 return true; 3010 } 3011 3012 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 3013 SmallVectorImpl<MVT> &OutVTs, 3014 unsigned &NumBytes) { 3015 CallingConv::ID CC = CLI.CallConv; 3016 SmallVector<CCValAssign, 16> ArgLocs; 3017 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 3018 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 3019 3020 // Get a count of how many bytes are to be pushed on the stack. 3021 NumBytes = CCInfo.getStackSize(); 3022 3023 // Issue CALLSEQ_START 3024 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 3025 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown)) 3026 .addImm(NumBytes).addImm(0); 3027 3028 // Process the args. 3029 for (CCValAssign &VA : ArgLocs) { 3030 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 3031 MVT ArgVT = OutVTs[VA.getValNo()]; 3032 3033 Register ArgReg = getRegForValue(ArgVal); 3034 if (!ArgReg) 3035 return false; 3036 3037 // Handle arg promotion: SExt, ZExt, AExt. 3038 switch (VA.getLocInfo()) { 3039 case CCValAssign::Full: 3040 break; 3041 case CCValAssign::SExt: { 3042 MVT DestVT = VA.getLocVT(); 3043 MVT SrcVT = ArgVT; 3044 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 3045 if (!ArgReg) 3046 return false; 3047 break; 3048 } 3049 case CCValAssign::AExt: 3050 // Intentional fall-through. 3051 case CCValAssign::ZExt: { 3052 MVT DestVT = VA.getLocVT(); 3053 MVT SrcVT = ArgVT; 3054 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 3055 if (!ArgReg) 3056 return false; 3057 break; 3058 } 3059 default: 3060 llvm_unreachable("Unknown arg promotion!"); 3061 } 3062 3063 // Now copy/store arg to correct locations. 3064 if (VA.isRegLoc() && !VA.needsCustom()) { 3065 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3066 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 3067 CLI.OutRegs.push_back(VA.getLocReg()); 3068 } else if (VA.needsCustom()) { 3069 // FIXME: Handle custom args. 3070 return false; 3071 } else { 3072 assert(VA.isMemLoc() && "Assuming store on stack."); 3073 3074 // Don't emit stores for undef values. 3075 if (isa<UndefValue>(ArgVal)) 3076 continue; 3077 3078 // Need to store on the stack. 3079 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 3080 3081 unsigned BEAlign = 0; 3082 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3083 BEAlign = 8 - ArgSize; 3084 3085 Address Addr; 3086 Addr.setKind(Address::RegBase); 3087 Addr.setReg(AArch64::SP); 3088 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3089 3090 Align Alignment = DL.getABITypeAlign(ArgVal->getType()); 3091 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3092 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 3093 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3094 3095 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3096 return false; 3097 } 3098 } 3099 return true; 3100 } 3101 3102 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) { 3103 CallingConv::ID CC = CLI.CallConv; 3104 3105 // Issue CALLSEQ_END 3106 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3107 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp)) 3108 .addImm(NumBytes).addImm(0); 3109 3110 // Now the return values. 
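// Each return value is copied out of the physical register assigned by the
// calling convention (e.g. W0/X0 or S0/D0) into the virtual registers created
// for the call result.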
3111 SmallVector<CCValAssign, 16> RVLocs; 3112 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3113 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC)); 3114 3115 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy); 3116 for (unsigned i = 0; i != RVLocs.size(); ++i) { 3117 CCValAssign &VA = RVLocs[i]; 3118 MVT CopyVT = VA.getValVT(); 3119 Register CopyReg = ResultReg + i; 3120 3121 // TODO: Handle big-endian results 3122 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3123 return false; 3124 3125 // Copy result out of their specified physreg. 3126 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), 3127 CopyReg) 3128 .addReg(VA.getLocReg()); 3129 CLI.InRegs.push_back(VA.getLocReg()); 3130 } 3131 3132 CLI.ResultReg = ResultReg; 3133 CLI.NumResultRegs = RVLocs.size(); 3134 3135 return true; 3136 } 3137 3138 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3139 CallingConv::ID CC = CLI.CallConv; 3140 bool IsTailCall = CLI.IsTailCall; 3141 bool IsVarArg = CLI.IsVarArg; 3142 const Value *Callee = CLI.Callee; 3143 MCSymbol *Symbol = CLI.Symbol; 3144 3145 if (!Callee && !Symbol) 3146 return false; 3147 3148 // Allow SelectionDAG isel to handle calls to functions like setjmp that need 3149 // a bti instruction following the call. 3150 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) && 3151 !Subtarget->noBTIAtReturnTwice() && 3152 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) 3153 return false; 3154 3155 // Allow SelectionDAG isel to handle indirect calls with KCFI checks. 3156 if (CLI.CB && CLI.CB->isIndirectCall() && 3157 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi)) 3158 return false; 3159 3160 // Allow SelectionDAG isel to handle tail calls. 3161 if (IsTailCall) 3162 return false; 3163 3164 // FIXME: we could and should support this, but for now correctness at -O0 is 3165 // more important. 3166 if (Subtarget->isTargetILP32()) 3167 return false; 3168 3169 CodeModel::Model CM = TM.getCodeModel(); 3170 // Only support the small-addressing and large code models. 3171 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) 3172 return false; 3173 3174 // FIXME: Add large code model support for ELF. 3175 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3176 return false; 3177 3178 // ELF -fno-plt compiled intrinsic calls do not have the nonlazybind 3179 // attribute. Check "RtLibUseGOT" instead. 3180 if (MF->getFunction().getParent()->getRtLibUseGOT()) 3181 return false; 3182 3183 // Let SDISel handle vararg functions. 3184 if (IsVarArg) 3185 return false; 3186 3187 if (Subtarget->isWindowsArm64EC()) 3188 return false; 3189 3190 for (auto Flag : CLI.OutFlags) 3191 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || 3192 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError()) 3193 return false; 3194 3195 // Set up the argument vectors. 3196 SmallVector<MVT, 16> OutVTs; 3197 OutVTs.reserve(CLI.OutVals.size()); 3198 3199 for (auto *Val : CLI.OutVals) { 3200 MVT VT; 3201 if (!isTypeLegal(Val->getType(), VT) && 3202 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3203 return false; 3204 3205 // We don't handle vector parameters yet. 
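// Scalar arguments wider than 64 bits are rejected here as well.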
3206 if (VT.isVector() || VT.getSizeInBits() > 64) 3207 return false; 3208 3209 OutVTs.push_back(VT); 3210 } 3211 3212 Address Addr; 3213 if (Callee && !computeCallAddress(Callee, Addr)) 3214 return false; 3215 3216 // The weak function target may be zero; in that case we must use indirect 3217 // addressing via a stub on windows as it may be out of range for a 3218 // PC-relative jump. 3219 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() && 3220 Addr.getGlobalValue()->hasExternalWeakLinkage()) 3221 return false; 3222 3223 // Handle the arguments now that we've gotten them. 3224 unsigned NumBytes; 3225 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3226 return false; 3227 3228 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3229 if (RegInfo->isAnyArgRegReserved(*MF)) 3230 RegInfo->emitReservedArgRegCallError(*MF); 3231 3232 // Issue the call. 3233 MachineInstrBuilder MIB; 3234 if (Subtarget->useSmallAddressing()) { 3235 const MCInstrDesc &II = 3236 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL); 3237 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II); 3238 if (Symbol) 3239 MIB.addSym(Symbol, 0); 3240 else if (Addr.getGlobalValue()) 3241 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3242 else if (Addr.getReg()) { 3243 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3244 MIB.addReg(Reg); 3245 } else 3246 return false; 3247 } else { 3248 Register CallReg; 3249 if (Symbol) { 3250 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3251 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP), 3252 ADRPReg) 3253 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3254 3255 CallReg = createResultReg(&AArch64::GPR64RegClass); 3256 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3257 TII.get(AArch64::LDRXui), CallReg) 3258 .addReg(ADRPReg) 3259 .addSym(Symbol, 3260 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3261 } else if (Addr.getGlobalValue()) 3262 CallReg = materializeGV(Addr.getGlobalValue()); 3263 else if (Addr.getReg()) 3264 CallReg = Addr.getReg(); 3265 3266 if (!CallReg) 3267 return false; 3268 3269 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF)); 3270 CallReg = constrainOperandRegClass(II, CallReg, 0); 3271 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg); 3272 } 3273 3274 // Add implicit physical register uses to the call. 3275 for (auto Reg : CLI.OutRegs) 3276 MIB.addReg(Reg, RegState::Implicit); 3277 3278 // Add a register mask with the call-preserved registers. 3279 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3280 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3281 3282 CLI.Call = MIB; 3283 3284 // Finish off the call including any return values. 3285 return finishCall(CLI, NumBytes); 3286 } 3287 3288 bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) { 3289 if (Alignment) 3290 return Len / Alignment->value() <= 4; 3291 else 3292 return Len < 32; 3293 } 3294 3295 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3296 uint64_t Len, MaybeAlign Alignment) { 3297 // Make sure we don't bloat code by inlining very large memcpy's. 
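// A copy is "small" when Len / Alignment <= 4, or Len < 32 if no alignment is
// known (see isMemCpySmall above); it is then unrolled into the widest
// load/store pairs the alignment allows.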
3298 if (!isMemCpySmall(Len, Alignment)) 3299 return false; 3300 3301 int64_t UnscaledOffset = 0; 3302 Address OrigDest = Dest; 3303 Address OrigSrc = Src; 3304 3305 while (Len) { 3306 MVT VT; 3307 if (!Alignment || *Alignment >= 8) { 3308 if (Len >= 8) 3309 VT = MVT::i64; 3310 else if (Len >= 4) 3311 VT = MVT::i32; 3312 else if (Len >= 2) 3313 VT = MVT::i16; 3314 else { 3315 VT = MVT::i8; 3316 } 3317 } else { 3318 assert(Alignment && "Alignment is set in this branch"); 3319 // Bound based on alignment. 3320 if (Len >= 4 && *Alignment == 4) 3321 VT = MVT::i32; 3322 else if (Len >= 2 && *Alignment == 2) 3323 VT = MVT::i16; 3324 else { 3325 VT = MVT::i8; 3326 } 3327 } 3328 3329 Register ResultReg = emitLoad(VT, VT, Src); 3330 if (!ResultReg) 3331 return false; 3332 3333 if (!emitStore(VT, ResultReg, Dest)) 3334 return false; 3335 3336 int64_t Size = VT.getSizeInBits() / 8; 3337 Len -= Size; 3338 UnscaledOffset += Size; 3339 3340 // We need to recompute the unscaled offset for each iteration. 3341 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3342 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3343 } 3344 3345 return true; 3346 } 3347 3348 /// Check if it is possible to fold the condition from the XALU intrinsic 3349 /// into the user. The condition code will only be updated on success. 3350 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3351 const Instruction *I, 3352 const Value *Cond) { 3353 if (!isa<ExtractValueInst>(Cond)) 3354 return false; 3355 3356 const auto *EV = cast<ExtractValueInst>(Cond); 3357 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3358 return false; 3359 3360 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3361 MVT RetVT; 3362 const Function *Callee = II->getCalledFunction(); 3363 Type *RetTy = 3364 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3365 if (!isTypeLegal(RetTy, RetVT)) 3366 return false; 3367 3368 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3369 return false; 3370 3371 const Value *LHS = II->getArgOperand(0); 3372 const Value *RHS = II->getArgOperand(1); 3373 3374 // Canonicalize immediate to the RHS. 3375 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3376 std::swap(LHS, RHS); 3377 3378 // Simplify multiplies. 3379 Intrinsic::ID IID = II->getIntrinsicID(); 3380 switch (IID) { 3381 default: 3382 break; 3383 case Intrinsic::smul_with_overflow: 3384 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3385 if (C->getValue() == 2) 3386 IID = Intrinsic::sadd_with_overflow; 3387 break; 3388 case Intrinsic::umul_with_overflow: 3389 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3390 if (C->getValue() == 2) 3391 IID = Intrinsic::uadd_with_overflow; 3392 break; 3393 } 3394 3395 AArch64CC::CondCode TmpCC; 3396 switch (IID) { 3397 default: 3398 return false; 3399 case Intrinsic::sadd_with_overflow: 3400 case Intrinsic::ssub_with_overflow: 3401 TmpCC = AArch64CC::VS; 3402 break; 3403 case Intrinsic::uadd_with_overflow: 3404 TmpCC = AArch64CC::HS; 3405 break; 3406 case Intrinsic::usub_with_overflow: 3407 TmpCC = AArch64CC::LO; 3408 break; 3409 case Intrinsic::smul_with_overflow: 3410 case Intrinsic::umul_with_overflow: 3411 TmpCC = AArch64CC::NE; 3412 break; 3413 } 3414 3415 // Check if both instructions are in the same basic block. 
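// The overflow result only exists in NZCV, and any instruction emitted in
// between could clobber the flags, so the fold is restricted to the case
// where nothing but extractvalues separate the intrinsic from its user.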
3416 if (!isValueAvailable(II)) 3417 return false; 3418 3419 // Make sure nothing is in the way 3420 BasicBlock::const_iterator Start(I); 3421 BasicBlock::const_iterator End(II); 3422 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3423 // We only expect extractvalue instructions between the intrinsic and the 3424 // instruction to be selected. 3425 if (!isa<ExtractValueInst>(Itr)) 3426 return false; 3427 3428 // Check that the extractvalue operand comes from the intrinsic. 3429 const auto *EVI = cast<ExtractValueInst>(Itr); 3430 if (EVI->getAggregateOperand() != II) 3431 return false; 3432 } 3433 3434 CC = TmpCC; 3435 return true; 3436 } 3437 3438 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3439 // FIXME: Handle more intrinsics. 3440 switch (II->getIntrinsicID()) { 3441 default: return false; 3442 case Intrinsic::frameaddress: { 3443 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3444 MFI.setFrameAddressIsTaken(true); 3445 3446 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3447 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3448 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3449 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3450 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3451 // Recursively load frame address 3452 // ldr x0, [fp] 3453 // ldr x0, [x0] 3454 // ldr x0, [x0] 3455 // ... 3456 Register DestReg; 3457 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3458 while (Depth--) { 3459 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3460 SrcReg, 0); 3461 assert(DestReg && "Unexpected LDR instruction emission failure."); 3462 SrcReg = DestReg; 3463 } 3464 3465 updateValueMap(II, SrcReg); 3466 return true; 3467 } 3468 case Intrinsic::sponentry: { 3469 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3470 3471 // SP = FP + Fixed Object + 16 3472 int FI = MFI.CreateFixedObject(4, 0, false); 3473 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 3474 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3475 TII.get(AArch64::ADDXri), ResultReg) 3476 .addFrameIndex(FI) 3477 .addImm(0) 3478 .addImm(0); 3479 3480 updateValueMap(II, ResultReg); 3481 return true; 3482 } 3483 case Intrinsic::memcpy: 3484 case Intrinsic::memmove: { 3485 const auto *MTI = cast<MemTransferInst>(II); 3486 // Don't handle volatile. 3487 if (MTI->isVolatile()) 3488 return false; 3489 3490 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3491 // we would emit dead code because we don't currently handle memmoves. 3492 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3493 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3494 // Small memcpy's are common enough that we want to do them without a call 3495 // if possible. 
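// When either operand specifies an alignment, the bound used below is the
// smaller of the two (an unspecified side counts as 1); otherwise no
// alignment is assumed and only the raw length is checked.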
3496 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3497 MaybeAlign Alignment; 3498 if (MTI->getDestAlign() || MTI->getSourceAlign()) 3499 Alignment = std::min(MTI->getDestAlign().valueOrOne(), 3500 MTI->getSourceAlign().valueOrOne()); 3501 if (isMemCpySmall(Len, Alignment)) { 3502 Address Dest, Src; 3503 if (!computeAddress(MTI->getRawDest(), Dest) || 3504 !computeAddress(MTI->getRawSource(), Src)) 3505 return false; 3506 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3507 return true; 3508 } 3509 } 3510 3511 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3512 return false; 3513 3514 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3515 // Fast instruction selection doesn't support the special 3516 // address spaces. 3517 return false; 3518 3519 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3520 return lowerCallTo(II, IntrMemName, II->arg_size() - 1); 3521 } 3522 case Intrinsic::memset: { 3523 const MemSetInst *MSI = cast<MemSetInst>(II); 3524 // Don't handle volatile. 3525 if (MSI->isVolatile()) 3526 return false; 3527 3528 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3529 return false; 3530 3531 if (MSI->getDestAddressSpace() > 255) 3532 // Fast instruction selection doesn't support the special 3533 // address spaces. 3534 return false; 3535 3536 return lowerCallTo(II, "memset", II->arg_size() - 1); 3537 } 3538 case Intrinsic::sin: 3539 case Intrinsic::cos: 3540 case Intrinsic::tan: 3541 case Intrinsic::pow: { 3542 MVT RetVT; 3543 if (!isTypeLegal(II->getType(), RetVT)) 3544 return false; 3545 3546 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3547 return false; 3548 3549 static const RTLIB::Libcall LibCallTable[4][2] = { 3550 {RTLIB::SIN_F32, RTLIB::SIN_F64}, 3551 {RTLIB::COS_F32, RTLIB::COS_F64}, 3552 {RTLIB::TAN_F32, RTLIB::TAN_F64}, 3553 {RTLIB::POW_F32, RTLIB::POW_F64}}; 3554 RTLIB::Libcall LC; 3555 bool Is64Bit = RetVT == MVT::f64; 3556 switch (II->getIntrinsicID()) { 3557 default: 3558 llvm_unreachable("Unexpected intrinsic."); 3559 case Intrinsic::sin: 3560 LC = LibCallTable[0][Is64Bit]; 3561 break; 3562 case Intrinsic::cos: 3563 LC = LibCallTable[1][Is64Bit]; 3564 break; 3565 case Intrinsic::tan: 3566 LC = LibCallTable[2][Is64Bit]; 3567 break; 3568 case Intrinsic::pow: 3569 LC = LibCallTable[3][Is64Bit]; 3570 break; 3571 } 3572 3573 ArgListTy Args; 3574 Args.reserve(II->arg_size()); 3575 3576 // Populate the argument list. 
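// Each intrinsic operand is forwarded unchanged as an argument of the
// corresponding libcall (sinf/sin, cosf/cos, tanf/tan, powf/pow).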
3577 for (auto &Arg : II->args()) { 3578 ArgListEntry Entry; 3579 Entry.Val = Arg; 3580 Entry.Ty = Arg->getType(); 3581 Args.push_back(Entry); 3582 } 3583 3584 CallLoweringInfo CLI; 3585 MCContext &Ctx = MF->getContext(); 3586 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3587 TLI.getLibcallName(LC), std::move(Args)); 3588 if (!lowerCallTo(CLI)) 3589 return false; 3590 updateValueMap(II, CLI.ResultReg); 3591 return true; 3592 } 3593 case Intrinsic::fabs: { 3594 MVT VT; 3595 if (!isTypeLegal(II->getType(), VT)) 3596 return false; 3597 3598 unsigned Opc; 3599 switch (VT.SimpleTy) { 3600 default: 3601 return false; 3602 case MVT::f32: 3603 Opc = AArch64::FABSSr; 3604 break; 3605 case MVT::f64: 3606 Opc = AArch64::FABSDr; 3607 break; 3608 } 3609 Register SrcReg = getRegForValue(II->getOperand(0)); 3610 if (!SrcReg) 3611 return false; 3612 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3613 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 3614 .addReg(SrcReg); 3615 updateValueMap(II, ResultReg); 3616 return true; 3617 } 3618 case Intrinsic::trap: 3619 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK)) 3620 .addImm(1); 3621 return true; 3622 case Intrinsic::debugtrap: 3623 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK)) 3624 .addImm(0xF000); 3625 return true; 3626 3627 case Intrinsic::sqrt: { 3628 Type *RetTy = II->getCalledFunction()->getReturnType(); 3629 3630 MVT VT; 3631 if (!isTypeLegal(RetTy, VT)) 3632 return false; 3633 3634 Register Op0Reg = getRegForValue(II->getOperand(0)); 3635 if (!Op0Reg) 3636 return false; 3637 3638 Register ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg); 3639 if (!ResultReg) 3640 return false; 3641 3642 updateValueMap(II, ResultReg); 3643 return true; 3644 } 3645 case Intrinsic::sadd_with_overflow: 3646 case Intrinsic::uadd_with_overflow: 3647 case Intrinsic::ssub_with_overflow: 3648 case Intrinsic::usub_with_overflow: 3649 case Intrinsic::smul_with_overflow: 3650 case Intrinsic::umul_with_overflow: { 3651 // This implements the basic lowering of the xalu with overflow intrinsics. 3652 const Function *Callee = II->getCalledFunction(); 3653 auto *Ty = cast<StructType>(Callee->getReturnType()); 3654 Type *RetTy = Ty->getTypeAtIndex(0U); 3655 3656 MVT VT; 3657 if (!isTypeLegal(RetTy, VT)) 3658 return false; 3659 3660 if (VT != MVT::i32 && VT != MVT::i64) 3661 return false; 3662 3663 const Value *LHS = II->getArgOperand(0); 3664 const Value *RHS = II->getArgOperand(1); 3665 // Canonicalize immediate to the RHS. 3666 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3667 std::swap(LHS, RHS); 3668 3669 // Simplify multiplies. 
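// A multiply by 2 overflows exactly when adding the value to itself
// overflows, so it can be checked with a flag-setting add instead of a
// multiply followed by an overflow check.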
3670 Intrinsic::ID IID = II->getIntrinsicID(); 3671 switch (IID) { 3672 default: 3673 break; 3674 case Intrinsic::smul_with_overflow: 3675 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3676 if (C->getValue() == 2) { 3677 IID = Intrinsic::sadd_with_overflow; 3678 RHS = LHS; 3679 } 3680 break; 3681 case Intrinsic::umul_with_overflow: 3682 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3683 if (C->getValue() == 2) { 3684 IID = Intrinsic::uadd_with_overflow; 3685 RHS = LHS; 3686 } 3687 break; 3688 } 3689 3690 Register ResultReg1, ResultReg2, MulReg; 3691 AArch64CC::CondCode CC = AArch64CC::Invalid; 3692 switch (IID) { 3693 default: llvm_unreachable("Unexpected intrinsic!"); 3694 case Intrinsic::sadd_with_overflow: 3695 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3696 CC = AArch64CC::VS; 3697 break; 3698 case Intrinsic::uadd_with_overflow: 3699 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3700 CC = AArch64CC::HS; 3701 break; 3702 case Intrinsic::ssub_with_overflow: 3703 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3704 CC = AArch64CC::VS; 3705 break; 3706 case Intrinsic::usub_with_overflow: 3707 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3708 CC = AArch64CC::LO; 3709 break; 3710 case Intrinsic::smul_with_overflow: { 3711 CC = AArch64CC::NE; 3712 Register LHSReg = getRegForValue(LHS); 3713 if (!LHSReg) 3714 return false; 3715 3716 Register RHSReg = getRegForValue(RHS); 3717 if (!RHSReg) 3718 return false; 3719 3720 if (VT == MVT::i32) { 3721 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg); 3722 Register MulSubReg = 3723 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3724 // cmp xreg, wreg, sxtw 3725 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg, 3726 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true, 3727 /*WantResult=*/false); 3728 MulReg = MulSubReg; 3729 } else { 3730 assert(VT == MVT::i64 && "Unexpected value type."); 3731 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3732 // reused in the next instruction. 3733 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3734 Register SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg); 3735 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63, 3736 /*WantResult=*/false); 3737 } 3738 break; 3739 } 3740 case Intrinsic::umul_with_overflow: { 3741 CC = AArch64CC::NE; 3742 Register LHSReg = getRegForValue(LHS); 3743 if (!LHSReg) 3744 return false; 3745 3746 Register RHSReg = getRegForValue(RHS); 3747 if (!RHSReg) 3748 return false; 3749 3750 if (VT == MVT::i32) { 3751 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg); 3752 // tst xreg, #0xffffffff00000000 3753 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3754 TII.get(AArch64::ANDSXri), AArch64::XZR) 3755 .addReg(MulReg) 3756 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64)); 3757 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3758 } else { 3759 assert(VT == MVT::i64 && "Unexpected value type."); 3760 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3761 // reused in the next instruction. 
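// A 64-bit unsigned multiply overflows iff the high 64 bits of the 128-bit
// product (UMULH) are non-zero, so compare UMULH against XZR to set the flags.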
3762 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3763 Register UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg); 3764 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false); 3765 } 3766 break; 3767 } 3768 } 3769 3770 if (MulReg) { 3771 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3772 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3773 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3774 } 3775 3776 if (!ResultReg1) 3777 return false; 3778 3779 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3780 AArch64::WZR, AArch64::WZR, 3781 getInvertedCondCode(CC)); 3782 (void)ResultReg2; 3783 assert((ResultReg1 + 1) == ResultReg2 && 3784 "Nonconsecutive result registers."); 3785 updateValueMap(II, ResultReg1, 2); 3786 return true; 3787 } 3788 case Intrinsic::aarch64_crc32b: 3789 case Intrinsic::aarch64_crc32h: 3790 case Intrinsic::aarch64_crc32w: 3791 case Intrinsic::aarch64_crc32x: 3792 case Intrinsic::aarch64_crc32cb: 3793 case Intrinsic::aarch64_crc32ch: 3794 case Intrinsic::aarch64_crc32cw: 3795 case Intrinsic::aarch64_crc32cx: { 3796 if (!Subtarget->hasCRC()) 3797 return false; 3798 3799 unsigned Opc; 3800 switch (II->getIntrinsicID()) { 3801 default: 3802 llvm_unreachable("Unexpected intrinsic!"); 3803 case Intrinsic::aarch64_crc32b: 3804 Opc = AArch64::CRC32Brr; 3805 break; 3806 case Intrinsic::aarch64_crc32h: 3807 Opc = AArch64::CRC32Hrr; 3808 break; 3809 case Intrinsic::aarch64_crc32w: 3810 Opc = AArch64::CRC32Wrr; 3811 break; 3812 case Intrinsic::aarch64_crc32x: 3813 Opc = AArch64::CRC32Xrr; 3814 break; 3815 case Intrinsic::aarch64_crc32cb: 3816 Opc = AArch64::CRC32CBrr; 3817 break; 3818 case Intrinsic::aarch64_crc32ch: 3819 Opc = AArch64::CRC32CHrr; 3820 break; 3821 case Intrinsic::aarch64_crc32cw: 3822 Opc = AArch64::CRC32CWrr; 3823 break; 3824 case Intrinsic::aarch64_crc32cx: 3825 Opc = AArch64::CRC32CXrr; 3826 break; 3827 } 3828 3829 Register LHSReg = getRegForValue(II->getArgOperand(0)); 3830 Register RHSReg = getRegForValue(II->getArgOperand(1)); 3831 if (!LHSReg || !RHSReg) 3832 return false; 3833 3834 Register ResultReg = 3835 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg); 3836 updateValueMap(II, ResultReg); 3837 return true; 3838 } 3839 } 3840 return false; 3841 } 3842 3843 bool AArch64FastISel::selectRet(const Instruction *I) { 3844 const ReturnInst *Ret = cast<ReturnInst>(I); 3845 const Function &F = *I->getParent()->getParent(); 3846 3847 if (!FuncInfo.CanLowerReturn) 3848 return false; 3849 3850 if (F.isVarArg()) 3851 return false; 3852 3853 if (TLI.supportSwiftError() && 3854 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 3855 return false; 3856 3857 if (TLI.supportSplitCSR(FuncInfo.MF)) 3858 return false; 3859 3860 // Build a list of return value registers. 3861 SmallVector<Register, 4> RetRegs; 3862 3863 if (Ret->getNumOperands() > 0) { 3864 CallingConv::ID CC = F.getCallingConv(); 3865 SmallVector<ISD::OutputArg, 4> Outs; 3866 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3867 3868 // Analyze operands of the call, assigning locations to each operand. 3869 SmallVector<CCValAssign, 16> ValLocs; 3870 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3871 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS); 3872 3873 // Only handle a single return value for now. 3874 if (ValLocs.size() != 1) 3875 return false; 3876 3877 CCValAssign &VA = ValLocs[0]; 3878 const Value *RV = Ret->getOperand(0); 3879 3880 // Don't bother handling odd stuff for now. 
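// Only plain (Full) and bit-converted (BCvt) value locations are supported;
// returns that require promotion or splitting are left to SelectionDAG.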
3881 if ((VA.getLocInfo() != CCValAssign::Full) && 3882 (VA.getLocInfo() != CCValAssign::BCvt)) 3883 return false; 3884 3885 // Only handle register returns for now. 3886 if (!VA.isRegLoc()) 3887 return false; 3888 3889 Register Reg = getRegForValue(RV); 3890 if (!Reg) 3891 return false; 3892 3893 Register SrcReg = Reg + VA.getValNo(); 3894 Register DestReg = VA.getLocReg(); 3895 // Avoid a cross-class copy. This is very unlikely. 3896 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 3897 return false; 3898 3899 EVT RVEVT = TLI.getValueType(DL, RV->getType()); 3900 if (!RVEVT.isSimple()) 3901 return false; 3902 3903 // Vectors (of > 1 lane) in big endian need tricky handling. 3904 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() && 3905 !Subtarget->isLittleEndian()) 3906 return false; 3907 3908 MVT RVVT = RVEVT.getSimpleVT(); 3909 if (RVVT == MVT::f128) 3910 return false; 3911 3912 MVT DestVT = VA.getValVT(); 3913 // Special handling for extended integers. 3914 if (RVVT != DestVT) { 3915 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) 3916 return false; 3917 3918 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 3919 return false; 3920 3921 bool IsZExt = Outs[0].Flags.isZExt(); 3922 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); 3923 if (!SrcReg) 3924 return false; 3925 } 3926 3927 // "Callee" (i.e. value producer) zero extends pointers at function 3928 // boundary. 3929 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy()) 3930 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff); 3931 3932 // Make the copy. 3933 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3934 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); 3935 3936 // Add register to return instruction. 3937 RetRegs.push_back(VA.getLocReg()); 3938 } 3939 3940 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3941 TII.get(AArch64::RET_ReallyLR)); 3942 for (Register RetReg : RetRegs) 3943 MIB.addReg(RetReg, RegState::Implicit); 3944 return true; 3945 } 3946 3947 bool AArch64FastISel::selectTrunc(const Instruction *I) { 3948 Type *DestTy = I->getType(); 3949 Value *Op = I->getOperand(0); 3950 Type *SrcTy = Op->getType(); 3951 3952 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); 3953 EVT DestEVT = TLI.getValueType(DL, DestTy, true); 3954 if (!SrcEVT.isSimple()) 3955 return false; 3956 if (!DestEVT.isSimple()) 3957 return false; 3958 3959 MVT SrcVT = SrcEVT.getSimpleVT(); 3960 MVT DestVT = DestEVT.getSimpleVT(); 3961 3962 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && 3963 SrcVT != MVT::i8) 3964 return false; 3965 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && 3966 DestVT != MVT::i1) 3967 return false; 3968 3969 Register SrcReg = getRegForValue(Op); 3970 if (!SrcReg) 3971 return false; 3972 3973 // If we're truncating from i64 to a smaller non-legal type then generate an 3974 // AND. Otherwise, we know the high bits are undefined and a truncate only 3975 // generates a COPY. We cannot also mark the source register as the result 3976 // register, because this can incorrectly transfer the kill flag onto the 3977 // source register. 3978 Register ResultReg; 3979 if (SrcVT == MVT::i64) { 3980 uint64_t Mask = 0; 3981 switch (DestVT.SimpleTy) { 3982 default: 3983 // Trunc i64 to i32 is handled by the target-independent fast-isel.
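// (Truncating to i32 needs no masking, since i32 is already a legal type.)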
3984 return false; 3985 case MVT::i1: 3986 Mask = 0x1; 3987 break; 3988 case MVT::i8: 3989 Mask = 0xff; 3990 break; 3991 case MVT::i16: 3992 Mask = 0xffff; 3993 break; 3994 } 3995 // Issue an extract_subreg to get the lower 32-bits. 3996 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, 3997 AArch64::sub_32); 3998 // Create the AND instruction which performs the actual truncation. 3999 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask); 4000 assert(ResultReg && "Unexpected AND instruction emission failure."); 4001 } else { 4002 ResultReg = createResultReg(&AArch64::GPR32RegClass); 4003 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4004 TII.get(TargetOpcode::COPY), ResultReg) 4005 .addReg(SrcReg); 4006 } 4007 4008 updateValueMap(I, ResultReg); 4009 return true; 4010 } 4011 4012 Register AArch64FastISel::emiti1Ext(Register SrcReg, MVT DestVT, bool IsZExt) { 4013 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || 4014 DestVT == MVT::i64) && 4015 "Unexpected value type."); 4016 // Handle i8 and i16 as i32. 4017 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4018 DestVT = MVT::i32; 4019 4020 if (IsZExt) { 4021 Register ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1); 4022 assert(ResultReg && "Unexpected AND instruction emission failure."); 4023 if (DestVT == MVT::i64) { 4024 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the 4025 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. 4026 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4027 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4028 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4029 .addImm(0) 4030 .addReg(ResultReg) 4031 .addImm(AArch64::sub_32); 4032 ResultReg = Reg64; 4033 } 4034 return ResultReg; 4035 } else { 4036 if (DestVT == MVT::i64) { 4037 // FIXME: We're SExt i1 to i64. 4038 return Register(); 4039 } 4040 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 4041 0, 0); 4042 } 4043 } 4044 4045 Register AArch64FastISel::emitMul_rr(MVT RetVT, Register Op0, Register Op1) { 4046 unsigned Opc; 4047 Register ZReg; 4048 switch (RetVT.SimpleTy) { 4049 default: 4050 return Register(); 4051 case MVT::i8: 4052 case MVT::i16: 4053 case MVT::i32: 4054 RetVT = MVT::i32; 4055 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 4056 case MVT::i64: 4057 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 4058 } 4059 4060 const TargetRegisterClass *RC = 4061 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4062 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg); 4063 } 4064 4065 Register AArch64FastISel::emitSMULL_rr(MVT RetVT, Register Op0, Register Op1) { 4066 if (RetVT != MVT::i64) 4067 return Register(); 4068 4069 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, 4070 Op0, Op1, AArch64::XZR); 4071 } 4072 4073 Register AArch64FastISel::emitUMULL_rr(MVT RetVT, Register Op0, Register Op1) { 4074 if (RetVT != MVT::i64) 4075 return Register(); 4076 4077 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, 4078 Op0, Op1, AArch64::XZR); 4079 } 4080 4081 Register AArch64FastISel::emitLSL_rr(MVT RetVT, Register Op0Reg, 4082 Register Op1Reg) { 4083 unsigned Opc = 0; 4084 bool NeedTrunc = false; 4085 uint64_t Mask = 0; 4086 switch (RetVT.SimpleTy) { 4087 default: 4088 return Register(); 4089 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; 4090 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; 4091 case MVT::i32: Opc = AArch64::LSLVWr; break; 4092 case MVT::i64: Opc = AArch64::LSLVXr; break; 4093 } 4094 4095 const TargetRegisterClass *RC = 4096 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4097 if (NeedTrunc) 4098 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4099 4100 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4101 if (NeedTrunc) 4102 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4103 return ResultReg; 4104 } 4105 4106 Register AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, Register Op0, 4107 uint64_t Shift, bool IsZExt) { 4108 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4109 "Unexpected source/return type pair."); 4110 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4111 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4112 "Unexpected source value type."); 4113 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4114 RetVT == MVT::i64) && "Unexpected return value type."); 4115 4116 bool Is64Bit = (RetVT == MVT::i64); 4117 unsigned RegSize = Is64Bit ? 64 : 32; 4118 unsigned DstBits = RetVT.getSizeInBits(); 4119 unsigned SrcBits = SrcVT.getSizeInBits(); 4120 const TargetRegisterClass *RC = 4121 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4122 4123 // Just emit a copy for "zero" shifts. 4124 if (Shift == 0) { 4125 if (RetVT == SrcVT) { 4126 Register ResultReg = createResultReg(RC); 4127 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4128 TII.get(TargetOpcode::COPY), ResultReg) 4129 .addReg(Op0); 4130 return ResultReg; 4131 } else 4132 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4133 } 4134 4135 // Don't deal with undefined shifts. 4136 if (Shift >= DstBits) 4137 return Register(); 4138 4139 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
4140 // {S|U}BFM Wd, Wn, #r, #s 4141 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 4142 4143 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4144 // %2 = shl i16 %1, 4 4145 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 4146 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 4147 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 4148 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 4149 4150 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4151 // %2 = shl i16 %1, 8 4152 // Wd<32+7-24,32-24> = Wn<7:0> 4153 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 4154 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 4155 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 4156 4157 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4158 // %2 = shl i16 %1, 12 4159 // Wd<32+3-20,32-20> = Wn<3:0> 4160 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 4161 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 4162 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 4163 4164 unsigned ImmR = RegSize - Shift; 4165 // Limit the width to the length of the source type. 4166 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 4167 static const unsigned OpcTable[2][2] = { 4168 {AArch64::SBFMWri, AArch64::SBFMXri}, 4169 {AArch64::UBFMWri, AArch64::UBFMXri} 4170 }; 4171 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4172 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4173 Register TmpReg = MRI.createVirtualRegister(RC); 4174 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4175 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4176 .addImm(0) 4177 .addReg(Op0) 4178 .addImm(AArch64::sub_32); 4179 Op0 = TmpReg; 4180 } 4181 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4182 } 4183 4184 Register AArch64FastISel::emitLSR_rr(MVT RetVT, Register Op0Reg, 4185 Register Op1Reg) { 4186 unsigned Opc = 0; 4187 bool NeedTrunc = false; 4188 uint64_t Mask = 0; 4189 switch (RetVT.SimpleTy) { 4190 default: 4191 return Register(); 4192 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4193 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4194 case MVT::i32: Opc = AArch64::LSRVWr; break; 4195 case MVT::i64: Opc = AArch64::LSRVXr; break; 4196 } 4197 4198 const TargetRegisterClass *RC = 4199 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4200 if (NeedTrunc) { 4201 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask); 4202 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4203 } 4204 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4205 if (NeedTrunc) 4206 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4207 return ResultReg; 4208 } 4209 4210 Register AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, Register Op0, 4211 uint64_t Shift, bool IsZExt) { 4212 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4213 "Unexpected source/return type pair."); 4214 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4215 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4216 "Unexpected source value type."); 4217 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4218 RetVT == MVT::i64) && "Unexpected return value type."); 4219 4220 bool Is64Bit = (RetVT == MVT::i64); 4221 unsigned RegSize = Is64Bit ? 64 : 32; 4222 unsigned DstBits = RetVT.getSizeInBits(); 4223 unsigned SrcBits = SrcVT.getSizeInBits(); 4224 const TargetRegisterClass *RC = 4225 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4226 4227 // Just emit a copy for "zero" shifts. 
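// If the source and return types differ, a zero shift still has to perform
// the extension.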
4228 if (Shift == 0) { 4229 if (RetVT == SrcVT) { 4230 Register ResultReg = createResultReg(RC); 4231 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4232 TII.get(TargetOpcode::COPY), ResultReg) 4233 .addReg(Op0); 4234 return ResultReg; 4235 } else 4236 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4237 } 4238 4239 // Don't deal with undefined shifts. 4240 if (Shift >= DstBits) 4241 return Register(); 4242 4243 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4244 // {S|U}BFM Wd, Wn, #r, #s 4245 // Wd<s-r:0> = Wn<s:r> when r <= s 4246 4247 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4248 // %2 = lshr i16 %1, 4 4249 // Wd<7-4:0> = Wn<7:4> 4250 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4251 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4252 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4253 4254 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4255 // %2 = lshr i16 %1, 8 4256 // Wd<7-7,0> = Wn<7:7> 4257 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4258 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4259 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4260 4261 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4262 // %2 = lshr i16 %1, 12 4263 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4264 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4265 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4266 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4267 4268 if (Shift >= SrcBits && IsZExt) 4269 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4270 4271 // It is not possible to fold a sign-extend into the LShr instruction. In this 4272 // case emit a sign-extend. 4273 if (!IsZExt) { 4274 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4275 if (!Op0) 4276 return Register(); 4277 SrcVT = RetVT; 4278 SrcBits = SrcVT.getSizeInBits(); 4279 IsZExt = true; 4280 } 4281 4282 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4283 unsigned ImmS = SrcBits - 1; 4284 static const unsigned OpcTable[2][2] = { 4285 {AArch64::SBFMWri, AArch64::SBFMXri}, 4286 {AArch64::UBFMWri, AArch64::UBFMXri} 4287 }; 4288 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4289 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4290 Register TmpReg = MRI.createVirtualRegister(RC); 4291 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4292 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4293 .addImm(0) 4294 .addReg(Op0) 4295 .addImm(AArch64::sub_32); 4296 Op0 = TmpReg; 4297 } 4298 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4299 } 4300 4301 Register AArch64FastISel::emitASR_rr(MVT RetVT, Register Op0Reg, 4302 Register Op1Reg) { 4303 unsigned Opc = 0; 4304 bool NeedTrunc = false; 4305 uint64_t Mask = 0; 4306 switch (RetVT.SimpleTy) { 4307 default: 4308 return Register(); 4309 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4310 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4311 case MVT::i32: Opc = AArch64::ASRVWr; break; 4312 case MVT::i64: Opc = AArch64::ASRVXr; break; 4313 } 4314 4315 const TargetRegisterClass *RC = 4316 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4317 if (NeedTrunc) { 4318 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); 4319 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4320 } 4321 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4322 if (NeedTrunc) 4323 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4324 return ResultReg; 4325 } 4326 4327 Register AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, Register Op0, 4328 uint64_t Shift, bool IsZExt) { 4329 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4330 "Unexpected source/return type pair."); 4331 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4332 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4333 "Unexpected source value type."); 4334 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4335 RetVT == MVT::i64) && "Unexpected return value type."); 4336 4337 bool Is64Bit = (RetVT == MVT::i64); 4338 unsigned RegSize = Is64Bit ? 64 : 32; 4339 unsigned DstBits = RetVT.getSizeInBits(); 4340 unsigned SrcBits = SrcVT.getSizeInBits(); 4341 const TargetRegisterClass *RC = 4342 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4343 4344 // Just emit a copy for "zero" shifts. 4345 if (Shift == 0) { 4346 if (RetVT == SrcVT) { 4347 Register ResultReg = createResultReg(RC); 4348 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4349 TII.get(TargetOpcode::COPY), ResultReg) 4350 .addReg(Op0); 4351 return ResultReg; 4352 } else 4353 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4354 } 4355 4356 // Don't deal with undefined shifts. 4357 if (Shift >= DstBits) 4358 return Register(); 4359 4360 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4361 // {S|U}BFM Wd, Wn, #r, #s 4362 // Wd<s-r:0> = Wn<s:r> when r <= s 4363 4364 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4365 // %2 = ashr i16 %1, 4 4366 // Wd<7-4:0> = Wn<7:4> 4367 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4368 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4369 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4370 4371 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4372 // %2 = ashr i16 %1, 8 4373 // Wd<7-7,0> = Wn<7:7> 4374 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4375 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4376 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4377 4378 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4379 // %2 = ashr i16 %1, 12 4380 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4381 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4382 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4383 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4384 4385 if (Shift >= SrcBits && IsZExt) 4386 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4387 4388 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4389 unsigned ImmS = SrcBits - 1; 4390 static const unsigned OpcTable[2][2] = { 4391 {AArch64::SBFMWri, AArch64::SBFMXri}, 4392 {AArch64::UBFMWri, AArch64::UBFMXri} 4393 }; 4394 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4395 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4396 Register TmpReg = MRI.createVirtualRegister(RC); 4397 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4398 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4399 .addImm(0) 4400 .addReg(Op0) 4401 .addImm(AArch64::sub_32); 4402 Op0 = TmpReg; 4403 } 4404 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4405 } 4406 4407 Register AArch64FastISel::emitIntExt(MVT SrcVT, Register SrcReg, MVT DestVT, 4408 bool IsZExt) { 4409 
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4410 4411 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4412 // DestVT are odd things, so test to make sure that they are both types we can 4413 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4414 // bail out to SelectionDAG. 4415 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4416 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4417 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4418 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4419 return Register(); 4420 4421 unsigned Opc; 4422 unsigned Imm = 0; 4423 4424 switch (SrcVT.SimpleTy) { 4425 default: 4426 return Register(); 4427 case MVT::i1: 4428 return emiti1Ext(SrcReg, DestVT, IsZExt); 4429 case MVT::i8: 4430 if (DestVT == MVT::i64) 4431 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4432 else 4433 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4434 Imm = 7; 4435 break; 4436 case MVT::i16: 4437 if (DestVT == MVT::i64) 4438 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4439 else 4440 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4441 Imm = 15; 4442 break; 4443 case MVT::i32: 4444 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4445 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4446 Imm = 31; 4447 break; 4448 } 4449 4450 // Handle i8 and i16 as i32. 4451 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4452 DestVT = MVT::i32; 4453 else if (DestVT == MVT::i64) { 4454 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4455 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4456 TII.get(AArch64::SUBREG_TO_REG), Src64) 4457 .addImm(0) 4458 .addReg(SrcReg) 4459 .addImm(AArch64::sub_32); 4460 SrcReg = Src64; 4461 } 4462 4463 const TargetRegisterClass *RC = 4464 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4465 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm); 4466 } 4467 4468 static bool isZExtLoad(const MachineInstr *LI) { 4469 switch (LI->getOpcode()) { 4470 default: 4471 return false; 4472 case AArch64::LDURBBi: 4473 case AArch64::LDURHHi: 4474 case AArch64::LDURWi: 4475 case AArch64::LDRBBui: 4476 case AArch64::LDRHHui: 4477 case AArch64::LDRWui: 4478 case AArch64::LDRBBroX: 4479 case AArch64::LDRHHroX: 4480 case AArch64::LDRWroX: 4481 case AArch64::LDRBBroW: 4482 case AArch64::LDRHHroW: 4483 case AArch64::LDRWroW: 4484 return true; 4485 } 4486 } 4487 4488 static bool isSExtLoad(const MachineInstr *LI) { 4489 switch (LI->getOpcode()) { 4490 default: 4491 return false; 4492 case AArch64::LDURSBWi: 4493 case AArch64::LDURSHWi: 4494 case AArch64::LDURSBXi: 4495 case AArch64::LDURSHXi: 4496 case AArch64::LDURSWi: 4497 case AArch64::LDRSBWui: 4498 case AArch64::LDRSHWui: 4499 case AArch64::LDRSBXui: 4500 case AArch64::LDRSHXui: 4501 case AArch64::LDRSWui: 4502 case AArch64::LDRSBWroX: 4503 case AArch64::LDRSHWroX: 4504 case AArch64::LDRSBXroX: 4505 case AArch64::LDRSHXroX: 4506 case AArch64::LDRSWroX: 4507 case AArch64::LDRSBWroW: 4508 case AArch64::LDRSHWroW: 4509 case AArch64::LDRSBXroW: 4510 case AArch64::LDRSHXroW: 4511 case AArch64::LDRSWroW: 4512 return true; 4513 } 4514 } 4515 4516 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4517 MVT SrcVT) { 4518 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4519 if (!LI || !LI->hasOneUse()) 4520 return false; 4521 4522 // Check if the load instruction has already been selected. 
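// lookUpRegForValue only consults the value maps and never triggers
// selection, so an unselected load simply makes this optimization bail out.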
4523 Register Reg = lookUpRegForValue(LI); 4524 if (!Reg) 4525 return false; 4526 4527 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4528 if (!MI) 4529 return false; 4530 4531 // Check if the correct load instruction has been emitted - SelectionDAG might 4532 // have emitted a zero-extending load, but we need a sign-extending load. 4533 bool IsZExt = isa<ZExtInst>(I); 4534 const auto *LoadMI = MI; 4535 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4536 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4537 Register LoadReg = MI->getOperand(1).getReg(); 4538 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4539 assert(LoadMI && "Expected valid instruction"); 4540 } 4541 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4542 return false; 4543 4544 // Nothing to be done. 4545 if (RetVT != MVT::i64 || SrcVT > MVT::i32) { 4546 updateValueMap(I, Reg); 4547 return true; 4548 } 4549 4550 if (IsZExt) { 4551 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 4552 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4553 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4554 .addImm(0) 4555 .addReg(Reg, getKillRegState(true)) 4556 .addImm(AArch64::sub_32); 4557 Reg = Reg64; 4558 } else { 4559 assert((MI->getOpcode() == TargetOpcode::COPY && 4560 MI->getOperand(1).getSubReg() == AArch64::sub_32) && 4561 "Expected copy instruction"); 4562 Reg = MI->getOperand(1).getReg(); 4563 MachineBasicBlock::iterator I(MI); 4564 removeDeadCode(I, std::next(I)); 4565 } 4566 updateValueMap(I, Reg); 4567 return true; 4568 } 4569 4570 bool AArch64FastISel::selectIntExt(const Instruction *I) { 4571 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 4572 "Unexpected integer extend instruction."); 4573 MVT RetVT; 4574 MVT SrcVT; 4575 if (!isTypeSupported(I->getType(), RetVT)) 4576 return false; 4577 4578 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) 4579 return false; 4580 4581 // Try to optimize already sign-/zero-extended values from load instructions. 4582 if (optimizeIntExtLoad(I, RetVT, SrcVT)) 4583 return true; 4584 4585 Register SrcReg = getRegForValue(I->getOperand(0)); 4586 if (!SrcReg) 4587 return false; 4588 4589 // Try to optimize already sign-/zero-extended values from function arguments. 4590 bool IsZExt = isa<ZExtInst>(I); 4591 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) { 4592 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { 4593 if (RetVT == MVT::i64 && SrcVT != MVT::i64) { 4594 Register ResultReg = createResultReg(&AArch64::GPR64RegClass); 4595 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4596 TII.get(AArch64::SUBREG_TO_REG), ResultReg) 4597 .addImm(0) 4598 .addReg(SrcReg) 4599 .addImm(AArch64::sub_32); 4600 SrcReg = ResultReg; 4601 } 4602 4603 updateValueMap(I, SrcReg); 4604 return true; 4605 } 4606 } 4607 4608 Register ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); 4609 if (!ResultReg) 4610 return false; 4611 4612 updateValueMap(I, ResultReg); 4613 return true; 4614 } 4615 4616 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { 4617 EVT DestEVT = TLI.getValueType(DL, I->getType(), true); 4618 if (!DestEVT.isSimple()) 4619 return false; 4620 4621 MVT DestVT = DestEVT.getSimpleVT(); 4622 if (DestVT != MVT::i64 && DestVT != MVT::i32) 4623 return false; 4624 4625 unsigned DivOpc; 4626 bool Is64bit = (DestVT == MVT::i64); 4627 switch (ISDOpcode) { 4628 default: 4629 return false; 4630 case ISD::SREM: 4631 DivOpc = Is64bit ? 
AArch64::SDIVXr : AArch64::SDIVWr; 4632 break; 4633 case ISD::UREM: 4634 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; 4635 break; 4636 } 4637 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4638 Register Src0Reg = getRegForValue(I->getOperand(0)); 4639 if (!Src0Reg) 4640 return false; 4641 4642 Register Src1Reg = getRegForValue(I->getOperand(1)); 4643 if (!Src1Reg) 4644 return false; 4645 4646 const TargetRegisterClass *RC = 4647 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4648 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg); 4649 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4650 // The remainder is computed as numerator - (quotient * denominator) using the 4651 // MSUB instruction. 4652 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg); 4653 updateValueMap(I, ResultReg); 4654 return true; 4655 } 4656 4657 bool AArch64FastISel::selectMul(const Instruction *I) { 4658 MVT VT; 4659 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4660 return false; 4661 4662 if (VT.isVector()) 4663 return selectBinaryOp(I, ISD::MUL); 4664 4665 const Value *Src0 = I->getOperand(0); 4666 const Value *Src1 = I->getOperand(1); 4667 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4668 if (C->getValue().isPowerOf2()) 4669 std::swap(Src0, Src1); 4670 4671 // Try to simplify to a shift instruction. 4672 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4673 if (C->getValue().isPowerOf2()) { 4674 uint64_t ShiftVal = C->getValue().logBase2(); 4675 MVT SrcVT = VT; 4676 bool IsZExt = true; 4677 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4678 if (!isIntExtFree(ZExt)) { 4679 MVT VT; 4680 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4681 SrcVT = VT; 4682 IsZExt = true; 4683 Src0 = ZExt->getOperand(0); 4684 } 4685 } 4686 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4687 if (!isIntExtFree(SExt)) { 4688 MVT VT; 4689 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4690 SrcVT = VT; 4691 IsZExt = false; 4692 Src0 = SExt->getOperand(0); 4693 } 4694 } 4695 } 4696 4697 Register Src0Reg = getRegForValue(Src0); 4698 if (!Src0Reg) 4699 return false; 4700 4701 Register ResultReg = emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt); 4702 4703 if (ResultReg) { 4704 updateValueMap(I, ResultReg); 4705 return true; 4706 } 4707 } 4708 4709 Register Src0Reg = getRegForValue(I->getOperand(0)); 4710 if (!Src0Reg) 4711 return false; 4712 4713 Register Src1Reg = getRegForValue(I->getOperand(1)); 4714 if (!Src1Reg) 4715 return false; 4716 4717 Register ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg); 4718 4719 if (!ResultReg) 4720 return false; 4721 4722 updateValueMap(I, ResultReg); 4723 return true; 4724 } 4725 4726 bool AArch64FastISel::selectShift(const Instruction *I) { 4727 MVT RetVT; 4728 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4729 return false; 4730 4731 if (RetVT.isVector()) 4732 return selectOperator(I, I->getOpcode()); 4733 4734 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4735 Register ResultReg; 4736 uint64_t ShiftVal = C->getZExtValue(); 4737 MVT SrcVT = RetVT; 4738 bool IsZExt = I->getOpcode() != Instruction::AShr; 4739 const Value *Op0 = I->getOperand(0); 4740 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4741 if (!isIntExtFree(ZExt)) { 4742 MVT TmpVT; 4743 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4744 SrcVT = TmpVT; 4745 IsZExt = true; 4746 Op0 
= ZExt->getOperand(0); 4747 } 4748 } 4749 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4750 if (!isIntExtFree(SExt)) { 4751 MVT TmpVT; 4752 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4753 SrcVT = TmpVT; 4754 IsZExt = false; 4755 Op0 = SExt->getOperand(0); 4756 } 4757 } 4758 } 4759 4760 Register Op0Reg = getRegForValue(Op0); 4761 if (!Op0Reg) 4762 return false; 4763 4764 switch (I->getOpcode()) { 4765 default: llvm_unreachable("Unexpected instruction."); 4766 case Instruction::Shl: 4767 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4768 break; 4769 case Instruction::AShr: 4770 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4771 break; 4772 case Instruction::LShr: 4773 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4774 break; 4775 } 4776 if (!ResultReg) 4777 return false; 4778 4779 updateValueMap(I, ResultReg); 4780 return true; 4781 } 4782 4783 Register Op0Reg = getRegForValue(I->getOperand(0)); 4784 if (!Op0Reg) 4785 return false; 4786 4787 Register Op1Reg = getRegForValue(I->getOperand(1)); 4788 if (!Op1Reg) 4789 return false; 4790 4791 Register ResultReg; 4792 switch (I->getOpcode()) { 4793 default: llvm_unreachable("Unexpected instruction."); 4794 case Instruction::Shl: 4795 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg); 4796 break; 4797 case Instruction::AShr: 4798 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg); 4799 break; 4800 case Instruction::LShr: 4801 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg); 4802 break; 4803 } 4804 4805 if (!ResultReg) 4806 return false; 4807 4808 updateValueMap(I, ResultReg); 4809 return true; 4810 } 4811 4812 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4813 MVT RetVT, SrcVT; 4814 4815 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4816 return false; 4817 if (!isTypeLegal(I->getType(), RetVT)) 4818 return false; 4819 4820 unsigned Opc; 4821 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4822 Opc = AArch64::FMOVWSr; 4823 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4824 Opc = AArch64::FMOVXDr; 4825 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4826 Opc = AArch64::FMOVSWr; 4827 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4828 Opc = AArch64::FMOVDXr; 4829 else 4830 return false; 4831 4832 const TargetRegisterClass *RC = nullptr; 4833 switch (RetVT.SimpleTy) { 4834 default: llvm_unreachable("Unexpected value type."); 4835 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4836 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4837 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4838 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4839 } 4840 Register Op0Reg = getRegForValue(I->getOperand(0)); 4841 if (!Op0Reg) 4842 return false; 4843 4844 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg); 4845 if (!ResultReg) 4846 return false; 4847 4848 updateValueMap(I, ResultReg); 4849 return true; 4850 } 4851 4852 bool AArch64FastISel::selectFRem(const Instruction *I) { 4853 MVT RetVT; 4854 if (!isTypeLegal(I->getType(), RetVT)) 4855 return false; 4856 4857 RTLIB::Libcall LC; 4858 switch (RetVT.SimpleTy) { 4859 default: 4860 return false; 4861 case MVT::f32: 4862 LC = RTLIB::REM_F32; 4863 break; 4864 case MVT::f64: 4865 LC = RTLIB::REM_F64; 4866 break; 4867 } 4868 4869 ArgListTy Args; 4870 Args.reserve(I->getNumOperands()); 4871 4872 // Populate the argument list. 
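// Both frem operands are passed through unchanged to the fmodf/fmod libcall.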
4873 for (auto &Arg : I->operands()) { 4874 ArgListEntry Entry; 4875 Entry.Val = Arg; 4876 Entry.Ty = Arg->getType(); 4877 Args.push_back(Entry); 4878 } 4879 4880 CallLoweringInfo CLI; 4881 MCContext &Ctx = MF->getContext(); 4882 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4883 TLI.getLibcallName(LC), std::move(Args)); 4884 if (!lowerCallTo(CLI)) 4885 return false; 4886 updateValueMap(I, CLI.ResultReg); 4887 return true; 4888 } 4889 4890 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4891 MVT VT; 4892 if (!isTypeLegal(I->getType(), VT)) 4893 return false; 4894 4895 if (!isa<ConstantInt>(I->getOperand(1))) 4896 return selectBinaryOp(I, ISD::SDIV); 4897 4898 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4899 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4900 !(C.isPowerOf2() || C.isNegatedPowerOf2())) 4901 return selectBinaryOp(I, ISD::SDIV); 4902 4903 unsigned Lg2 = C.countr_zero(); 4904 Register Src0Reg = getRegForValue(I->getOperand(0)); 4905 if (!Src0Reg) 4906 return false; 4907 4908 if (cast<BinaryOperator>(I)->isExact()) { 4909 Register ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2); 4910 if (!ResultReg) 4911 return false; 4912 updateValueMap(I, ResultReg); 4913 return true; 4914 } 4915 4916 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4917 Register AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne); 4918 if (!AddReg) 4919 return false; 4920 4921 // (Src0 < 0) ? Pow2 - 1 : 0; 4922 if (!emitICmp_ri(VT, Src0Reg, 0)) 4923 return false; 4924 4925 unsigned SelectOpc; 4926 const TargetRegisterClass *RC; 4927 if (VT == MVT::i64) { 4928 SelectOpc = AArch64::CSELXr; 4929 RC = &AArch64::GPR64RegClass; 4930 } else { 4931 SelectOpc = AArch64::CSELWr; 4932 RC = &AArch64::GPR32RegClass; 4933 } 4934 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg, 4935 AArch64CC::LT); 4936 if (!SelectReg) 4937 return false; 4938 4939 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4940 // negate the result. 4941 Register ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4942 Register ResultReg; 4943 if (C.isNegative()) 4944 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg, 4945 AArch64_AM::ASR, Lg2); 4946 else 4947 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2); 4948 4949 if (!ResultReg) 4950 return false; 4951 4952 updateValueMap(I, ResultReg); 4953 return true; 4954 } 4955 4956 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4957 /// have to duplicate it for AArch64, because otherwise we would fail during the 4958 /// sign-extend emission. 4959 Register AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4960 Register IdxN = getRegForValue(Idx); 4961 if (!IdxN) 4962 // Unhandled operand. Halt "fast" selection and bail. 4963 return Register(); 4964 4965 // If the index is smaller or larger than intptr_t, truncate or extend it. 4966 MVT PtrVT = TLI.getPointerTy(DL); 4967 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); 4968 if (IdxVT.bitsLT(PtrVT)) { 4969 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false); 4970 } else if (IdxVT.bitsGT(PtrVT)) 4971 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64"); 4972 return IdxN; 4973 } 4974 4975 /// This is mostly a copy of the existing FastISel GEP code, but we have to 4976 /// duplicate it for AArch64, because otherwise we would bail out even for 4977 /// simple cases. 
This is because the standard fastEmit functions don't cover 4978 /// MUL at all and ADD is lowered very inefficiently. 4979 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { 4980 if (Subtarget->isTargetILP32()) 4981 return false; 4982 4983 Register N = getRegForValue(I->getOperand(0)); 4984 if (!N) 4985 return false; 4986 4987 // Keep a running tab of the total offset to coalesce multiple N = N + Offset 4988 // into a single N = N + TotalOffset. 4989 uint64_t TotalOffs = 0; 4990 MVT VT = TLI.getPointerTy(DL); 4991 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I); 4992 GTI != E; ++GTI) { 4993 const Value *Idx = GTI.getOperand(); 4994 if (auto *StTy = GTI.getStructTypeOrNull()) { 4995 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); 4996 // N = N + Offset 4997 if (Field) 4998 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); 4999 } else { 5000 // If this is a constant subscript, handle it quickly. 5001 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { 5002 if (CI->isZero()) 5003 continue; 5004 // N = N + Offset 5005 TotalOffs += GTI.getSequentialElementStride(DL) * 5006 cast<ConstantInt>(CI)->getSExtValue(); 5007 continue; 5008 } 5009 if (TotalOffs) { 5010 N = emitAdd_ri_(VT, N, TotalOffs); 5011 if (!N) 5012 return false; 5013 TotalOffs = 0; 5014 } 5015 5016 // N = N + Idx * ElementSize; 5017 uint64_t ElementSize = GTI.getSequentialElementStride(DL); 5018 Register IdxN = getRegForGEPIndex(Idx); 5019 if (!IdxN) 5020 return false; 5021 5022 if (ElementSize != 1) { 5023 Register C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); 5024 if (!C) 5025 return false; 5026 IdxN = emitMul_rr(VT, IdxN, C); 5027 if (!IdxN) 5028 return false; 5029 } 5030 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN); 5031 if (!N) 5032 return false; 5033 } 5034 } 5035 if (TotalOffs) { 5036 N = emitAdd_ri_(VT, N, TotalOffs); 5037 if (!N) 5038 return false; 5039 } 5040 updateValueMap(I, N); 5041 return true; 5042 } 5043 5044 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) { 5045 assert(TM.getOptLevel() == CodeGenOptLevel::None && 5046 "cmpxchg survived AtomicExpand at optlevel > -O0"); 5047 5048 auto *RetPairTy = cast<StructType>(I->getType()); 5049 Type *RetTy = RetPairTy->getTypeAtIndex(0U); 5050 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) && 5051 "cmpxchg has a non-i1 status result"); 5052 5053 MVT VT; 5054 if (!isTypeLegal(RetTy, VT)) 5055 return false; 5056 5057 const TargetRegisterClass *ResRC; 5058 unsigned Opc, CmpOpc; 5059 // This only supports i32/i64, because i8/i16 aren't legal, and the generic 5060 // extractvalue selection doesn't support that.
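// CMP_SWAP_32/64 are pseudo instructions that are expanded into the actual
// atomic sequence later; the SUBS and CSINC emitted below reconstruct the i1
// 'success' result by comparing the loaded value with the expected one.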
5061 if (VT == MVT::i32) { 5062 Opc = AArch64::CMP_SWAP_32; 5063 CmpOpc = AArch64::SUBSWrs; 5064 ResRC = &AArch64::GPR32RegClass; 5065 } else if (VT == MVT::i64) { 5066 Opc = AArch64::CMP_SWAP_64; 5067 CmpOpc = AArch64::SUBSXrs; 5068 ResRC = &AArch64::GPR64RegClass; 5069 } else { 5070 return false; 5071 } 5072 5073 const MCInstrDesc &II = TII.get(Opc); 5074 5075 Register AddrReg = getRegForValue(I->getPointerOperand()); 5076 Register DesiredReg = getRegForValue(I->getCompareOperand()); 5077 Register NewReg = getRegForValue(I->getNewValOperand()); 5078 5079 if (!AddrReg || !DesiredReg || !NewReg) 5080 return false; 5081 5082 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 5083 DesiredReg = constrainOperandRegClass(II, DesiredReg, II.getNumDefs() + 1); 5084 NewReg = constrainOperandRegClass(II, NewReg, II.getNumDefs() + 2); 5085 5086 const Register ResultReg1 = createResultReg(ResRC); 5087 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass); 5088 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass); 5089 5090 // FIXME: MachineMemOperand doesn't support cmpxchg yet. 5091 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 5092 .addDef(ResultReg1) 5093 .addDef(ScratchReg) 5094 .addUse(AddrReg) 5095 .addUse(DesiredReg) 5096 .addUse(NewReg); 5097 5098 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc)) 5099 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR) 5100 .addUse(ResultReg1) 5101 .addUse(DesiredReg) 5102 .addImm(0); 5103 5104 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr)) 5105 .addDef(ResultReg2) 5106 .addUse(AArch64::WZR) 5107 .addUse(AArch64::WZR) 5108 .addImm(AArch64CC::NE); 5109 5110 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); 5111 updateValueMap(I, ResultReg1, 2); 5112 return true; 5113 } 5114 5115 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 5116 if (TLI.fallBackToDAGISel(*I)) 5117 return false; 5118 switch (I->getOpcode()) { 5119 default: 5120 break; 5121 case Instruction::Add: 5122 case Instruction::Sub: 5123 return selectAddSub(I); 5124 case Instruction::Mul: 5125 return selectMul(I); 5126 case Instruction::SDiv: 5127 return selectSDiv(I); 5128 case Instruction::SRem: 5129 if (!selectBinaryOp(I, ISD::SREM)) 5130 return selectRem(I, ISD::SREM); 5131 return true; 5132 case Instruction::URem: 5133 if (!selectBinaryOp(I, ISD::UREM)) 5134 return selectRem(I, ISD::UREM); 5135 return true; 5136 case Instruction::Shl: 5137 case Instruction::LShr: 5138 case Instruction::AShr: 5139 return selectShift(I); 5140 case Instruction::And: 5141 case Instruction::Or: 5142 case Instruction::Xor: 5143 return selectLogicalOp(I); 5144 case Instruction::Br: 5145 return selectBranch(I); 5146 case Instruction::IndirectBr: 5147 return selectIndirectBr(I); 5148 case Instruction::BitCast: 5149 if (!FastISel::selectBitCast(I)) 5150 return selectBitCast(I); 5151 return true; 5152 case Instruction::FPToSI: 5153 if (!selectCast(I, ISD::FP_TO_SINT)) 5154 return selectFPToInt(I, /*Signed=*/true); 5155 return true; 5156 case Instruction::FPToUI: 5157 return selectFPToInt(I, /*Signed=*/false); 5158 case Instruction::ZExt: 5159 case Instruction::SExt: 5160 return selectIntExt(I); 5161 case Instruction::Trunc: 5162 if (!selectCast(I, ISD::TRUNCATE)) 5163 return selectTrunc(I); 5164 return true; 5165 case Instruction::FPExt: 5166 return selectFPExt(I); 5167 case Instruction::FPTrunc: 5168 return selectFPTrunc(I); 5169 case Instruction::SIToFP: 5170 if (!selectCast(I, 
ISD::SINT_TO_FP)) 5171 return selectIntToFP(I, /*Signed=*/true); 5172 return true; 5173 case Instruction::UIToFP: 5174 return selectIntToFP(I, /*Signed=*/false); 5175 case Instruction::Load: 5176 return selectLoad(I); 5177 case Instruction::Store: 5178 return selectStore(I); 5179 case Instruction::FCmp: 5180 case Instruction::ICmp: 5181 return selectCmp(I); 5182 case Instruction::Select: 5183 return selectSelect(I); 5184 case Instruction::Ret: 5185 return selectRet(I); 5186 case Instruction::FRem: 5187 return selectFRem(I); 5188 case Instruction::GetElementPtr: 5189 return selectGetElementPtr(I); 5190 case Instruction::AtomicCmpXchg: 5191 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I)); 5192 } 5193 5194 // fall-back to target-independent instruction selection. 5195 return selectOperator(I, I->getOpcode()); 5196 } 5197 5198 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, 5199 const TargetLibraryInfo *LibInfo) { 5200 5201 SMEAttrs CallerAttrs = 5202 FuncInfo.MF->getInfo<AArch64FunctionInfo>()->getSMEFnAttrs(); 5203 if (CallerAttrs.hasZAState() || CallerAttrs.hasZT0State() || 5204 CallerAttrs.hasStreamingInterfaceOrBody() || 5205 CallerAttrs.hasStreamingCompatibleInterface() || 5206 CallerAttrs.hasAgnosticZAInterface()) 5207 return nullptr; 5208 return new AArch64FastISel(FuncInfo, LibInfo); 5209 } 5210