//===- AArch64FastISel.cpp - AArch64 FastISel implementation --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg = 0;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind
getKind() const { return Kind; } 106 void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; } 107 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; } 108 bool isRegBase() const { return Kind == RegBase; } 109 bool isFIBase() const { return Kind == FrameIndexBase; } 110 111 void setReg(unsigned Reg) { 112 assert(isRegBase() && "Invalid base register access!"); 113 Base.Reg = Reg; 114 } 115 116 unsigned getReg() const { 117 assert(isRegBase() && "Invalid base register access!"); 118 return Base.Reg; 119 } 120 121 void setOffsetReg(unsigned Reg) { 122 OffsetReg = Reg; 123 } 124 125 unsigned getOffsetReg() const { 126 return OffsetReg; 127 } 128 129 void setFI(unsigned FI) { 130 assert(isFIBase() && "Invalid base frame index access!"); 131 Base.FI = FI; 132 } 133 134 unsigned getFI() const { 135 assert(isFIBase() && "Invalid base frame index access!"); 136 return Base.FI; 137 } 138 139 void setOffset(int64_t O) { Offset = O; } 140 int64_t getOffset() { return Offset; } 141 void setShift(unsigned S) { Shift = S; } 142 unsigned getShift() { return Shift; } 143 144 void setGlobalValue(const GlobalValue *G) { GV = G; } 145 const GlobalValue *getGlobalValue() { return GV; } 146 }; 147 148 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 149 /// make the right decision when generating code for different targets. 150 const AArch64Subtarget *Subtarget; 151 LLVMContext *Context; 152 153 bool fastLowerArguments() override; 154 bool fastLowerCall(CallLoweringInfo &CLI) override; 155 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; 156 157 private: 158 // Selection routines. 159 bool selectAddSub(const Instruction *I); 160 bool selectLogicalOp(const Instruction *I); 161 bool selectLoad(const Instruction *I); 162 bool selectStore(const Instruction *I); 163 bool selectBranch(const Instruction *I); 164 bool selectIndirectBr(const Instruction *I); 165 bool selectCmp(const Instruction *I); 166 bool selectSelect(const Instruction *I); 167 bool selectFPExt(const Instruction *I); 168 bool selectFPTrunc(const Instruction *I); 169 bool selectFPToInt(const Instruction *I, bool Signed); 170 bool selectIntToFP(const Instruction *I, bool Signed); 171 bool selectRem(const Instruction *I, unsigned ISDOpcode); 172 bool selectRet(const Instruction *I); 173 bool selectTrunc(const Instruction *I); 174 bool selectIntExt(const Instruction *I); 175 bool selectMul(const Instruction *I); 176 bool selectShift(const Instruction *I); 177 bool selectBitCast(const Instruction *I); 178 bool selectFRem(const Instruction *I); 179 bool selectSDiv(const Instruction *I); 180 bool selectGetElementPtr(const Instruction *I); 181 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I); 182 183 // Utility helper routines. 
184 bool isTypeLegal(Type *Ty, MVT &VT); 185 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false); 186 bool isValueAvailable(const Value *V) const; 187 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr); 188 bool computeCallAddress(const Value *V, Address &Addr); 189 bool simplifyAddress(Address &Addr, MVT VT); 190 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB, 191 MachineMemOperand::Flags Flags, 192 unsigned ScaleFactor, MachineMemOperand *MMO); 193 bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment); 194 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, 195 MaybeAlign Alignment); 196 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I, 197 const Value *Cond); 198 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT); 199 bool optimizeSelect(const SelectInst *SI); 200 unsigned getRegForGEPIndex(const Value *Idx); 201 202 // Emit helper routines. 203 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, 204 const Value *RHS, bool SetFlags = false, 205 bool WantResult = true, bool IsZExt = false); 206 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, 207 unsigned RHSReg, bool SetFlags = false, 208 bool WantResult = true); 209 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, 210 uint64_t Imm, bool SetFlags = false, 211 bool WantResult = true); 212 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, 213 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType, 214 uint64_t ShiftImm, bool SetFlags = false, 215 bool WantResult = true); 216 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, 217 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType, 218 uint64_t ShiftImm, bool SetFlags = false, 219 bool WantResult = true); 220 221 // Emit functions. 
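  // Naming convention used by the emit helpers in this class (inferred from
  // the AArch64 instruction forms they select): "_rr" is register/register,
  // "_ri" register/immediate, "_rs" shifted register, and "_rx" extended
  // register; e.g. emitAddSub_rs ultimately picks ADDWrs/ADDXrs style opcodes.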
222 bool emitCompareAndBranch(const BranchInst *BI); 223 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt); 224 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); 225 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm); 226 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); 227 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true, 228 MachineMemOperand *MMO = nullptr); 229 bool emitStore(MVT VT, unsigned SrcReg, Address Addr, 230 MachineMemOperand *MMO = nullptr); 231 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg, 232 MachineMemOperand *MMO = nullptr); 233 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); 234 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); 235 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, 236 bool SetFlags = false, bool WantResult = true, 237 bool IsZExt = false); 238 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm); 239 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS, 240 bool SetFlags = false, bool WantResult = true, 241 bool IsZExt = false); 242 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg, 243 bool WantResult = true); 244 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg, 245 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm, 246 bool WantResult = true); 247 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, 248 const Value *RHS); 249 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, 250 uint64_t Imm); 251 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, 252 unsigned RHSReg, uint64_t ShiftImm); 253 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm); 254 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1); 255 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1); 256 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1); 257 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); 258 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, 259 bool IsZExt = true); 260 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); 261 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, 262 bool IsZExt = true); 263 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); 264 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, 265 bool IsZExt = false); 266 267 unsigned materializeInt(const ConstantInt *CI, MVT VT); 268 unsigned materializeFP(const ConstantFP *CFP, MVT VT); 269 unsigned materializeGV(const GlobalValue *GV); 270 271 // Call handling routines. 272 private: 273 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; 274 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs, 275 unsigned &NumBytes); 276 bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes); 277 278 public: 279 // Backend specific FastISel code. 
280 unsigned fastMaterializeAlloca(const AllocaInst *AI) override; 281 unsigned fastMaterializeConstant(const Constant *C) override; 282 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override; 283 284 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, 285 const TargetLibraryInfo *LibInfo) 286 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { 287 Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>(); 288 Context = &FuncInfo.Fn->getContext(); 289 } 290 291 bool fastSelectInstruction(const Instruction *I) override; 292 293 #include "AArch64GenFastISel.inc" 294 }; 295 296 } // end anonymous namespace 297 298 /// Check if the sign-/zero-extend will be a noop. 299 static bool isIntExtFree(const Instruction *I) { 300 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 301 "Unexpected integer extend instruction."); 302 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && 303 "Unexpected value type."); 304 bool IsZExt = isa<ZExtInst>(I); 305 306 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0))) 307 if (LI->hasOneUse()) 308 return true; 309 310 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) 311 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) 312 return true; 313 314 return false; 315 } 316 317 /// Determine the implicit scale factor that is applied by a memory 318 /// operation for a given value type. 319 static unsigned getImplicitScaleFactor(MVT VT) { 320 switch (VT.SimpleTy) { 321 default: 322 return 0; // invalid 323 case MVT::i1: // fall-through 324 case MVT::i8: 325 return 1; 326 case MVT::i16: 327 return 2; 328 case MVT::i32: // fall-through 329 case MVT::f32: 330 return 4; 331 case MVT::i64: // fall-through 332 case MVT::f64: 333 return 8; 334 } 335 } 336 337 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { 338 if (CC == CallingConv::GHC) 339 return CC_AArch64_GHC; 340 if (CC == CallingConv::CFGuard_Check) 341 return CC_AArch64_Win64_CFGuard_Check; 342 if (Subtarget->isTargetDarwin()) 343 return CC_AArch64_DarwinPCS; 344 if (Subtarget->isTargetWindows()) 345 return CC_AArch64_Win64PCS; 346 return CC_AArch64_AAPCS; 347 } 348 349 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { 350 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && 351 "Alloca should always return a pointer."); 352 353 // Don't handle dynamic allocas. 354 if (!FuncInfo.StaticAllocaMap.count(AI)) 355 return 0; 356 357 DenseMap<const AllocaInst *, int>::iterator SI = 358 FuncInfo.StaticAllocaMap.find(AI); 359 360 if (SI != FuncInfo.StaticAllocaMap.end()) { 361 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 362 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri), 363 ResultReg) 364 .addFrameIndex(SI->second) 365 .addImm(0) 366 .addImm(0); 367 return ResultReg; 368 } 369 370 return 0; 371 } 372 373 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { 374 if (VT > MVT::i64) 375 return 0; 376 377 if (!CI->isZero()) 378 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); 379 380 // Create a copy from the zero register to materialize a "0" value. 381 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass 382 : &AArch64::GPR32RegClass; 383 unsigned ZeroReg = (VT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR; 384 Register ResultReg = createResultReg(RC); 385 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), 386 ResultReg).addReg(ZeroReg, getKillRegState(true)); 387 return ResultReg; 388 } 389 390 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { 391 // Positive zero (+0.0) has to be materialized with a fmov from the zero 392 // register, because the immediate version of fmov cannot encode zero. 393 if (CFP->isNullValue()) 394 return fastMaterializeFloatZero(CFP); 395 396 if (VT != MVT::f32 && VT != MVT::f64) 397 return 0; 398 399 const APFloat Val = CFP->getValueAPF(); 400 bool Is64Bit = (VT == MVT::f64); 401 // This checks to see if we can use FMOV instructions to materialize 402 // a constant, otherwise we have to materialize via the constant pool. 403 int Imm = 404 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val); 405 if (Imm != -1) { 406 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi; 407 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); 408 } 409 410 // For the large code model materialize the FP constant in code. 411 if (TM.getCodeModel() == CodeModel::Large) { 412 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm; 413 const TargetRegisterClass *RC = Is64Bit ? 414 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 415 416 Register TmpReg = createResultReg(RC); 417 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg) 418 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); 419 420 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 421 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 422 TII.get(TargetOpcode::COPY), ResultReg) 423 .addReg(TmpReg, getKillRegState(true)); 424 425 return ResultReg; 426 } 427 428 // Materialize via constant pool. MachineConstantPool wants an explicit 429 // alignment. 430 Align Alignment = DL.getPrefTypeAlign(CFP->getType()); 431 432 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment); 433 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 434 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP), 435 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE); 436 437 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui; 438 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 439 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 440 .addReg(ADRPReg) 441 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 442 return ResultReg; 443 } 444 445 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { 446 // We can't handle thread-local variables quickly yet. 447 if (GV->isThreadLocal()) 448 return 0; 449 450 // MachO still uses GOT for large code-model accesses, but ELF requires 451 // movz/movk sequences, which FastISel doesn't handle yet. 
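  // For reference (illustrative only), the small-code-model sequences emitted
  // below look roughly like:
  //   adrp x0, sym                 ; direct reference: ADRP + ADDXri
  //   add  x0, x0, :lo12:sym
  // or, when going through the GOT:
  //   adrp x0, :got:sym            ; GOT reference: ADRP + LDRXui
  //   ldr  x0, [x0, :got_lo12:sym]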
  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
    return 0;

  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    unsigned LdrOpc;
    if (Subtarget->isTargetILP32()) {
      ResultReg = createResultReg(&AArch64::GPR32RegClass);
      LdrOpc = AArch64::LDRWui;
    } else {
      ResultReg = createResultReg(&AArch64::GPR64RegClass);
      LdrOpc = AArch64::LDRXui;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                                     AArch64II::MO_NC | OpFlags);
    if (!Subtarget->isTargetILP32())
      return ResultReg;

    // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
    // so we must extend the result on ILP32.
    Register Result64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::SUBREG_TO_REG))
        .addDef(Result64)
        .addImm(0)
        .addReg(ResultReg, RegState::Kill)
        .addImm(AArch64::sub_32);
    return Result64;
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    if (OpFlags & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      //
      // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
      // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
      // are not exactly 1:1 with FastISel so we cannot easily abstract this
      // out. At some point, it would be nice to find a way to not have this
      // duplicate code.
      unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
              DstReg)
          .addReg(ADRPReg)
          .addGlobalAddress(GV, /*Offset=*/0x100000000,
                            AArch64II::MO_PREL | AArch64II::MO_G3)
          .addImm(48);
      ADRPReg = DstReg;
    }

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0,
                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
        .addImm(0);
  }
  return ResultReg;
}

unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();
  // arm64_32 has 32-bit pointers held in 64-bit registers.
Because of that, 545 // 'null' pointers need to have a somewhat special treatment. 546 if (isa<ConstantPointerNull>(C)) { 547 assert(VT == MVT::i64 && "Expected 64-bit pointers"); 548 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT); 549 } 550 551 if (const auto *CI = dyn_cast<ConstantInt>(C)) 552 return materializeInt(CI, VT); 553 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 554 return materializeFP(CFP, VT); 555 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 556 return materializeGV(GV); 557 558 return 0; 559 } 560 561 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { 562 assert(CFP->isNullValue() && 563 "Floating-point constant is not a positive zero."); 564 MVT VT; 565 if (!isTypeLegal(CFP->getType(), VT)) 566 return 0; 567 568 if (VT != MVT::f32 && VT != MVT::f64) 569 return 0; 570 571 bool Is64Bit = (VT == MVT::f64); 572 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 573 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr; 574 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg); 575 } 576 577 /// Check if the multiply is by a power-of-2 constant. 578 static bool isMulPowOf2(const Value *I) { 579 if (const auto *MI = dyn_cast<MulOperator>(I)) { 580 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0))) 581 if (C->getValue().isPowerOf2()) 582 return true; 583 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1))) 584 if (C->getValue().isPowerOf2()) 585 return true; 586 } 587 return false; 588 } 589 590 // Computes the address to get to an object. 591 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) 592 { 593 const User *U = nullptr; 594 unsigned Opcode = Instruction::UserOp1; 595 if (const Instruction *I = dyn_cast<Instruction>(Obj)) { 596 // Don't walk into other basic blocks unless the object is an alloca from 597 // another block, otherwise it may not have a virtual register assigned. 598 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || 599 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 600 Opcode = I->getOpcode(); 601 U = I; 602 } 603 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { 604 Opcode = C->getOpcode(); 605 U = C; 606 } 607 608 if (auto *Ty = dyn_cast<PointerType>(Obj->getType())) 609 if (Ty->getAddressSpace() > 255) 610 // Fast instruction selection doesn't support the special 611 // address spaces. 612 return false; 613 614 switch (Opcode) { 615 default: 616 break; 617 case Instruction::BitCast: 618 // Look through bitcasts. 619 return computeAddress(U->getOperand(0), Addr, Ty); 620 621 case Instruction::IntToPtr: 622 // Look past no-op inttoptrs. 623 if (TLI.getValueType(DL, U->getOperand(0)->getType()) == 624 TLI.getPointerTy(DL)) 625 return computeAddress(U->getOperand(0), Addr, Ty); 626 break; 627 628 case Instruction::PtrToInt: 629 // Look past no-op ptrtoints. 630 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) 631 return computeAddress(U->getOperand(0), Addr, Ty); 632 break; 633 634 case Instruction::GetElementPtr: { 635 Address SavedAddr = Addr; 636 uint64_t TmpOffset = Addr.getOffset(); 637 638 // Iterate through the GEP folding the constants into offsets where 639 // we can. 
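    // For example (illustrative only), for
    //   %p = getelementptr inbounds i32, ptr %base, i64 4
    // the constant index contributes 4 * 4 = 16 bytes to TmpOffset, and %base
    // is then handled by the recursive computeAddress call below.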
640 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U); 641 GTI != E; ++GTI) { 642 const Value *Op = GTI.getOperand(); 643 if (StructType *STy = GTI.getStructTypeOrNull()) { 644 const StructLayout *SL = DL.getStructLayout(STy); 645 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); 646 TmpOffset += SL->getElementOffset(Idx); 647 } else { 648 uint64_t S = GTI.getSequentialElementStride(DL); 649 while (true) { 650 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { 651 // Constant-offset addressing. 652 TmpOffset += CI->getSExtValue() * S; 653 break; 654 } 655 if (canFoldAddIntoGEP(U, Op)) { 656 // A compatible add with a constant operand. Fold the constant. 657 ConstantInt *CI = 658 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); 659 TmpOffset += CI->getSExtValue() * S; 660 // Iterate on the other operand. 661 Op = cast<AddOperator>(Op)->getOperand(0); 662 continue; 663 } 664 // Unsupported 665 goto unsupported_gep; 666 } 667 } 668 } 669 670 // Try to grab the base operand now. 671 Addr.setOffset(TmpOffset); 672 if (computeAddress(U->getOperand(0), Addr, Ty)) 673 return true; 674 675 // We failed, restore everything and try the other options. 676 Addr = SavedAddr; 677 678 unsupported_gep: 679 break; 680 } 681 case Instruction::Alloca: { 682 const AllocaInst *AI = cast<AllocaInst>(Obj); 683 DenseMap<const AllocaInst *, int>::iterator SI = 684 FuncInfo.StaticAllocaMap.find(AI); 685 if (SI != FuncInfo.StaticAllocaMap.end()) { 686 Addr.setKind(Address::FrameIndexBase); 687 Addr.setFI(SI->second); 688 return true; 689 } 690 break; 691 } 692 case Instruction::Add: { 693 // Adds of constants are common and easy enough. 694 const Value *LHS = U->getOperand(0); 695 const Value *RHS = U->getOperand(1); 696 697 if (isa<ConstantInt>(LHS)) 698 std::swap(LHS, RHS); 699 700 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 701 Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); 702 return computeAddress(LHS, Addr, Ty); 703 } 704 705 Address Backup = Addr; 706 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty)) 707 return true; 708 Addr = Backup; 709 710 break; 711 } 712 case Instruction::Sub: { 713 // Subs of constants are common and easy enough. 714 const Value *LHS = U->getOperand(0); 715 const Value *RHS = U->getOperand(1); 716 717 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 718 Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); 719 return computeAddress(LHS, Addr, Ty); 720 } 721 break; 722 } 723 case Instruction::Shl: { 724 if (Addr.getOffsetReg()) 725 break; 726 727 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1)); 728 if (!CI) 729 break; 730 731 unsigned Val = CI->getZExtValue(); 732 if (Val < 1 || Val > 3) 733 break; 734 735 uint64_t NumBytes = 0; 736 if (Ty && Ty->isSized()) { 737 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 738 NumBytes = NumBits / 8; 739 if (!isPowerOf2_64(NumBits)) 740 NumBytes = 0; 741 } 742 743 if (NumBytes != (1ULL << Val)) 744 break; 745 746 Addr.setShift(Val); 747 Addr.setExtendType(AArch64_AM::LSL); 748 749 const Value *Src = U->getOperand(0); 750 if (const auto *I = dyn_cast<Instruction>(Src)) { 751 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 752 // Fold the zext or sext when it won't become a noop. 
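        // E.g. an index computed as
        //   %idx = zext i32 %i to i64
        //   %off = shl i64 %idx, 2
        // can use %i directly with a UXTW #2 in the addressing mode rather
        // than materializing the zext first (assuming the extend is not
        // already free per isIntExtFree).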
753 if (const auto *ZE = dyn_cast<ZExtInst>(I)) { 754 if (!isIntExtFree(ZE) && 755 ZE->getOperand(0)->getType()->isIntegerTy(32)) { 756 Addr.setExtendType(AArch64_AM::UXTW); 757 Src = ZE->getOperand(0); 758 } 759 } else if (const auto *SE = dyn_cast<SExtInst>(I)) { 760 if (!isIntExtFree(SE) && 761 SE->getOperand(0)->getType()->isIntegerTy(32)) { 762 Addr.setExtendType(AArch64_AM::SXTW); 763 Src = SE->getOperand(0); 764 } 765 } 766 } 767 } 768 769 if (const auto *AI = dyn_cast<BinaryOperator>(Src)) 770 if (AI->getOpcode() == Instruction::And) { 771 const Value *LHS = AI->getOperand(0); 772 const Value *RHS = AI->getOperand(1); 773 774 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 775 if (C->getValue() == 0xffffffff) 776 std::swap(LHS, RHS); 777 778 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 779 if (C->getValue() == 0xffffffff) { 780 Addr.setExtendType(AArch64_AM::UXTW); 781 Register Reg = getRegForValue(LHS); 782 if (!Reg) 783 return false; 784 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32); 785 Addr.setOffsetReg(Reg); 786 return true; 787 } 788 } 789 790 Register Reg = getRegForValue(Src); 791 if (!Reg) 792 return false; 793 Addr.setOffsetReg(Reg); 794 return true; 795 } 796 case Instruction::Mul: { 797 if (Addr.getOffsetReg()) 798 break; 799 800 if (!isMulPowOf2(U)) 801 break; 802 803 const Value *LHS = U->getOperand(0); 804 const Value *RHS = U->getOperand(1); 805 806 // Canonicalize power-of-2 value to the RHS. 807 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 808 if (C->getValue().isPowerOf2()) 809 std::swap(LHS, RHS); 810 811 assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt."); 812 const auto *C = cast<ConstantInt>(RHS); 813 unsigned Val = C->getValue().logBase2(); 814 if (Val < 1 || Val > 3) 815 break; 816 817 uint64_t NumBytes = 0; 818 if (Ty && Ty->isSized()) { 819 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 820 NumBytes = NumBits / 8; 821 if (!isPowerOf2_64(NumBits)) 822 NumBytes = 0; 823 } 824 825 if (NumBytes != (1ULL << Val)) 826 break; 827 828 Addr.setShift(Val); 829 Addr.setExtendType(AArch64_AM::LSL); 830 831 const Value *Src = LHS; 832 if (const auto *I = dyn_cast<Instruction>(Src)) { 833 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 834 // Fold the zext or sext when it won't become a noop. 
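        // E.g. an address computed as "mul i64 %idx, 4", with %idx coming from
        // a zext of an i32, can be selected as a register-offset access such
        // as
        //   ldr w0, [x1, w2, uxtw #2]
        // (illustrative; the exact opcode depends on the access type).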
835 if (const auto *ZE = dyn_cast<ZExtInst>(I)) { 836 if (!isIntExtFree(ZE) && 837 ZE->getOperand(0)->getType()->isIntegerTy(32)) { 838 Addr.setExtendType(AArch64_AM::UXTW); 839 Src = ZE->getOperand(0); 840 } 841 } else if (const auto *SE = dyn_cast<SExtInst>(I)) { 842 if (!isIntExtFree(SE) && 843 SE->getOperand(0)->getType()->isIntegerTy(32)) { 844 Addr.setExtendType(AArch64_AM::SXTW); 845 Src = SE->getOperand(0); 846 } 847 } 848 } 849 } 850 851 Register Reg = getRegForValue(Src); 852 if (!Reg) 853 return false; 854 Addr.setOffsetReg(Reg); 855 return true; 856 } 857 case Instruction::And: { 858 if (Addr.getOffsetReg()) 859 break; 860 861 if (!Ty || DL.getTypeSizeInBits(Ty) != 8) 862 break; 863 864 const Value *LHS = U->getOperand(0); 865 const Value *RHS = U->getOperand(1); 866 867 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 868 if (C->getValue() == 0xffffffff) 869 std::swap(LHS, RHS); 870 871 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 872 if (C->getValue() == 0xffffffff) { 873 Addr.setShift(0); 874 Addr.setExtendType(AArch64_AM::LSL); 875 Addr.setExtendType(AArch64_AM::UXTW); 876 877 Register Reg = getRegForValue(LHS); 878 if (!Reg) 879 return false; 880 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32); 881 Addr.setOffsetReg(Reg); 882 return true; 883 } 884 break; 885 } 886 case Instruction::SExt: 887 case Instruction::ZExt: { 888 if (!Addr.getReg() || Addr.getOffsetReg()) 889 break; 890 891 const Value *Src = nullptr; 892 // Fold the zext or sext when it won't become a noop. 893 if (const auto *ZE = dyn_cast<ZExtInst>(U)) { 894 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { 895 Addr.setExtendType(AArch64_AM::UXTW); 896 Src = ZE->getOperand(0); 897 } 898 } else if (const auto *SE = dyn_cast<SExtInst>(U)) { 899 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { 900 Addr.setExtendType(AArch64_AM::SXTW); 901 Src = SE->getOperand(0); 902 } 903 } 904 905 if (!Src) 906 break; 907 908 Addr.setShift(0); 909 Register Reg = getRegForValue(Src); 910 if (!Reg) 911 return false; 912 Addr.setOffsetReg(Reg); 913 return true; 914 } 915 } // end switch 916 917 if (Addr.isRegBase() && !Addr.getReg()) { 918 Register Reg = getRegForValue(Obj); 919 if (!Reg) 920 return false; 921 Addr.setReg(Reg); 922 return true; 923 } 924 925 if (!Addr.getOffsetReg()) { 926 Register Reg = getRegForValue(Obj); 927 if (!Reg) 928 return false; 929 Addr.setOffsetReg(Reg); 930 return true; 931 } 932 933 return false; 934 } 935 936 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { 937 const User *U = nullptr; 938 unsigned Opcode = Instruction::UserOp1; 939 bool InMBB = true; 940 941 if (const auto *I = dyn_cast<Instruction>(V)) { 942 Opcode = I->getOpcode(); 943 U = I; 944 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); 945 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) { 946 Opcode = C->getOpcode(); 947 U = C; 948 } 949 950 switch (Opcode) { 951 default: break; 952 case Instruction::BitCast: 953 // Look past bitcasts if its operand is in the same BB. 954 if (InMBB) 955 return computeCallAddress(U->getOperand(0), Addr); 956 break; 957 case Instruction::IntToPtr: 958 // Look past no-op inttoptrs if its operand is in the same BB. 959 if (InMBB && 960 TLI.getValueType(DL, U->getOperand(0)->getType()) == 961 TLI.getPointerTy(DL)) 962 return computeCallAddress(U->getOperand(0), Addr); 963 break; 964 case Instruction::PtrToInt: 965 // Look past no-op ptrtoints if its operand is in the same BB. 
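    // (A ptrtoint is a no-op here when the destination integer type already
    //  matches the pointer width, e.g. "ptrtoint ptr %p to i64" on AArch64.)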
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}

bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
    return false;

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

/// Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign- or zero-extended to a basic operation,
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}

bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
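  // (The lowering below just materializes the frame address with an ADDXri
  //  off the frame index, effectively "add xN, sp/fp, #offset" once frame
  //  indices are resolved, and then carries on with a plain register base.)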
1062 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase()) 1063 { 1064 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 1065 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri), 1066 ResultReg) 1067 .addFrameIndex(Addr.getFI()) 1068 .addImm(0) 1069 .addImm(0); 1070 Addr.setKind(Address::RegBase); 1071 Addr.setReg(ResultReg); 1072 } 1073 1074 if (RegisterOffsetNeedsLowering) { 1075 unsigned ResultReg = 0; 1076 if (Addr.getReg()) { 1077 if (Addr.getExtendType() == AArch64_AM::SXTW || 1078 Addr.getExtendType() == AArch64_AM::UXTW ) 1079 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 1080 Addr.getOffsetReg(), Addr.getExtendType(), 1081 Addr.getShift()); 1082 else 1083 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 1084 Addr.getOffsetReg(), AArch64_AM::LSL, 1085 Addr.getShift()); 1086 } else { 1087 if (Addr.getExtendType() == AArch64_AM::UXTW) 1088 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), 1089 Addr.getShift(), /*IsZExt=*/true); 1090 else if (Addr.getExtendType() == AArch64_AM::SXTW) 1091 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), 1092 Addr.getShift(), /*IsZExt=*/false); 1093 else 1094 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(), 1095 Addr.getShift()); 1096 } 1097 if (!ResultReg) 1098 return false; 1099 1100 Addr.setReg(ResultReg); 1101 Addr.setOffsetReg(0); 1102 Addr.setShift(0); 1103 Addr.setExtendType(AArch64_AM::InvalidShiftExtend); 1104 } 1105 1106 // Since the offset is too large for the load/store instruction get the 1107 // reg+offset into a register. 1108 if (ImmediateOffsetNeedsLowering) { 1109 unsigned ResultReg; 1110 if (Addr.getReg()) 1111 // Try to fold the immediate into the add instruction. 1112 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset); 1113 else 1114 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); 1115 1116 if (!ResultReg) 1117 return false; 1118 Addr.setReg(ResultReg); 1119 Addr.setOffset(0); 1120 } 1121 return true; 1122 } 1123 1124 void AArch64FastISel::addLoadStoreOperands(Address &Addr, 1125 const MachineInstrBuilder &MIB, 1126 MachineMemOperand::Flags Flags, 1127 unsigned ScaleFactor, 1128 MachineMemOperand *MMO) { 1129 int64_t Offset = Addr.getOffset() / ScaleFactor; 1130 // Frame base works a bit differently. Handle it separately. 1131 if (Addr.isFIBase()) { 1132 int FI = Addr.getFI(); 1133 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size 1134 // and alignment should be based on the VT. 1135 MMO = FuncInfo.MF->getMachineMemOperand( 1136 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags, 1137 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 1138 // Now add the rest of the operands. 1139 MIB.addFrameIndex(FI).addImm(Offset); 1140 } else { 1141 assert(Addr.isRegBase() && "Unexpected address kind."); 1142 const MCInstrDesc &II = MIB->getDesc(); 1143 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 
1 : 0; 1144 Addr.setReg( 1145 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx)); 1146 Addr.setOffsetReg( 1147 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1)); 1148 if (Addr.getOffsetReg()) { 1149 assert(Addr.getOffset() == 0 && "Unexpected offset"); 1150 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW || 1151 Addr.getExtendType() == AArch64_AM::SXTX; 1152 MIB.addReg(Addr.getReg()); 1153 MIB.addReg(Addr.getOffsetReg()); 1154 MIB.addImm(IsSigned); 1155 MIB.addImm(Addr.getShift() != 0); 1156 } else 1157 MIB.addReg(Addr.getReg()).addImm(Offset); 1158 } 1159 1160 if (MMO) 1161 MIB.addMemOperand(MMO); 1162 } 1163 1164 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, 1165 const Value *RHS, bool SetFlags, 1166 bool WantResult, bool IsZExt) { 1167 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend; 1168 bool NeedExtend = false; 1169 switch (RetVT.SimpleTy) { 1170 default: 1171 return 0; 1172 case MVT::i1: 1173 NeedExtend = true; 1174 break; 1175 case MVT::i8: 1176 NeedExtend = true; 1177 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB; 1178 break; 1179 case MVT::i16: 1180 NeedExtend = true; 1181 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH; 1182 break; 1183 case MVT::i32: // fall-through 1184 case MVT::i64: 1185 break; 1186 } 1187 MVT SrcVT = RetVT; 1188 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32); 1189 1190 // Canonicalize immediates to the RHS first. 1191 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS)) 1192 std::swap(LHS, RHS); 1193 1194 // Canonicalize mul by power of 2 to the RHS. 1195 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) 1196 if (isMulPowOf2(LHS)) 1197 std::swap(LHS, RHS); 1198 1199 // Canonicalize shift immediate to the RHS. 1200 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) 1201 if (const auto *SI = dyn_cast<BinaryOperator>(LHS)) 1202 if (isa<ConstantInt>(SI->getOperand(1))) 1203 if (SI->getOpcode() == Instruction::Shl || 1204 SI->getOpcode() == Instruction::LShr || 1205 SI->getOpcode() == Instruction::AShr ) 1206 std::swap(LHS, RHS); 1207 1208 Register LHSReg = getRegForValue(LHS); 1209 if (!LHSReg) 1210 return 0; 1211 1212 if (NeedExtend) 1213 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt); 1214 1215 unsigned ResultReg = 0; 1216 if (const auto *C = dyn_cast<ConstantInt>(RHS)) { 1217 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue(); 1218 if (C->isNegative()) 1219 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags, 1220 WantResult); 1221 else 1222 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags, 1223 WantResult); 1224 } else if (const auto *C = dyn_cast<Constant>(RHS)) 1225 if (C->isNullValue()) 1226 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult); 1227 1228 if (ResultReg) 1229 return ResultReg; 1230 1231 // Only extend the RHS within the instruction if there is a valid extend type. 1232 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && 1233 isValueAvailable(RHS)) { 1234 Register RHSReg = getRegForValue(RHS); 1235 if (!RHSReg) 1236 return 0; 1237 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0, 1238 SetFlags, WantResult); 1239 } 1240 1241 // Check if the mul can be folded into the instruction. 
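  // E.g. "%m = mul i64 %b, 8 ; add i64 %a, %m" can be selected as a single
  //   add x0, x1, x2, lsl #3
  // via the shifted-register form below (assuming the mul has no other uses).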
1242 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1243 if (isMulPowOf2(RHS)) { 1244 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); 1245 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); 1246 1247 if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) 1248 if (C->getValue().isPowerOf2()) 1249 std::swap(MulLHS, MulRHS); 1250 1251 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); 1252 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); 1253 Register RHSReg = getRegForValue(MulLHS); 1254 if (!RHSReg) 1255 return 0; 1256 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL, 1257 ShiftVal, SetFlags, WantResult); 1258 if (ResultReg) 1259 return ResultReg; 1260 } 1261 } 1262 1263 // Check if the shift can be folded into the instruction. 1264 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1265 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) { 1266 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { 1267 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; 1268 switch (SI->getOpcode()) { 1269 default: break; 1270 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; 1271 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; 1272 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; 1273 } 1274 uint64_t ShiftVal = C->getZExtValue(); 1275 if (ShiftType != AArch64_AM::InvalidShiftExtend) { 1276 Register RHSReg = getRegForValue(SI->getOperand(0)); 1277 if (!RHSReg) 1278 return 0; 1279 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType, 1280 ShiftVal, SetFlags, WantResult); 1281 if (ResultReg) 1282 return ResultReg; 1283 } 1284 } 1285 } 1286 } 1287 1288 Register RHSReg = getRegForValue(RHS); 1289 if (!RHSReg) 1290 return 0; 1291 1292 if (NeedExtend) 1293 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt); 1294 1295 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult); 1296 } 1297 1298 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, 1299 unsigned RHSReg, bool SetFlags, 1300 bool WantResult) { 1301 assert(LHSReg && RHSReg && "Invalid register number."); 1302 1303 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP || 1304 RHSReg == AArch64::SP || RHSReg == AArch64::WSP) 1305 return 0; 1306 1307 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1308 return 0; 1309 1310 static const unsigned OpcTable[2][2][2] = { 1311 { { AArch64::SUBWrr, AArch64::SUBXrr }, 1312 { AArch64::ADDWrr, AArch64::ADDXrr } }, 1313 { { AArch64::SUBSWrr, AArch64::SUBSXrr }, 1314 { AArch64::ADDSWrr, AArch64::ADDSXrr } } 1315 }; 1316 bool Is64Bit = RetVT == MVT::i64; 1317 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1318 const TargetRegisterClass *RC = 1319 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1320 unsigned ResultReg; 1321 if (WantResult) 1322 ResultReg = createResultReg(RC); 1323 else 1324 ResultReg = Is64Bit ? 
AArch64::XZR : AArch64::WZR; 1325 1326 const MCInstrDesc &II = TII.get(Opc); 1327 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1328 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1329 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) 1330 .addReg(LHSReg) 1331 .addReg(RHSReg); 1332 return ResultReg; 1333 } 1334 1335 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, 1336 uint64_t Imm, bool SetFlags, 1337 bool WantResult) { 1338 assert(LHSReg && "Invalid register number."); 1339 1340 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1341 return 0; 1342 1343 unsigned ShiftImm; 1344 if (isUInt<12>(Imm)) 1345 ShiftImm = 0; 1346 else if ((Imm & 0xfff000) == Imm) { 1347 ShiftImm = 12; 1348 Imm >>= 12; 1349 } else 1350 return 0; 1351 1352 static const unsigned OpcTable[2][2][2] = { 1353 { { AArch64::SUBWri, AArch64::SUBXri }, 1354 { AArch64::ADDWri, AArch64::ADDXri } }, 1355 { { AArch64::SUBSWri, AArch64::SUBSXri }, 1356 { AArch64::ADDSWri, AArch64::ADDSXri } } 1357 }; 1358 bool Is64Bit = RetVT == MVT::i64; 1359 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1360 const TargetRegisterClass *RC; 1361 if (SetFlags) 1362 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1363 else 1364 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; 1365 unsigned ResultReg; 1366 if (WantResult) 1367 ResultReg = createResultReg(RC); 1368 else 1369 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1370 1371 const MCInstrDesc &II = TII.get(Opc); 1372 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1373 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) 1374 .addReg(LHSReg) 1375 .addImm(Imm) 1376 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm)); 1377 return ResultReg; 1378 } 1379 1380 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, 1381 unsigned RHSReg, 1382 AArch64_AM::ShiftExtendType ShiftType, 1383 uint64_t ShiftImm, bool SetFlags, 1384 bool WantResult) { 1385 assert(LHSReg && RHSReg && "Invalid register number."); 1386 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && 1387 RHSReg != AArch64::SP && RHSReg != AArch64::WSP); 1388 1389 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1390 return 0; 1391 1392 // Don't deal with undefined shifts. 1393 if (ShiftImm >= RetVT.getSizeInBits()) 1394 return 0; 1395 1396 static const unsigned OpcTable[2][2][2] = { 1397 { { AArch64::SUBWrs, AArch64::SUBXrs }, 1398 { AArch64::ADDWrs, AArch64::ADDXrs } }, 1399 { { AArch64::SUBSWrs, AArch64::SUBSXrs }, 1400 { AArch64::ADDSWrs, AArch64::ADDSXrs } } 1401 }; 1402 bool Is64Bit = RetVT == MVT::i64; 1403 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1404 const TargetRegisterClass *RC = 1405 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1406 unsigned ResultReg; 1407 if (WantResult) 1408 ResultReg = createResultReg(RC); 1409 else 1410 ResultReg = Is64Bit ? 
AArch64::XZR : AArch64::WZR; 1411 1412 const MCInstrDesc &II = TII.get(Opc); 1413 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1414 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1415 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) 1416 .addReg(LHSReg) 1417 .addReg(RHSReg) 1418 .addImm(getShifterImm(ShiftType, ShiftImm)); 1419 return ResultReg; 1420 } 1421 1422 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, 1423 unsigned RHSReg, 1424 AArch64_AM::ShiftExtendType ExtType, 1425 uint64_t ShiftImm, bool SetFlags, 1426 bool WantResult) { 1427 assert(LHSReg && RHSReg && "Invalid register number."); 1428 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && 1429 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); 1430 1431 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1432 return 0; 1433 1434 if (ShiftImm >= 4) 1435 return 0; 1436 1437 static const unsigned OpcTable[2][2][2] = { 1438 { { AArch64::SUBWrx, AArch64::SUBXrx }, 1439 { AArch64::ADDWrx, AArch64::ADDXrx } }, 1440 { { AArch64::SUBSWrx, AArch64::SUBSXrx }, 1441 { AArch64::ADDSWrx, AArch64::ADDSXrx } } 1442 }; 1443 bool Is64Bit = RetVT == MVT::i64; 1444 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1445 const TargetRegisterClass *RC = nullptr; 1446 if (SetFlags) 1447 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1448 else 1449 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; 1450 unsigned ResultReg; 1451 if (WantResult) 1452 ResultReg = createResultReg(RC); 1453 else 1454 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1455 1456 const MCInstrDesc &II = TII.get(Opc); 1457 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1458 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg) 1460 .addReg(LHSReg) 1461 .addReg(RHSReg) 1462 .addImm(getArithExtendImm(ExtType, ShiftImm)); 1463 return ResultReg; 1464 } 1465 1466 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { 1467 Type *Ty = LHS->getType(); 1468 EVT EVT = TLI.getValueType(DL, Ty, true); 1469 if (!EVT.isSimple()) 1470 return false; 1471 MVT VT = EVT.getSimpleVT(); 1472 1473 switch (VT.SimpleTy) { 1474 default: 1475 return false; 1476 case MVT::i1: 1477 case MVT::i8: 1478 case MVT::i16: 1479 case MVT::i32: 1480 case MVT::i64: 1481 return emitICmp(VT, LHS, RHS, IsZExt); 1482 case MVT::f32: 1483 case MVT::f64: 1484 return emitFCmp(VT, LHS, RHS); 1485 } 1486 } 1487 1488 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, 1489 bool IsZExt) { 1490 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false, 1491 IsZExt) != 0; 1492 } 1493 1494 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) { 1495 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm, 1496 /*SetFlags=*/true, /*WantResult=*/false) != 0; 1497 } 1498 1499 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { 1500 if (RetVT != MVT::f32 && RetVT != MVT::f64) 1501 return false; 1502 1503 // Check to see if the 2nd operand is a constant that we can encode directly 1504 // in the compare. 1505 bool UseImm = false; 1506 if (const auto *CFP = dyn_cast<ConstantFP>(RHS)) 1507 if (CFP->isZero() && !CFP->isNegative()) 1508 UseImm = true; 1509 1510 Register LHSReg = getRegForValue(LHS); 1511 if (!LHSReg) 1512 return false; 1513 1514 if (UseImm) { 1515 unsigned Opc = (RetVT == MVT::f64) ? 
AArch64::FCMPDri : AArch64::FCMPSri; 1516 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)) 1517 .addReg(LHSReg); 1518 return true; 1519 } 1520 1521 Register RHSReg = getRegForValue(RHS); 1522 if (!RHSReg) 1523 return false; 1524 1525 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr; 1526 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)) 1527 .addReg(LHSReg) 1528 .addReg(RHSReg); 1529 return true; 1530 } 1531 1532 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, 1533 bool SetFlags, bool WantResult, bool IsZExt) { 1534 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult, 1535 IsZExt); 1536 } 1537 1538 /// This method is a wrapper to simplify add emission. 1539 /// 1540 /// First try to emit an add with an immediate operand using emitAddSub_ri. If 1541 /// that fails, then try to materialize the immediate into a register and use 1542 /// emitAddSub_rr instead. 1543 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) { 1544 unsigned ResultReg; 1545 if (Imm < 0) 1546 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm); 1547 else 1548 ResultReg = emitAddSub_ri(true, VT, Op0, Imm); 1549 1550 if (ResultReg) 1551 return ResultReg; 1552 1553 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm); 1554 if (!CReg) 1555 return 0; 1556 1557 ResultReg = emitAddSub_rr(true, VT, Op0, CReg); 1558 return ResultReg; 1559 } 1560 1561 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, 1562 bool SetFlags, bool WantResult, bool IsZExt) { 1563 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, 1564 IsZExt); 1565 } 1566 1567 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg, 1568 unsigned RHSReg, bool WantResult) { 1569 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, 1570 /*SetFlags=*/true, WantResult); 1571 } 1572 1573 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg, 1574 unsigned RHSReg, 1575 AArch64_AM::ShiftExtendType ShiftType, 1576 uint64_t ShiftImm, bool WantResult) { 1577 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType, 1578 ShiftImm, /*SetFlags=*/true, WantResult); 1579 } 1580 1581 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, 1582 const Value *LHS, const Value *RHS) { 1583 // Canonicalize immediates to the RHS first. 1584 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS)) 1585 std::swap(LHS, RHS); 1586 1587 // Canonicalize mul by power-of-2 to the RHS. 1588 if (LHS->hasOneUse() && isValueAvailable(LHS)) 1589 if (isMulPowOf2(LHS)) 1590 std::swap(LHS, RHS); 1591 1592 // Canonicalize shift immediate to the RHS. 1593 if (LHS->hasOneUse() && isValueAvailable(LHS)) 1594 if (const auto *SI = dyn_cast<ShlOperator>(LHS)) 1595 if (isa<ConstantInt>(SI->getOperand(1))) 1596 std::swap(LHS, RHS); 1597 1598 Register LHSReg = getRegForValue(LHS); 1599 if (!LHSReg) 1600 return 0; 1601 1602 unsigned ResultReg = 0; 1603 if (const auto *C = dyn_cast<ConstantInt>(RHS)) { 1604 uint64_t Imm = C->getZExtValue(); 1605 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm); 1606 } 1607 if (ResultReg) 1608 return ResultReg; 1609 1610 // Check if the mul can be folded into the instruction. 
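  // Same idea as in emitAddSub: e.g. "%m = mul i32 %b, 4 ; and i32 %a, %m"
  // can become a single shifted-register operation such as
  //   and w0, w1, w2, lsl #2
  // (illustrative encoding only).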
1611 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1612 if (isMulPowOf2(RHS)) { 1613 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); 1614 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); 1615 1616 if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) 1617 if (C->getValue().isPowerOf2()) 1618 std::swap(MulLHS, MulRHS); 1619 1620 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); 1621 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); 1622 1623 Register RHSReg = getRegForValue(MulLHS); 1624 if (!RHSReg) 1625 return 0; 1626 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal); 1627 if (ResultReg) 1628 return ResultReg; 1629 } 1630 } 1631 1632 // Check if the shift can be folded into the instruction. 1633 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1634 if (const auto *SI = dyn_cast<ShlOperator>(RHS)) 1635 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { 1636 uint64_t ShiftVal = C->getZExtValue(); 1637 Register RHSReg = getRegForValue(SI->getOperand(0)); 1638 if (!RHSReg) 1639 return 0; 1640 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal); 1641 if (ResultReg) 1642 return ResultReg; 1643 } 1644 } 1645 1646 Register RHSReg = getRegForValue(RHS); 1647 if (!RHSReg) 1648 return 0; 1649 1650 MVT VT = std::max(MVT::i32, RetVT.SimpleTy); 1651 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg); 1652 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1653 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1654 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1655 } 1656 return ResultReg; 1657 } 1658 1659 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, 1660 unsigned LHSReg, uint64_t Imm) { 1661 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), 1662 "ISD nodes are not consecutive!"); 1663 static const unsigned OpcTable[3][2] = { 1664 { AArch64::ANDWri, AArch64::ANDXri }, 1665 { AArch64::ORRWri, AArch64::ORRXri }, 1666 { AArch64::EORWri, AArch64::EORXri } 1667 }; 1668 const TargetRegisterClass *RC; 1669 unsigned Opc; 1670 unsigned RegSize; 1671 switch (RetVT.SimpleTy) { 1672 default: 1673 return 0; 1674 case MVT::i1: 1675 case MVT::i8: 1676 case MVT::i16: 1677 case MVT::i32: { 1678 unsigned Idx = ISDOpc - ISD::AND; 1679 Opc = OpcTable[Idx][0]; 1680 RC = &AArch64::GPR32spRegClass; 1681 RegSize = 32; 1682 break; 1683 } 1684 case MVT::i64: 1685 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1686 RC = &AArch64::GPR64spRegClass; 1687 RegSize = 64; 1688 break; 1689 } 1690 1691 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize)) 1692 return 0; 1693 1694 Register ResultReg = 1695 fastEmitInst_ri(Opc, RC, LHSReg, 1696 AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); 1697 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) { 1698 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1699 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1700 } 1701 return ResultReg; 1702 } 1703 1704 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, 1705 unsigned LHSReg, unsigned RHSReg, 1706 uint64_t ShiftImm) { 1707 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), 1708 "ISD nodes are not consecutive!"); 1709 static const unsigned OpcTable[3][2] = { 1710 { AArch64::ANDWrs, AArch64::ANDXrs }, 1711 { AArch64::ORRWrs, AArch64::ORRXrs }, 1712 { AArch64::EORWrs, AArch64::EORXrs } 1713 }; 1714 1715 // Don't deal with undefined shifts. 
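  // (A shift amount of RetVT.getSizeInBits() or more cannot be encoded in the
  //  shifted-register form; there is no "lsl #32" on a W register, for
  //  example, so bail out rather than emit an invalid instruction.)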
1716 if (ShiftImm >= RetVT.getSizeInBits()) 1717 return 0; 1718 1719 const TargetRegisterClass *RC; 1720 unsigned Opc; 1721 switch (RetVT.SimpleTy) { 1722 default: 1723 return 0; 1724 case MVT::i1: 1725 case MVT::i8: 1726 case MVT::i16: 1727 case MVT::i32: 1728 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1729 RC = &AArch64::GPR32RegClass; 1730 break; 1731 case MVT::i64: 1732 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1733 RC = &AArch64::GPR64RegClass; 1734 break; 1735 } 1736 Register ResultReg = 1737 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg, 1738 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1739 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1740 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1741 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1742 } 1743 return ResultReg; 1744 } 1745 1746 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, 1747 uint64_t Imm) { 1748 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm); 1749 } 1750 1751 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1752 bool WantZExt, MachineMemOperand *MMO) { 1753 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1754 return 0; 1755 1756 // Simplify this down to something we can handle. 1757 if (!simplifyAddress(Addr, VT)) 1758 return 0; 1759 1760 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1761 if (!ScaleFactor) 1762 llvm_unreachable("Unexpected value type."); 1763 1764 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1765 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1766 bool UseScaled = true; 1767 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1768 UseScaled = false; 1769 ScaleFactor = 1; 1770 } 1771 1772 static const unsigned GPOpcTable[2][8][4] = { 1773 // Sign-extend. 1774 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1775 AArch64::LDURXi }, 1776 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1777 AArch64::LDURXi }, 1778 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1779 AArch64::LDRXui }, 1780 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1781 AArch64::LDRXui }, 1782 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1783 AArch64::LDRXroX }, 1784 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1785 AArch64::LDRXroX }, 1786 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1787 AArch64::LDRXroW }, 1788 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1789 AArch64::LDRXroW } 1790 }, 1791 // Zero-extend. 
1792 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1793 AArch64::LDURXi }, 1794 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1795 AArch64::LDURXi }, 1796 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1797 AArch64::LDRXui }, 1798 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1799 AArch64::LDRXui }, 1800 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1801 AArch64::LDRXroX }, 1802 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1803 AArch64::LDRXroX }, 1804 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1805 AArch64::LDRXroW }, 1806 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1807 AArch64::LDRXroW } 1808 } 1809 }; 1810 1811 static const unsigned FPOpcTable[4][2] = { 1812 { AArch64::LDURSi, AArch64::LDURDi }, 1813 { AArch64::LDRSui, AArch64::LDRDui }, 1814 { AArch64::LDRSroX, AArch64::LDRDroX }, 1815 { AArch64::LDRSroW, AArch64::LDRDroW } 1816 }; 1817 1818 unsigned Opc; 1819 const TargetRegisterClass *RC; 1820 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1821 Addr.getOffsetReg(); 1822 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1823 if (Addr.getExtendType() == AArch64_AM::UXTW || 1824 Addr.getExtendType() == AArch64_AM::SXTW) 1825 Idx++; 1826 1827 bool IsRet64Bit = RetVT == MVT::i64; 1828 switch (VT.SimpleTy) { 1829 default: 1830 llvm_unreachable("Unexpected value type."); 1831 case MVT::i1: // Intentional fall-through. 1832 case MVT::i8: 1833 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1834 RC = (IsRet64Bit && !WantZExt) ? 1835 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1836 break; 1837 case MVT::i16: 1838 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1839 RC = (IsRet64Bit && !WantZExt) ? 1840 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1841 break; 1842 case MVT::i32: 1843 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1844 RC = (IsRet64Bit && !WantZExt) ? 1845 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1846 break; 1847 case MVT::i64: 1848 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1849 RC = &AArch64::GPR64RegClass; 1850 break; 1851 case MVT::f32: 1852 Opc = FPOpcTable[Idx][0]; 1853 RC = &AArch64::FPR32RegClass; 1854 break; 1855 case MVT::f64: 1856 Opc = FPOpcTable[Idx][1]; 1857 RC = &AArch64::FPR64RegClass; 1858 break; 1859 } 1860 1861 // Create the base instruction, then add the operands. 1862 Register ResultReg = createResultReg(RC); 1863 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1864 TII.get(Opc), ResultReg); 1865 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1866 1867 // Loading an i1 requires special handling. 1868 if (VT == MVT::i1) { 1869 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1); 1870 assert(ANDReg && "Unexpected AND instruction emission failure."); 1871 ResultReg = ANDReg; 1872 } 1873 1874 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1875 // the 32bit reg to a 64bit reg. 
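// The 32-bit load already zeroes the upper 32 bits of the X register, so no
// extra instruction is needed; the SUBREG_TO_REG below merely reinterprets
// the W result as a 64-bit value.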
1876 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1877 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 1878 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1879 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1880 .addImm(0) 1881 .addReg(ResultReg, getKillRegState(true)) 1882 .addImm(AArch64::sub_32); 1883 ResultReg = Reg64; 1884 } 1885 return ResultReg; 1886 } 1887 1888 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1889 MVT VT; 1890 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1891 return false; 1892 1893 if (VT.isVector()) 1894 return selectOperator(I, I->getOpcode()); 1895 1896 unsigned ResultReg; 1897 switch (I->getOpcode()) { 1898 default: 1899 llvm_unreachable("Unexpected instruction."); 1900 case Instruction::Add: 1901 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1902 break; 1903 case Instruction::Sub: 1904 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1905 break; 1906 } 1907 if (!ResultReg) 1908 return false; 1909 1910 updateValueMap(I, ResultReg); 1911 return true; 1912 } 1913 1914 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1915 MVT VT; 1916 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1917 return false; 1918 1919 if (VT.isVector()) 1920 return selectOperator(I, I->getOpcode()); 1921 1922 unsigned ResultReg; 1923 switch (I->getOpcode()) { 1924 default: 1925 llvm_unreachable("Unexpected instruction."); 1926 case Instruction::And: 1927 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1928 break; 1929 case Instruction::Or: 1930 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1931 break; 1932 case Instruction::Xor: 1933 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1934 break; 1935 } 1936 if (!ResultReg) 1937 return false; 1938 1939 updateValueMap(I, ResultReg); 1940 return true; 1941 } 1942 1943 bool AArch64FastISel::selectLoad(const Instruction *I) { 1944 MVT VT; 1945 // Verify we have a legal type before going any further. Currently, we handle 1946 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1947 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 1948 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1949 cast<LoadInst>(I)->isAtomic()) 1950 return false; 1951 1952 const Value *SV = I->getOperand(0); 1953 if (TLI.supportSwiftError()) { 1954 // Swifterror values can come from either a function parameter with 1955 // swifterror attribute or an alloca with swifterror attribute. 1956 if (const Argument *Arg = dyn_cast<Argument>(SV)) { 1957 if (Arg->hasSwiftErrorAttr()) 1958 return false; 1959 } 1960 1961 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { 1962 if (Alloca->isSwiftError()) 1963 return false; 1964 } 1965 } 1966 1967 // See if we can handle this address. 1968 Address Addr; 1969 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 1970 return false; 1971 1972 // Fold the following sign-/zero-extend into the load instruction. 
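// For example, a load of an i8 whose only user is a sign-extend to i64 can be
// selected as a single ldrsb into an X register rather than a load followed
// by a separate extend.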
1973 bool WantZExt = true; 1974 MVT RetVT = VT; 1975 const Value *IntExtVal = nullptr; 1976 if (I->hasOneUse()) { 1977 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { 1978 if (isTypeSupported(ZE->getType(), RetVT)) 1979 IntExtVal = ZE; 1980 else 1981 RetVT = VT; 1982 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { 1983 if (isTypeSupported(SE->getType(), RetVT)) 1984 IntExtVal = SE; 1985 else 1986 RetVT = VT; 1987 WantZExt = false; 1988 } 1989 } 1990 1991 unsigned ResultReg = 1992 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); 1993 if (!ResultReg) 1994 return false; 1995 1996 // There are a few different cases we have to handle, because the load or the 1997 // sign-/zero-extend might not be selected by FastISel if we fall-back to 1998 // SelectionDAG. There is also an ordering issue when both instructions are in 1999 // different basic blocks. 2000 // 1.) The load instruction is selected by FastISel, but the integer extend 2001 // not. This usually happens when the integer extend is in a different 2002 // basic block and SelectionDAG took over for that basic block. 2003 // 2.) The load instruction is selected before the integer extend. This only 2004 // happens when the integer extend is in a different basic block. 2005 // 3.) The load instruction is selected by SelectionDAG and the integer extend 2006 // by FastISel. This happens if there are instructions between the load 2007 // and the integer extend that couldn't be selected by FastISel. 2008 if (IntExtVal) { 2009 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG 2010 // could select it. Emit a copy to subreg if necessary. FastISel will remove 2011 // it when it selects the integer extend. 2012 Register Reg = lookUpRegForValue(IntExtVal); 2013 auto *MI = MRI.getUniqueVRegDef(Reg); 2014 if (!MI) { 2015 if (RetVT == MVT::i64 && VT <= MVT::i32) { 2016 if (WantZExt) { 2017 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). 2018 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt)); 2019 ResultReg = std::prev(I)->getOperand(0).getReg(); 2020 removeDeadCode(I, std::next(I)); 2021 } else 2022 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, 2023 AArch64::sub_32); 2024 } 2025 updateValueMap(I, ResultReg); 2026 return true; 2027 } 2028 2029 // The integer extend has already been emitted - delete all the instructions 2030 // that have been emitted by the integer extend lowering code and use the 2031 // result from the load instruction directly. 
2032 while (MI) { 2033 Reg = 0; 2034 for (auto &Opnd : MI->uses()) { 2035 if (Opnd.isReg()) { 2036 Reg = Opnd.getReg(); 2037 break; 2038 } 2039 } 2040 MachineBasicBlock::iterator I(MI); 2041 removeDeadCode(I, std::next(I)); 2042 MI = nullptr; 2043 if (Reg) 2044 MI = MRI.getUniqueVRegDef(Reg); 2045 } 2046 updateValueMap(IntExtVal, ResultReg); 2047 return true; 2048 } 2049 2050 updateValueMap(I, ResultReg); 2051 return true; 2052 } 2053 2054 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, 2055 unsigned AddrReg, 2056 MachineMemOperand *MMO) { 2057 unsigned Opc; 2058 switch (VT.SimpleTy) { 2059 default: return false; 2060 case MVT::i8: Opc = AArch64::STLRB; break; 2061 case MVT::i16: Opc = AArch64::STLRH; break; 2062 case MVT::i32: Opc = AArch64::STLRW; break; 2063 case MVT::i64: Opc = AArch64::STLRX; break; 2064 } 2065 2066 const MCInstrDesc &II = TII.get(Opc); 2067 SrcReg = constrainOperandRegClass(II, SrcReg, 0); 2068 AddrReg = constrainOperandRegClass(II, AddrReg, 1); 2069 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 2070 .addReg(SrcReg) 2071 .addReg(AddrReg) 2072 .addMemOperand(MMO); 2073 return true; 2074 } 2075 2076 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 2077 MachineMemOperand *MMO) { 2078 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 2079 return false; 2080 2081 // Simplify this down to something we can handle. 2082 if (!simplifyAddress(Addr, VT)) 2083 return false; 2084 2085 unsigned ScaleFactor = getImplicitScaleFactor(VT); 2086 if (!ScaleFactor) 2087 llvm_unreachable("Unexpected value type."); 2088 2089 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 2090 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 2091 bool UseScaled = true; 2092 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 2093 UseScaled = false; 2094 ScaleFactor = 1; 2095 } 2096 2097 static const unsigned OpcTable[4][6] = { 2098 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 2099 AArch64::STURSi, AArch64::STURDi }, 2100 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 2101 AArch64::STRSui, AArch64::STRDui }, 2102 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 2103 AArch64::STRSroX, AArch64::STRDroX }, 2104 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 2105 AArch64::STRSroW, AArch64::STRDroW } 2106 }; 2107 2108 unsigned Opc; 2109 bool VTIsi1 = false; 2110 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 2111 Addr.getOffsetReg(); 2112 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 2113 if (Addr.getExtendType() == AArch64_AM::UXTW || 2114 Addr.getExtendType() == AArch64_AM::SXTW) 2115 Idx++; 2116 2117 switch (VT.SimpleTy) { 2118 default: llvm_unreachable("Unexpected value type."); 2119 case MVT::i1: VTIsi1 = true; [[fallthrough]]; 2120 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2121 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2122 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2123 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2124 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2125 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2126 } 2127 2128 // Storing an i1 requires special handling. 2129 if (VTIsi1 && SrcReg != AArch64::WZR) { 2130 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1); 2131 assert(ANDReg && "Unexpected AND instruction emission failure."); 2132 SrcReg = ANDReg; 2133 } 2134 // Create the base instruction, then add the operands. 
2135 const MCInstrDesc &II = TII.get(Opc); 2136 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2137 MachineInstrBuilder MIB = 2138 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg); 2139 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2140 2141 return true; 2142 } 2143 2144 bool AArch64FastISel::selectStore(const Instruction *I) { 2145 MVT VT; 2146 const Value *Op0 = I->getOperand(0); 2147 // Verify we have a legal type before going any further. Currently, we handle 2148 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2149 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2150 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) 2151 return false; 2152 2153 const Value *PtrV = I->getOperand(1); 2154 if (TLI.supportSwiftError()) { 2155 // Swifterror values can come from either a function parameter with 2156 // swifterror attribute or an alloca with swifterror attribute. 2157 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { 2158 if (Arg->hasSwiftErrorAttr()) 2159 return false; 2160 } 2161 2162 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { 2163 if (Alloca->isSwiftError()) 2164 return false; 2165 } 2166 } 2167 2168 // Get the value to be stored into a register. Use the zero register directly 2169 // when possible to avoid an unnecessary copy and a wasted register. 2170 unsigned SrcReg = 0; 2171 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2172 if (CI->isZero()) 2173 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2174 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2175 if (CF->isZero() && !CF->isNegative()) { 2176 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2177 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2178 } 2179 } 2180 2181 if (!SrcReg) 2182 SrcReg = getRegForValue(Op0); 2183 2184 if (!SrcReg) 2185 return false; 2186 2187 auto *SI = cast<StoreInst>(I); 2188 2189 // Try to emit a STLR for seq_cst/release. 2190 if (SI->isAtomic()) { 2191 AtomicOrdering Ord = SI->getOrdering(); 2192 // The non-atomic instructions are sufficient for relaxed stores. 2193 if (isReleaseOrStronger(Ord)) { 2194 // The STLR addressing mode only supports a base reg; pass that directly. 2195 Register AddrReg = getRegForValue(PtrV); 2196 return emitStoreRelease(VT, SrcReg, AddrReg, 2197 createMachineMemOperandFor(I)); 2198 } 2199 } 2200 2201 // See if we can handle this address. 2202 Address Addr; 2203 if (!computeAddress(PtrV, Addr, Op0->getType())) 2204 return false; 2205 2206 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2207 return false; 2208 return true; 2209 } 2210 2211 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2212 switch (Pred) { 2213 case CmpInst::FCMP_ONE: 2214 case CmpInst::FCMP_UEQ: 2215 default: 2216 // AL is our "false" for now. The other two need more compares. 
2217 return AArch64CC::AL; 2218 case CmpInst::ICMP_EQ: 2219 case CmpInst::FCMP_OEQ: 2220 return AArch64CC::EQ; 2221 case CmpInst::ICMP_SGT: 2222 case CmpInst::FCMP_OGT: 2223 return AArch64CC::GT; 2224 case CmpInst::ICMP_SGE: 2225 case CmpInst::FCMP_OGE: 2226 return AArch64CC::GE; 2227 case CmpInst::ICMP_UGT: 2228 case CmpInst::FCMP_UGT: 2229 return AArch64CC::HI; 2230 case CmpInst::FCMP_OLT: 2231 return AArch64CC::MI; 2232 case CmpInst::ICMP_ULE: 2233 case CmpInst::FCMP_OLE: 2234 return AArch64CC::LS; 2235 case CmpInst::FCMP_ORD: 2236 return AArch64CC::VC; 2237 case CmpInst::FCMP_UNO: 2238 return AArch64CC::VS; 2239 case CmpInst::FCMP_UGE: 2240 return AArch64CC::PL; 2241 case CmpInst::ICMP_SLT: 2242 case CmpInst::FCMP_ULT: 2243 return AArch64CC::LT; 2244 case CmpInst::ICMP_SLE: 2245 case CmpInst::FCMP_ULE: 2246 return AArch64CC::LE; 2247 case CmpInst::FCMP_UNE: 2248 case CmpInst::ICMP_NE: 2249 return AArch64CC::NE; 2250 case CmpInst::ICMP_UGE: 2251 return AArch64CC::HS; 2252 case CmpInst::ICMP_ULT: 2253 return AArch64CC::LO; 2254 } 2255 } 2256 2257 /// Try to emit a combined compare-and-branch instruction. 2258 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2259 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions 2260 // will not be produced, as they are conditional branch instructions that do 2261 // not set flags. 2262 if (FuncInfo.MF->getFunction().hasFnAttribute( 2263 Attribute::SpeculativeLoadHardening)) 2264 return false; 2265 2266 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2267 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2268 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2269 2270 const Value *LHS = CI->getOperand(0); 2271 const Value *RHS = CI->getOperand(1); 2272 2273 MVT VT; 2274 if (!isTypeSupported(LHS->getType(), VT)) 2275 return false; 2276 2277 unsigned BW = VT.getSizeInBits(); 2278 if (BW > 64) 2279 return false; 2280 2281 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2282 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2283 2284 // Try to take advantage of fallthrough opportunities. 
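// If the true block is the layout successor, invert the condition and branch
// to the false block instead, so the true block is reached by simply falling
// through.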
2285 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2286 std::swap(TBB, FBB); 2287 Predicate = CmpInst::getInversePredicate(Predicate); 2288 } 2289 2290 int TestBit = -1; 2291 bool IsCmpNE; 2292 switch (Predicate) { 2293 default: 2294 return false; 2295 case CmpInst::ICMP_EQ: 2296 case CmpInst::ICMP_NE: 2297 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2298 std::swap(LHS, RHS); 2299 2300 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2301 return false; 2302 2303 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2304 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2305 const Value *AndLHS = AI->getOperand(0); 2306 const Value *AndRHS = AI->getOperand(1); 2307 2308 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2309 if (C->getValue().isPowerOf2()) 2310 std::swap(AndLHS, AndRHS); 2311 2312 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2313 if (C->getValue().isPowerOf2()) { 2314 TestBit = C->getValue().logBase2(); 2315 LHS = AndLHS; 2316 } 2317 } 2318 2319 if (VT == MVT::i1) 2320 TestBit = 0; 2321 2322 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2323 break; 2324 case CmpInst::ICMP_SLT: 2325 case CmpInst::ICMP_SGE: 2326 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2327 return false; 2328 2329 TestBit = BW - 1; 2330 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2331 break; 2332 case CmpInst::ICMP_SGT: 2333 case CmpInst::ICMP_SLE: 2334 if (!isa<ConstantInt>(RHS)) 2335 return false; 2336 2337 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2338 return false; 2339 2340 TestBit = BW - 1; 2341 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2342 break; 2343 } // end switch 2344 2345 static const unsigned OpcTable[2][2][2] = { 2346 { {AArch64::CBZW, AArch64::CBZX }, 2347 {AArch64::CBNZW, AArch64::CBNZX} }, 2348 { {AArch64::TBZW, AArch64::TBZX }, 2349 {AArch64::TBNZW, AArch64::TBNZX} } 2350 }; 2351 2352 bool IsBitTest = TestBit != -1; 2353 bool Is64Bit = BW == 64; 2354 if (TestBit < 32 && TestBit >= 0) 2355 Is64Bit = false; 2356 2357 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2358 const MCInstrDesc &II = TII.get(Opc); 2359 2360 Register SrcReg = getRegForValue(LHS); 2361 if (!SrcReg) 2362 return false; 2363 2364 if (BW == 64 && !Is64Bit) 2365 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32); 2366 2367 if ((BW < 32) && !IsBitTest) 2368 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); 2369 2370 // Emit the combined compare and branch instruction. 2371 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2372 MachineInstrBuilder MIB = 2373 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)) 2374 .addReg(SrcReg); 2375 if (IsBitTest) 2376 MIB.addImm(TestBit); 2377 MIB.addMBB(TBB); 2378 2379 finishCondBranch(BI->getParent(), TBB, FBB); 2380 return true; 2381 } 2382 2383 bool AArch64FastISel::selectBranch(const Instruction *I) { 2384 const BranchInst *BI = cast<BranchInst>(I); 2385 if (BI->isUnconditional()) { 2386 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2387 fastEmitBranch(MSucc, BI->getDebugLoc()); 2388 return true; 2389 } 2390 2391 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2392 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2393 2394 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2395 if (CI->hasOneUse() && isValueAvailable(CI)) { 2396 // Try to optimize or fold the cmp. 
2397 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2398 switch (Predicate) { 2399 default: 2400 break; 2401 case CmpInst::FCMP_FALSE: 2402 fastEmitBranch(FBB, MIMD.getDL()); 2403 return true; 2404 case CmpInst::FCMP_TRUE: 2405 fastEmitBranch(TBB, MIMD.getDL()); 2406 return true; 2407 } 2408 2409 // Try to emit a combined compare-and-branch first. 2410 if (emitCompareAndBranch(BI)) 2411 return true; 2412 2413 // Try to take advantage of fallthrough opportunities. 2414 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2415 std::swap(TBB, FBB); 2416 Predicate = CmpInst::getInversePredicate(Predicate); 2417 } 2418 2419 // Emit the cmp. 2420 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2421 return false; 2422 2423 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2424 // instruction. 2425 AArch64CC::CondCode CC = getCompareCC(Predicate); 2426 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2427 switch (Predicate) { 2428 default: 2429 break; 2430 case CmpInst::FCMP_UEQ: 2431 ExtraCC = AArch64CC::EQ; 2432 CC = AArch64CC::VS; 2433 break; 2434 case CmpInst::FCMP_ONE: 2435 ExtraCC = AArch64CC::MI; 2436 CC = AArch64CC::GT; 2437 break; 2438 } 2439 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2440 2441 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2442 if (ExtraCC != AArch64CC::AL) { 2443 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2444 .addImm(ExtraCC) 2445 .addMBB(TBB); 2446 } 2447 2448 // Emit the branch. 2449 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2450 .addImm(CC) 2451 .addMBB(TBB); 2452 2453 finishCondBranch(BI->getParent(), TBB, FBB); 2454 return true; 2455 } 2456 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2457 uint64_t Imm = CI->getZExtValue(); 2458 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B)) 2460 .addMBB(Target); 2461 2462 // Obtain the branch probability and add the target to the successor list. 2463 if (FuncInfo.BPI) { 2464 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2465 BI->getParent(), Target->getBasicBlock()); 2466 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2467 } else 2468 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2469 return true; 2470 } else { 2471 AArch64CC::CondCode CC = AArch64CC::NE; 2472 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2473 // Fake request the condition, otherwise the intrinsic might be completely 2474 // optimized away. 2475 Register CondReg = getRegForValue(BI->getCondition()); 2476 if (!CondReg) 2477 return false; 2478 2479 // Emit the branch. 2480 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2481 .addImm(CC) 2482 .addMBB(TBB); 2483 2484 finishCondBranch(BI->getParent(), TBB, FBB); 2485 return true; 2486 } 2487 } 2488 2489 Register CondReg = getRegForValue(BI->getCondition()); 2490 if (CondReg == 0) 2491 return false; 2492 2493 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
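// Only bit 0 of the boolean is meaningful, so a single "tbnz w<cond>, #0,
// <target>" (or tbz once the branch is inverted for fallthrough below) is
// sufficient.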
2494 unsigned Opcode = AArch64::TBNZW; 2495 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2496 std::swap(TBB, FBB); 2497 Opcode = AArch64::TBZW; 2498 } 2499 2500 const MCInstrDesc &II = TII.get(Opcode); 2501 Register ConstrainedCondReg 2502 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2503 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 2504 .addReg(ConstrainedCondReg) 2505 .addImm(0) 2506 .addMBB(TBB); 2507 2508 finishCondBranch(BI->getParent(), TBB, FBB); 2509 return true; 2510 } 2511 2512 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2513 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2514 Register AddrReg = getRegForValue(BI->getOperand(0)); 2515 if (AddrReg == 0) 2516 return false; 2517 2518 // Emit the indirect branch. 2519 const MCInstrDesc &II = TII.get(AArch64::BR); 2520 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2521 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg); 2522 2523 // Make sure the CFG is up-to-date. 2524 for (const auto *Succ : BI->successors()) 2525 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); 2526 2527 return true; 2528 } 2529 2530 bool AArch64FastISel::selectCmp(const Instruction *I) { 2531 const CmpInst *CI = cast<CmpInst>(I); 2532 2533 // Vectors of i1 are weird: bail out. 2534 if (CI->getType()->isVectorTy()) 2535 return false; 2536 2537 // Try to optimize or fold the cmp. 2538 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2539 unsigned ResultReg = 0; 2540 switch (Predicate) { 2541 default: 2542 break; 2543 case CmpInst::FCMP_FALSE: 2544 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2545 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 2546 TII.get(TargetOpcode::COPY), ResultReg) 2547 .addReg(AArch64::WZR, getKillRegState(true)); 2548 break; 2549 case CmpInst::FCMP_TRUE: 2550 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2551 break; 2552 } 2553 2554 if (ResultReg) { 2555 updateValueMap(I, ResultReg); 2556 return true; 2557 } 2558 2559 // Emit the cmp. 2560 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2561 return false; 2562 2563 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2564 2565 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2566 // condition codes are inverted, because they are used by CSINC. 2567 static unsigned CondCodeTable[2][2] = { 2568 { AArch64CC::NE, AArch64CC::VC }, 2569 { AArch64CC::PL, AArch64CC::LE } 2570 }; 2571 unsigned *CondCodes = nullptr; 2572 switch (Predicate) { 2573 default: 2574 break; 2575 case CmpInst::FCMP_UEQ: 2576 CondCodes = &CondCodeTable[0][0]; 2577 break; 2578 case CmpInst::FCMP_ONE: 2579 CondCodes = &CondCodeTable[1][0]; 2580 break; 2581 } 2582 2583 if (CondCodes) { 2584 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2585 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2586 TmpReg1) 2587 .addReg(AArch64::WZR, getKillRegState(true)) 2588 .addReg(AArch64::WZR, getKillRegState(true)) 2589 .addImm(CondCodes[0]); 2590 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2591 ResultReg) 2592 .addReg(TmpReg1, getKillRegState(true)) 2593 .addReg(AArch64::WZR, getKillRegState(true)) 2594 .addImm(CondCodes[1]); 2595 2596 updateValueMap(I, ResultReg); 2597 return true; 2598 } 2599 2600 // Now set a register based on the comparison. 
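// CSINC Wd, WZR, WZR, <inverted CC> is the CSET alias: it produces 1 when the
// original condition holds and 0 otherwise.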
2601 AArch64CC::CondCode CC = getCompareCC(Predicate); 2602 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2603 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2604 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2605 ResultReg) 2606 .addReg(AArch64::WZR, getKillRegState(true)) 2607 .addReg(AArch64::WZR, getKillRegState(true)) 2608 .addImm(invertedCC); 2609 2610 updateValueMap(I, ResultReg); 2611 return true; 2612 } 2613 2614 /// Optimize selects of i1 if one of the operands has a 'true' or 'false' 2615 /// value. 2616 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2617 if (!SI->getType()->isIntegerTy(1)) 2618 return false; 2619 2620 const Value *Src1Val, *Src2Val; 2621 unsigned Opc = 0; 2622 bool NeedExtraOp = false; 2623 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2624 if (CI->isOne()) { 2625 Src1Val = SI->getCondition(); 2626 Src2Val = SI->getFalseValue(); 2627 Opc = AArch64::ORRWrr; 2628 } else { 2629 assert(CI->isZero()); 2630 Src1Val = SI->getFalseValue(); 2631 Src2Val = SI->getCondition(); 2632 Opc = AArch64::BICWrr; 2633 } 2634 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2635 if (CI->isOne()) { 2636 Src1Val = SI->getCondition(); 2637 Src2Val = SI->getTrueValue(); 2638 Opc = AArch64::ORRWrr; 2639 NeedExtraOp = true; 2640 } else { 2641 assert(CI->isZero()); 2642 Src1Val = SI->getCondition(); 2643 Src2Val = SI->getTrueValue(); 2644 Opc = AArch64::ANDWrr; 2645 } 2646 } 2647 2648 if (!Opc) 2649 return false; 2650 2651 Register Src1Reg = getRegForValue(Src1Val); 2652 if (!Src1Reg) 2653 return false; 2654 2655 Register Src2Reg = getRegForValue(Src2Val); 2656 if (!Src2Reg) 2657 return false; 2658 2659 if (NeedExtraOp) 2660 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1); 2661 2662 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2663 Src2Reg); 2664 updateValueMap(SI, ResultReg); 2665 return true; 2666 } 2667 2668 bool AArch64FastISel::selectSelect(const Instruction *I) { 2669 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2670 MVT VT; 2671 if (!isTypeSupported(I->getType(), VT)) 2672 return false; 2673 2674 unsigned Opc; 2675 const TargetRegisterClass *RC; 2676 switch (VT.SimpleTy) { 2677 default: 2678 return false; 2679 case MVT::i1: 2680 case MVT::i8: 2681 case MVT::i16: 2682 case MVT::i32: 2683 Opc = AArch64::CSELWr; 2684 RC = &AArch64::GPR32RegClass; 2685 break; 2686 case MVT::i64: 2687 Opc = AArch64::CSELXr; 2688 RC = &AArch64::GPR64RegClass; 2689 break; 2690 case MVT::f32: 2691 Opc = AArch64::FCSELSrrr; 2692 RC = &AArch64::FPR32RegClass; 2693 break; 2694 case MVT::f64: 2695 Opc = AArch64::FCSELDrrr; 2696 RC = &AArch64::FPR64RegClass; 2697 break; 2698 } 2699 2700 const SelectInst *SI = cast<SelectInst>(I); 2701 const Value *Cond = SI->getCondition(); 2702 AArch64CC::CondCode CC = AArch64CC::NE; 2703 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2704 2705 if (optimizeSelect(SI)) 2706 return true; 2707 2708 // Try to pickup the flags, so we don't have to emit another compare. 2709 if (foldXALUIntrinsic(CC, I, Cond)) { 2710 // Fake request the condition to force emission of the XALU intrinsic. 2711 Register CondReg = getRegForValue(Cond); 2712 if (!CondReg) 2713 return false; 2714 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2715 isValueAvailable(Cond)) { 2716 const auto *Cmp = cast<CmpInst>(Cond); 2717 // Try to optimize or fold the cmp. 
2718 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2719 const Value *FoldSelect = nullptr; 2720 switch (Predicate) { 2721 default: 2722 break; 2723 case CmpInst::FCMP_FALSE: 2724 FoldSelect = SI->getFalseValue(); 2725 break; 2726 case CmpInst::FCMP_TRUE: 2727 FoldSelect = SI->getTrueValue(); 2728 break; 2729 } 2730 2731 if (FoldSelect) { 2732 Register SrcReg = getRegForValue(FoldSelect); 2733 if (!SrcReg) 2734 return false; 2735 2736 updateValueMap(I, SrcReg); 2737 return true; 2738 } 2739 2740 // Emit the cmp. 2741 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2742 return false; 2743 2744 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2745 CC = getCompareCC(Predicate); 2746 switch (Predicate) { 2747 default: 2748 break; 2749 case CmpInst::FCMP_UEQ: 2750 ExtraCC = AArch64CC::EQ; 2751 CC = AArch64CC::VS; 2752 break; 2753 case CmpInst::FCMP_ONE: 2754 ExtraCC = AArch64CC::MI; 2755 CC = AArch64CC::GT; 2756 break; 2757 } 2758 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2759 } else { 2760 Register CondReg = getRegForValue(Cond); 2761 if (!CondReg) 2762 return false; 2763 2764 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2765 CondReg = constrainOperandRegClass(II, CondReg, 1); 2766 2767 // Emit a TST instruction (ANDS wzr, reg, #imm). 2768 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, 2769 AArch64::WZR) 2770 .addReg(CondReg) 2771 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2772 } 2773 2774 Register Src1Reg = getRegForValue(SI->getTrueValue()); 2775 Register Src2Reg = getRegForValue(SI->getFalseValue()); 2776 2777 if (!Src1Reg || !Src2Reg) 2778 return false; 2779 2780 if (ExtraCC != AArch64CC::AL) 2781 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC); 2782 2783 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC); 2784 updateValueMap(I, ResultReg); 2785 return true; 2786 } 2787 2788 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2789 Value *V = I->getOperand(0); 2790 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2791 return false; 2792 2793 Register Op = getRegForValue(V); 2794 if (Op == 0) 2795 return false; 2796 2797 Register ResultReg = createResultReg(&AArch64::FPR64RegClass); 2798 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr), 2799 ResultReg).addReg(Op); 2800 updateValueMap(I, ResultReg); 2801 return true; 2802 } 2803 2804 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2805 Value *V = I->getOperand(0); 2806 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2807 return false; 2808 2809 Register Op = getRegForValue(V); 2810 if (Op == 0) 2811 return false; 2812 2813 Register ResultReg = createResultReg(&AArch64::FPR32RegClass); 2814 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr), 2815 ResultReg).addReg(Op); 2816 updateValueMap(I, ResultReg); 2817 return true; 2818 } 2819 2820 // FPToUI and FPToSI 2821 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2822 MVT DestVT; 2823 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2824 return false; 2825 2826 Register SrcReg = getRegForValue(I->getOperand(0)); 2827 if (SrcReg == 0) 2828 return false; 2829 2830 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2831 if (SrcVT == MVT::f128 || SrcVT == MVT::f16) 2832 return false; 2833 2834 unsigned Opc; 2835 if (SrcVT == MVT::f64) { 2836 if (Signed) 2837 Opc = (DestVT == MVT::i32) ? 
AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2838 else 2839 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2840 } else { 2841 if (Signed) 2842 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2843 else 2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2845 } 2846 Register ResultReg = createResultReg( 2847 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2848 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 2849 .addReg(SrcReg); 2850 updateValueMap(I, ResultReg); 2851 return true; 2852 } 2853 2854 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2855 MVT DestVT; 2856 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2857 return false; 2858 // Let regular ISEL handle FP16 2859 if (DestVT == MVT::f16) 2860 return false; 2861 2862 assert((DestVT == MVT::f32 || DestVT == MVT::f64) && 2863 "Unexpected value type."); 2864 2865 Register SrcReg = getRegForValue(I->getOperand(0)); 2866 if (!SrcReg) 2867 return false; 2868 2869 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2870 2871 // Handle sign-extension. 2872 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2873 SrcReg = 2874 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2875 if (!SrcReg) 2876 return false; 2877 } 2878 2879 unsigned Opc; 2880 if (SrcVT == MVT::i64) { 2881 if (Signed) 2882 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2883 else 2884 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2885 } else { 2886 if (Signed) 2887 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2888 else 2889 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2890 } 2891 2892 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg); 2893 updateValueMap(I, ResultReg); 2894 return true; 2895 } 2896 2897 bool AArch64FastISel::fastLowerArguments() { 2898 if (!FuncInfo.CanLowerReturn) 2899 return false; 2900 2901 const Function *F = FuncInfo.Fn; 2902 if (F->isVarArg()) 2903 return false; 2904 2905 CallingConv::ID CC = F->getCallingConv(); 2906 if (CC != CallingConv::C && CC != CallingConv::Swift) 2907 return false; 2908 2909 if (Subtarget->hasCustomCallingConv()) 2910 return false; 2911 2912 // Only handle simple cases of up to 8 GPR and FPR each. 
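// These are the AAPCS64 argument registers W0-W7/X0-X7 and
// H0-H7/S0-S7/D0-D7/Q0-Q7 (see the Registers table below); signatures that
// would need stack-passed arguments are rejected and left to the default
// lowering.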
2913 unsigned GPRCnt = 0; 2914 unsigned FPRCnt = 0; 2915 for (auto const &Arg : F->args()) { 2916 if (Arg.hasAttribute(Attribute::ByVal) || 2917 Arg.hasAttribute(Attribute::InReg) || 2918 Arg.hasAttribute(Attribute::StructRet) || 2919 Arg.hasAttribute(Attribute::SwiftSelf) || 2920 Arg.hasAttribute(Attribute::SwiftAsync) || 2921 Arg.hasAttribute(Attribute::SwiftError) || 2922 Arg.hasAttribute(Attribute::Nest)) 2923 return false; 2924 2925 Type *ArgTy = Arg.getType(); 2926 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2927 return false; 2928 2929 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2930 if (!ArgVT.isSimple()) 2931 return false; 2932 2933 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2934 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2935 return false; 2936 2937 if (VT.isVector() && 2938 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2939 return false; 2940 2941 if (VT >= MVT::i1 && VT <= MVT::i64) 2942 ++GPRCnt; 2943 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2944 VT.is128BitVector()) 2945 ++FPRCnt; 2946 else 2947 return false; 2948 2949 if (GPRCnt > 8 || FPRCnt > 8) 2950 return false; 2951 } 2952 2953 static const MCPhysReg Registers[6][8] = { 2954 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 2955 AArch64::W5, AArch64::W6, AArch64::W7 }, 2956 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 2957 AArch64::X5, AArch64::X6, AArch64::X7 }, 2958 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 2959 AArch64::H5, AArch64::H6, AArch64::H7 }, 2960 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 2961 AArch64::S5, AArch64::S6, AArch64::S7 }, 2962 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 2963 AArch64::D5, AArch64::D6, AArch64::D7 }, 2964 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 2965 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 2966 }; 2967 2968 unsigned GPRIdx = 0; 2969 unsigned FPRIdx = 0; 2970 for (auto const &Arg : F->args()) { 2971 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 2972 unsigned SrcReg; 2973 const TargetRegisterClass *RC; 2974 if (VT >= MVT::i1 && VT <= MVT::i32) { 2975 SrcReg = Registers[0][GPRIdx++]; 2976 RC = &AArch64::GPR32RegClass; 2977 VT = MVT::i32; 2978 } else if (VT == MVT::i64) { 2979 SrcReg = Registers[1][GPRIdx++]; 2980 RC = &AArch64::GPR64RegClass; 2981 } else if (VT == MVT::f16) { 2982 SrcReg = Registers[2][FPRIdx++]; 2983 RC = &AArch64::FPR16RegClass; 2984 } else if (VT == MVT::f32) { 2985 SrcReg = Registers[3][FPRIdx++]; 2986 RC = &AArch64::FPR32RegClass; 2987 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 2988 SrcReg = Registers[4][FPRIdx++]; 2989 RC = &AArch64::FPR64RegClass; 2990 } else if (VT.is128BitVector()) { 2991 SrcReg = Registers[5][FPRIdx++]; 2992 RC = &AArch64::FPR128RegClass; 2993 } else 2994 llvm_unreachable("Unexpected value type."); 2995 2996 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 2997 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 2998 // Without this, EmitLiveInCopies may eliminate the livein if its only 2999 // use is a bitcast (which isn't turned into an instruction). 
3000 Register ResultReg = createResultReg(RC); 3001 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3002 TII.get(TargetOpcode::COPY), ResultReg) 3003 .addReg(DstReg, getKillRegState(true)); 3004 updateValueMap(&Arg, ResultReg); 3005 } 3006 return true; 3007 } 3008 3009 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 3010 SmallVectorImpl<MVT> &OutVTs, 3011 unsigned &NumBytes) { 3012 CallingConv::ID CC = CLI.CallConv; 3013 SmallVector<CCValAssign, 16> ArgLocs; 3014 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 3015 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 3016 3017 // Get a count of how many bytes are to be pushed on the stack. 3018 NumBytes = CCInfo.getStackSize(); 3019 3020 // Issue CALLSEQ_START 3021 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 3022 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown)) 3023 .addImm(NumBytes).addImm(0); 3024 3025 // Process the args. 3026 for (CCValAssign &VA : ArgLocs) { 3027 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 3028 MVT ArgVT = OutVTs[VA.getValNo()]; 3029 3030 Register ArgReg = getRegForValue(ArgVal); 3031 if (!ArgReg) 3032 return false; 3033 3034 // Handle arg promotion: SExt, ZExt, AExt. 3035 switch (VA.getLocInfo()) { 3036 case CCValAssign::Full: 3037 break; 3038 case CCValAssign::SExt: { 3039 MVT DestVT = VA.getLocVT(); 3040 MVT SrcVT = ArgVT; 3041 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 3042 if (!ArgReg) 3043 return false; 3044 break; 3045 } 3046 case CCValAssign::AExt: 3047 // Intentional fall-through. 3048 case CCValAssign::ZExt: { 3049 MVT DestVT = VA.getLocVT(); 3050 MVT SrcVT = ArgVT; 3051 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 3052 if (!ArgReg) 3053 return false; 3054 break; 3055 } 3056 default: 3057 llvm_unreachable("Unknown arg promotion!"); 3058 } 3059 3060 // Now copy/store arg to correct locations. 3061 if (VA.isRegLoc() && !VA.needsCustom()) { 3062 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3063 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 3064 CLI.OutRegs.push_back(VA.getLocReg()); 3065 } else if (VA.needsCustom()) { 3066 // FIXME: Handle custom args. 3067 return false; 3068 } else { 3069 assert(VA.isMemLoc() && "Assuming store on stack."); 3070 3071 // Don't emit stores for undef values. 3072 if (isa<UndefValue>(ArgVal)) 3073 continue; 3074 3075 // Need to store on the stack. 3076 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 3077 3078 unsigned BEAlign = 0; 3079 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3080 BEAlign = 8 - ArgSize; 3081 3082 Address Addr; 3083 Addr.setKind(Address::RegBase); 3084 Addr.setReg(AArch64::SP); 3085 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3086 3087 Align Alignment = DL.getABITypeAlign(ArgVal->getType()); 3088 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3089 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 3090 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3091 3092 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3093 return false; 3094 } 3095 } 3096 return true; 3097 } 3098 3099 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) { 3100 CallingConv::ID CC = CLI.CallConv; 3101 3102 // Issue CALLSEQ_END 3103 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3104 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp)) 3105 .addImm(NumBytes).addImm(0); 3106 3107 // Now the return values. 
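// CreateRegs allocates consecutive virtual registers for the call result;
// each return location's physical register is copied into ResultReg + i
// below.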
3108 SmallVector<CCValAssign, 16> RVLocs; 3109 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3110 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC)); 3111 3112 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy); 3113 for (unsigned i = 0; i != RVLocs.size(); ++i) { 3114 CCValAssign &VA = RVLocs[i]; 3115 MVT CopyVT = VA.getValVT(); 3116 unsigned CopyReg = ResultReg + i; 3117 3118 // TODO: Handle big-endian results 3119 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3120 return false; 3121 3122 // Copy result out of their specified physreg. 3123 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), 3124 CopyReg) 3125 .addReg(VA.getLocReg()); 3126 CLI.InRegs.push_back(VA.getLocReg()); 3127 } 3128 3129 CLI.ResultReg = ResultReg; 3130 CLI.NumResultRegs = RVLocs.size(); 3131 3132 return true; 3133 } 3134 3135 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3136 CallingConv::ID CC = CLI.CallConv; 3137 bool IsTailCall = CLI.IsTailCall; 3138 bool IsVarArg = CLI.IsVarArg; 3139 const Value *Callee = CLI.Callee; 3140 MCSymbol *Symbol = CLI.Symbol; 3141 3142 if (!Callee && !Symbol) 3143 return false; 3144 3145 // Allow SelectionDAG isel to handle calls to functions like setjmp that need 3146 // a bti instruction following the call. 3147 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) && 3148 !Subtarget->noBTIAtReturnTwice() && 3149 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) 3150 return false; 3151 3152 // Allow SelectionDAG isel to handle indirect calls with KCFI checks. 3153 if (CLI.CB && CLI.CB->isIndirectCall() && 3154 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi)) 3155 return false; 3156 3157 // Allow SelectionDAG isel to handle tail calls. 3158 if (IsTailCall) 3159 return false; 3160 3161 // FIXME: we could and should support this, but for now correctness at -O0 is 3162 // more important. 3163 if (Subtarget->isTargetILP32()) 3164 return false; 3165 3166 CodeModel::Model CM = TM.getCodeModel(); 3167 // Only support the small-addressing and large code models. 3168 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) 3169 return false; 3170 3171 // FIXME: Add large code model support for ELF. 3172 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3173 return false; 3174 3175 // Let SDISel handle vararg functions. 3176 if (IsVarArg) 3177 return false; 3178 3179 if (Subtarget->isWindowsArm64EC()) 3180 return false; 3181 3182 for (auto Flag : CLI.OutFlags) 3183 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || 3184 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError()) 3185 return false; 3186 3187 // Set up the argument vectors. 3188 SmallVector<MVT, 16> OutVTs; 3189 OutVTs.reserve(CLI.OutVals.size()); 3190 3191 for (auto *Val : CLI.OutVals) { 3192 MVT VT; 3193 if (!isTypeLegal(Val->getType(), VT) && 3194 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3195 return false; 3196 3197 // We don't handle vector parameters yet. 3198 if (VT.isVector() || VT.getSizeInBits() > 64) 3199 return false; 3200 3201 OutVTs.push_back(VT); 3202 } 3203 3204 Address Addr; 3205 if (Callee && !computeCallAddress(Callee, Addr)) 3206 return false; 3207 3208 // The weak function target may be zero; in that case we must use indirect 3209 // addressing via a stub on windows as it may be out of range for a 3210 // PC-relative jump. 
3211 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() && 3212 Addr.getGlobalValue()->hasExternalWeakLinkage()) 3213 return false; 3214 3215 // Handle the arguments now that we've gotten them. 3216 unsigned NumBytes; 3217 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3218 return false; 3219 3220 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3221 if (RegInfo->isAnyArgRegReserved(*MF)) 3222 RegInfo->emitReservedArgRegCallError(*MF); 3223 3224 // Issue the call. 3225 MachineInstrBuilder MIB; 3226 if (Subtarget->useSmallAddressing()) { 3227 const MCInstrDesc &II = 3228 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL); 3229 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II); 3230 if (Symbol) 3231 MIB.addSym(Symbol, 0); 3232 else if (Addr.getGlobalValue()) 3233 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3234 else if (Addr.getReg()) { 3235 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3236 MIB.addReg(Reg); 3237 } else 3238 return false; 3239 } else { 3240 unsigned CallReg = 0; 3241 if (Symbol) { 3242 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3243 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP), 3244 ADRPReg) 3245 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3246 3247 CallReg = createResultReg(&AArch64::GPR64RegClass); 3248 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3249 TII.get(AArch64::LDRXui), CallReg) 3250 .addReg(ADRPReg) 3251 .addSym(Symbol, 3252 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3253 } else if (Addr.getGlobalValue()) 3254 CallReg = materializeGV(Addr.getGlobalValue()); 3255 else if (Addr.getReg()) 3256 CallReg = Addr.getReg(); 3257 3258 if (!CallReg) 3259 return false; 3260 3261 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF)); 3262 CallReg = constrainOperandRegClass(II, CallReg, 0); 3263 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg); 3264 } 3265 3266 // Add implicit physical register uses to the call. 3267 for (auto Reg : CLI.OutRegs) 3268 MIB.addReg(Reg, RegState::Implicit); 3269 3270 // Add a register mask with the call-preserved registers. 3271 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3272 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3273 3274 CLI.Call = MIB; 3275 3276 // Finish off the call including any return values. 3277 return finishCall(CLI, NumBytes); 3278 } 3279 3280 bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) { 3281 if (Alignment) 3282 return Len / Alignment->value() <= 4; 3283 else 3284 return Len < 32; 3285 } 3286 3287 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3288 uint64_t Len, MaybeAlign Alignment) { 3289 // Make sure we don't bloat code by inlining very large memcpy's. 3290 if (!isMemCpySmall(Len, Alignment)) 3291 return false; 3292 3293 int64_t UnscaledOffset = 0; 3294 Address OrigDest = Dest; 3295 Address OrigSrc = Src; 3296 3297 while (Len) { 3298 MVT VT; 3299 if (!Alignment || *Alignment >= 8) { 3300 if (Len >= 8) 3301 VT = MVT::i64; 3302 else if (Len >= 4) 3303 VT = MVT::i32; 3304 else if (Len >= 2) 3305 VT = MVT::i16; 3306 else { 3307 VT = MVT::i8; 3308 } 3309 } else { 3310 assert(Alignment && "Alignment is set in this branch"); 3311 // Bound based on alignment. 
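// For example, a 12-byte copy with 4-byte alignment is expanded into three
// i32 load/store pairs.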
3312 if (Len >= 4 && *Alignment == 4) 3313 VT = MVT::i32; 3314 else if (Len >= 2 && *Alignment == 2) 3315 VT = MVT::i16; 3316 else { 3317 VT = MVT::i8; 3318 } 3319 } 3320 3321 unsigned ResultReg = emitLoad(VT, VT, Src); 3322 if (!ResultReg) 3323 return false; 3324 3325 if (!emitStore(VT, ResultReg, Dest)) 3326 return false; 3327 3328 int64_t Size = VT.getSizeInBits() / 8; 3329 Len -= Size; 3330 UnscaledOffset += Size; 3331 3332 // We need to recompute the unscaled offset for each iteration. 3333 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3334 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3335 } 3336 3337 return true; 3338 } 3339 3340 /// Check if it is possible to fold the condition from the XALU intrinsic 3341 /// into the user. The condition code will only be updated on success. 3342 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3343 const Instruction *I, 3344 const Value *Cond) { 3345 if (!isa<ExtractValueInst>(Cond)) 3346 return false; 3347 3348 const auto *EV = cast<ExtractValueInst>(Cond); 3349 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3350 return false; 3351 3352 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3353 MVT RetVT; 3354 const Function *Callee = II->getCalledFunction(); 3355 Type *RetTy = 3356 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3357 if (!isTypeLegal(RetTy, RetVT)) 3358 return false; 3359 3360 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3361 return false; 3362 3363 const Value *LHS = II->getArgOperand(0); 3364 const Value *RHS = II->getArgOperand(1); 3365 3366 // Canonicalize immediate to the RHS. 3367 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3368 std::swap(LHS, RHS); 3369 3370 // Simplify multiplies. 3371 Intrinsic::ID IID = II->getIntrinsicID(); 3372 switch (IID) { 3373 default: 3374 break; 3375 case Intrinsic::smul_with_overflow: 3376 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3377 if (C->getValue() == 2) 3378 IID = Intrinsic::sadd_with_overflow; 3379 break; 3380 case Intrinsic::umul_with_overflow: 3381 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3382 if (C->getValue() == 2) 3383 IID = Intrinsic::uadd_with_overflow; 3384 break; 3385 } 3386 3387 AArch64CC::CondCode TmpCC; 3388 switch (IID) { 3389 default: 3390 return false; 3391 case Intrinsic::sadd_with_overflow: 3392 case Intrinsic::ssub_with_overflow: 3393 TmpCC = AArch64CC::VS; 3394 break; 3395 case Intrinsic::uadd_with_overflow: 3396 TmpCC = AArch64CC::HS; 3397 break; 3398 case Intrinsic::usub_with_overflow: 3399 TmpCC = AArch64CC::LO; 3400 break; 3401 case Intrinsic::smul_with_overflow: 3402 case Intrinsic::umul_with_overflow: 3403 TmpCC = AArch64CC::NE; 3404 break; 3405 } 3406 3407 // Check if both instructions are in the same basic block. 3408 if (!isValueAvailable(II)) 3409 return false; 3410 3411 // Make sure nothing is in the way 3412 BasicBlock::const_iterator Start(I); 3413 BasicBlock::const_iterator End(II); 3414 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3415 // We only expect extractvalue instructions between the intrinsic and the 3416 // instruction to be selected. 3417 if (!isa<ExtractValueInst>(Itr)) 3418 return false; 3419 3420 // Check that the extractvalue operand comes from the intrinsic. 
3421 const auto *EVI = cast<ExtractValueInst>(Itr); 3422 if (EVI->getAggregateOperand() != II) 3423 return false; 3424 } 3425 3426 CC = TmpCC; 3427 return true; 3428 } 3429 3430 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3431 // FIXME: Handle more intrinsics. 3432 switch (II->getIntrinsicID()) { 3433 default: return false; 3434 case Intrinsic::frameaddress: { 3435 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3436 MFI.setFrameAddressIsTaken(true); 3437 3438 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3439 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3440 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3441 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3442 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3443 // Recursively load frame address 3444 // ldr x0, [fp] 3445 // ldr x0, [x0] 3446 // ldr x0, [x0] 3447 // ... 3448 unsigned DestReg; 3449 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3450 while (Depth--) { 3451 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3452 SrcReg, 0); 3453 assert(DestReg && "Unexpected LDR instruction emission failure."); 3454 SrcReg = DestReg; 3455 } 3456 3457 updateValueMap(II, SrcReg); 3458 return true; 3459 } 3460 case Intrinsic::sponentry: { 3461 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3462 3463 // SP = FP + Fixed Object + 16 3464 int FI = MFI.CreateFixedObject(4, 0, false); 3465 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 3466 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3467 TII.get(AArch64::ADDXri), ResultReg) 3468 .addFrameIndex(FI) 3469 .addImm(0) 3470 .addImm(0); 3471 3472 updateValueMap(II, ResultReg); 3473 return true; 3474 } 3475 case Intrinsic::memcpy: 3476 case Intrinsic::memmove: { 3477 const auto *MTI = cast<MemTransferInst>(II); 3478 // Don't handle volatile. 3479 if (MTI->isVolatile()) 3480 return false; 3481 3482 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3483 // we would emit dead code because we don't currently handle memmoves. 3484 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3485 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3486 // Small memcpy's are common enough that we want to do them without a call 3487 // if possible. 3488 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3489 MaybeAlign Alignment; 3490 if (MTI->getDestAlign() || MTI->getSourceAlign()) 3491 Alignment = std::min(MTI->getDestAlign().valueOrOne(), 3492 MTI->getSourceAlign().valueOrOne()); 3493 if (isMemCpySmall(Len, Alignment)) { 3494 Address Dest, Src; 3495 if (!computeAddress(MTI->getRawDest(), Dest) || 3496 !computeAddress(MTI->getRawSource(), Src)) 3497 return false; 3498 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3499 return true; 3500 } 3501 } 3502 3503 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3504 return false; 3505 3506 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3507 // Fast instruction selection doesn't support the special 3508 // address spaces. 3509 return false; 3510 3511 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3512 return lowerCallTo(II, IntrMemName, II->arg_size() - 1); 3513 } 3514 case Intrinsic::memset: { 3515 const MemSetInst *MSI = cast<MemSetInst>(II); 3516 // Don't handle volatile. 
3517 if (MSI->isVolatile()) 3518 return false; 3519 3520 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3521 return false; 3522 3523 if (MSI->getDestAddressSpace() > 255) 3524 // Fast instruction selection doesn't support the special 3525 // address spaces. 3526 return false; 3527 3528 return lowerCallTo(II, "memset", II->arg_size() - 1); 3529 } 3530 case Intrinsic::sin: 3531 case Intrinsic::cos: 3532 case Intrinsic::pow: { 3533 MVT RetVT; 3534 if (!isTypeLegal(II->getType(), RetVT)) 3535 return false; 3536 3537 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3538 return false; 3539 3540 static const RTLIB::Libcall LibCallTable[3][2] = { 3541 { RTLIB::SIN_F32, RTLIB::SIN_F64 }, 3542 { RTLIB::COS_F32, RTLIB::COS_F64 }, 3543 { RTLIB::POW_F32, RTLIB::POW_F64 } 3544 }; 3545 RTLIB::Libcall LC; 3546 bool Is64Bit = RetVT == MVT::f64; 3547 switch (II->getIntrinsicID()) { 3548 default: 3549 llvm_unreachable("Unexpected intrinsic."); 3550 case Intrinsic::sin: 3551 LC = LibCallTable[0][Is64Bit]; 3552 break; 3553 case Intrinsic::cos: 3554 LC = LibCallTable[1][Is64Bit]; 3555 break; 3556 case Intrinsic::pow: 3557 LC = LibCallTable[2][Is64Bit]; 3558 break; 3559 } 3560 3561 ArgListTy Args; 3562 Args.reserve(II->arg_size()); 3563 3564 // Populate the argument list. 3565 for (auto &Arg : II->args()) { 3566 ArgListEntry Entry; 3567 Entry.Val = Arg; 3568 Entry.Ty = Arg->getType(); 3569 Args.push_back(Entry); 3570 } 3571 3572 CallLoweringInfo CLI; 3573 MCContext &Ctx = MF->getContext(); 3574 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3575 TLI.getLibcallName(LC), std::move(Args)); 3576 if (!lowerCallTo(CLI)) 3577 return false; 3578 updateValueMap(II, CLI.ResultReg); 3579 return true; 3580 } 3581 case Intrinsic::fabs: { 3582 MVT VT; 3583 if (!isTypeLegal(II->getType(), VT)) 3584 return false; 3585 3586 unsigned Opc; 3587 switch (VT.SimpleTy) { 3588 default: 3589 return false; 3590 case MVT::f32: 3591 Opc = AArch64::FABSSr; 3592 break; 3593 case MVT::f64: 3594 Opc = AArch64::FABSDr; 3595 break; 3596 } 3597 Register SrcReg = getRegForValue(II->getOperand(0)); 3598 if (!SrcReg) 3599 return false; 3600 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3601 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 3602 .addReg(SrcReg); 3603 updateValueMap(II, ResultReg); 3604 return true; 3605 } 3606 case Intrinsic::trap: 3607 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK)) 3608 .addImm(1); 3609 return true; 3610 case Intrinsic::debugtrap: 3611 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK)) 3612 .addImm(0xF000); 3613 return true; 3614 3615 case Intrinsic::sqrt: { 3616 Type *RetTy = II->getCalledFunction()->getReturnType(); 3617 3618 MVT VT; 3619 if (!isTypeLegal(RetTy, VT)) 3620 return false; 3621 3622 Register Op0Reg = getRegForValue(II->getOperand(0)); 3623 if (!Op0Reg) 3624 return false; 3625 3626 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg); 3627 if (!ResultReg) 3628 return false; 3629 3630 updateValueMap(II, ResultReg); 3631 return true; 3632 } 3633 case Intrinsic::sadd_with_overflow: 3634 case Intrinsic::uadd_with_overflow: 3635 case Intrinsic::ssub_with_overflow: 3636 case Intrinsic::usub_with_overflow: 3637 case Intrinsic::smul_with_overflow: 3638 case Intrinsic::umul_with_overflow: { 3639 // This implements the basic lowering of the xalu with overflow intrinsics. 
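// The value result below is produced by a flag-setting ADDS/SUBS (or by a
// multiply followed by an explicit compare), and the i1 overflow result is
// then materialized with CSINC WZR, WZR on the inverted condition, which is
// the CSET alias. Roughly, for a 32-bit sadd.with.overflow this becomes:
//   adds w0, wLHS, wRHS
//   cset w1, vs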
3640 const Function *Callee = II->getCalledFunction(); 3641 auto *Ty = cast<StructType>(Callee->getReturnType()); 3642 Type *RetTy = Ty->getTypeAtIndex(0U); 3643 3644 MVT VT; 3645 if (!isTypeLegal(RetTy, VT)) 3646 return false; 3647 3648 if (VT != MVT::i32 && VT != MVT::i64) 3649 return false; 3650 3651 const Value *LHS = II->getArgOperand(0); 3652 const Value *RHS = II->getArgOperand(1); 3653 // Canonicalize immediate to the RHS. 3654 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3655 std::swap(LHS, RHS); 3656 3657 // Simplify multiplies. 3658 Intrinsic::ID IID = II->getIntrinsicID(); 3659 switch (IID) { 3660 default: 3661 break; 3662 case Intrinsic::smul_with_overflow: 3663 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3664 if (C->getValue() == 2) { 3665 IID = Intrinsic::sadd_with_overflow; 3666 RHS = LHS; 3667 } 3668 break; 3669 case Intrinsic::umul_with_overflow: 3670 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3671 if (C->getValue() == 2) { 3672 IID = Intrinsic::uadd_with_overflow; 3673 RHS = LHS; 3674 } 3675 break; 3676 } 3677 3678 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3679 AArch64CC::CondCode CC = AArch64CC::Invalid; 3680 switch (IID) { 3681 default: llvm_unreachable("Unexpected intrinsic!"); 3682 case Intrinsic::sadd_with_overflow: 3683 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3684 CC = AArch64CC::VS; 3685 break; 3686 case Intrinsic::uadd_with_overflow: 3687 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3688 CC = AArch64CC::HS; 3689 break; 3690 case Intrinsic::ssub_with_overflow: 3691 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3692 CC = AArch64CC::VS; 3693 break; 3694 case Intrinsic::usub_with_overflow: 3695 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3696 CC = AArch64CC::LO; 3697 break; 3698 case Intrinsic::smul_with_overflow: { 3699 CC = AArch64CC::NE; 3700 Register LHSReg = getRegForValue(LHS); 3701 if (!LHSReg) 3702 return false; 3703 3704 Register RHSReg = getRegForValue(RHS); 3705 if (!RHSReg) 3706 return false; 3707 3708 if (VT == MVT::i32) { 3709 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg); 3710 Register MulSubReg = 3711 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3712 // cmp xreg, wreg, sxtw 3713 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg, 3714 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true, 3715 /*WantResult=*/false); 3716 MulReg = MulSubReg; 3717 } else { 3718 assert(VT == MVT::i64 && "Unexpected value type."); 3719 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3720 // reused in the next instruction. 
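// For the i64 case the check below compares the high half of the product
// (SMULH) against the sign bits of the low half; overflow occurred iff they
// differ. Roughly:
//   mul   x0, xLHS, xRHS
//   smulh x1, xLHS, xRHS
//   cmp   x1, x0, asr #63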
3721 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3722 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg); 3723 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63, 3724 /*WantResult=*/false); 3725 } 3726 break; 3727 } 3728 case Intrinsic::umul_with_overflow: { 3729 CC = AArch64CC::NE; 3730 Register LHSReg = getRegForValue(LHS); 3731 if (!LHSReg) 3732 return false; 3733 3734 Register RHSReg = getRegForValue(RHS); 3735 if (!RHSReg) 3736 return false; 3737 3738 if (VT == MVT::i32) { 3739 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg); 3740 // tst xreg, #0xffffffff00000000 3741 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3742 TII.get(AArch64::ANDSXri), AArch64::XZR) 3743 .addReg(MulReg) 3744 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64)); 3745 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3746 } else { 3747 assert(VT == MVT::i64 && "Unexpected value type."); 3748 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3749 // reused in the next instruction. 3750 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3751 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg); 3752 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false); 3753 } 3754 break; 3755 } 3756 } 3757 3758 if (MulReg) { 3759 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3760 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3761 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3762 } 3763 3764 if (!ResultReg1) 3765 return false; 3766 3767 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3768 AArch64::WZR, AArch64::WZR, 3769 getInvertedCondCode(CC)); 3770 (void)ResultReg2; 3771 assert((ResultReg1 + 1) == ResultReg2 && 3772 "Nonconsecutive result registers."); 3773 updateValueMap(II, ResultReg1, 2); 3774 return true; 3775 } 3776 case Intrinsic::aarch64_crc32b: 3777 case Intrinsic::aarch64_crc32h: 3778 case Intrinsic::aarch64_crc32w: 3779 case Intrinsic::aarch64_crc32x: 3780 case Intrinsic::aarch64_crc32cb: 3781 case Intrinsic::aarch64_crc32ch: 3782 case Intrinsic::aarch64_crc32cw: 3783 case Intrinsic::aarch64_crc32cx: { 3784 if (!Subtarget->hasCRC()) 3785 return false; 3786 3787 unsigned Opc; 3788 switch (II->getIntrinsicID()) { 3789 default: 3790 llvm_unreachable("Unexpected intrinsic!"); 3791 case Intrinsic::aarch64_crc32b: 3792 Opc = AArch64::CRC32Brr; 3793 break; 3794 case Intrinsic::aarch64_crc32h: 3795 Opc = AArch64::CRC32Hrr; 3796 break; 3797 case Intrinsic::aarch64_crc32w: 3798 Opc = AArch64::CRC32Wrr; 3799 break; 3800 case Intrinsic::aarch64_crc32x: 3801 Opc = AArch64::CRC32Xrr; 3802 break; 3803 case Intrinsic::aarch64_crc32cb: 3804 Opc = AArch64::CRC32CBrr; 3805 break; 3806 case Intrinsic::aarch64_crc32ch: 3807 Opc = AArch64::CRC32CHrr; 3808 break; 3809 case Intrinsic::aarch64_crc32cw: 3810 Opc = AArch64::CRC32CWrr; 3811 break; 3812 case Intrinsic::aarch64_crc32cx: 3813 Opc = AArch64::CRC32CXrr; 3814 break; 3815 } 3816 3817 Register LHSReg = getRegForValue(II->getArgOperand(0)); 3818 Register RHSReg = getRegForValue(II->getArgOperand(1)); 3819 if (!LHSReg || !RHSReg) 3820 return false; 3821 3822 Register ResultReg = 3823 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg); 3824 updateValueMap(II, ResultReg); 3825 return true; 3826 } 3827 } 3828 return false; 3829 } 3830 3831 bool AArch64FastISel::selectRet(const Instruction *I) { 3832 const ReturnInst *Ret = cast<ReturnInst>(I); 3833 const Function &F = *I->getParent()->getParent(); 3834 3835 if (!FuncInfo.CanLowerReturn) 3836 return false; 
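// The lowering below only handles the common AAPCS cases; bail out for
// varargs, functions that touch swifterror, and functions using split CSR,
// and leave those to SelectionDAG.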
3837 3838 if (F.isVarArg()) 3839 return false; 3840 3841 if (TLI.supportSwiftError() && 3842 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 3843 return false; 3844 3845 if (TLI.supportSplitCSR(FuncInfo.MF)) 3846 return false; 3847 3848 // Build a list of return value registers. 3849 SmallVector<unsigned, 4> RetRegs; 3850 3851 if (Ret->getNumOperands() > 0) { 3852 CallingConv::ID CC = F.getCallingConv(); 3853 SmallVector<ISD::OutputArg, 4> Outs; 3854 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3855 3856 // Analyze operands of the call, assigning locations to each operand. 3857 SmallVector<CCValAssign, 16> ValLocs; 3858 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3859 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS); 3860 3861 // Only handle a single return value for now. 3862 if (ValLocs.size() != 1) 3863 return false; 3864 3865 CCValAssign &VA = ValLocs[0]; 3866 const Value *RV = Ret->getOperand(0); 3867 3868 // Don't bother handling odd stuff for now. 3869 if ((VA.getLocInfo() != CCValAssign::Full) && 3870 (VA.getLocInfo() != CCValAssign::BCvt)) 3871 return false; 3872 3873 // Only handle register returns for now. 3874 if (!VA.isRegLoc()) 3875 return false; 3876 3877 Register Reg = getRegForValue(RV); 3878 if (Reg == 0) 3879 return false; 3880 3881 unsigned SrcReg = Reg + VA.getValNo(); 3882 Register DestReg = VA.getLocReg(); 3883 // Avoid a cross-class copy. This is very unlikely. 3884 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 3885 return false; 3886 3887 EVT RVEVT = TLI.getValueType(DL, RV->getType()); 3888 if (!RVEVT.isSimple()) 3889 return false; 3890 3891 // Vectors (of > 1 lane) in big endian need tricky handling. 3892 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() && 3893 !Subtarget->isLittleEndian()) 3894 return false; 3895 3896 MVT RVVT = RVEVT.getSimpleVT(); 3897 if (RVVT == MVT::f128) 3898 return false; 3899 3900 MVT DestVT = VA.getValVT(); 3901 // Special handling for extended integers. 3902 if (RVVT != DestVT) { 3903 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) 3904 return false; 3905 3906 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 3907 return false; 3908 3909 bool IsZExt = Outs[0].Flags.isZExt(); 3910 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); 3911 if (SrcReg == 0) 3912 return false; 3913 } 3914 3915 // "Callee" (i.e. value producer) zero extends pointers at function 3916 // boundary. 3917 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy()) 3918 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff); 3919 3920 // Make the copy. 3921 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3922 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); 3923 3924 // Add register to return instruction. 
3925 RetRegs.push_back(VA.getLocReg());
3926 }
3927
3928 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3929 TII.get(AArch64::RET_ReallyLR));
3930 for (unsigned RetReg : RetRegs)
3931 MIB.addReg(RetReg, RegState::Implicit);
3932 return true;
3933 }
3934
3935 bool AArch64FastISel::selectTrunc(const Instruction *I) {
3936 Type *DestTy = I->getType();
3937 Value *Op = I->getOperand(0);
3938 Type *SrcTy = Op->getType();
3939
3940 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
3941 EVT DestEVT = TLI.getValueType(DL, DestTy, true);
3942 if (!SrcEVT.isSimple())
3943 return false;
3944 if (!DestEVT.isSimple())
3945 return false;
3946
3947 MVT SrcVT = SrcEVT.getSimpleVT();
3948 MVT DestVT = DestEVT.getSimpleVT();
3949
3950 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
3951 SrcVT != MVT::i8)
3952 return false;
3953 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
3954 DestVT != MVT::i1)
3955 return false;
3956
3957 Register SrcReg = getRegForValue(Op);
3958 if (!SrcReg)
3959 return false;
3960
3961 // If we're truncating from i64 to a smaller non-legal type then generate an
3962 // AND. Otherwise, we know the high bits are undefined and a truncate only
3963 // generates a COPY. We cannot mark the source register also as the result
3964 // register, because this can incorrectly transfer the kill flag onto the
3965 // source register.
3966 unsigned ResultReg;
3967 if (SrcVT == MVT::i64) {
3968 uint64_t Mask = 0;
3969 switch (DestVT.SimpleTy) {
3970 default:
3971 // Trunc i64 to i32 is handled by the target-independent fast-isel.
3972 return false;
3973 case MVT::i1:
3974 Mask = 0x1;
3975 break;
3976 case MVT::i8:
3977 Mask = 0xff;
3978 break;
3979 case MVT::i16:
3980 Mask = 0xffff;
3981 break;
3982 }
3983 // Issue an extract_subreg to get the lower 32 bits.
3984 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg,
3985 AArch64::sub_32);
3986 // Create the AND instruction which performs the actual truncation.
3987 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask);
3988 assert(ResultReg && "Unexpected AND instruction emission failure.");
3989 } else {
3990 ResultReg = createResultReg(&AArch64::GPR32RegClass);
3991 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
3992 TII.get(TargetOpcode::COPY), ResultReg)
3993 .addReg(SrcReg);
3994 }
3995
3996 updateValueMap(I, ResultReg);
3997 return true;
3998 }
3999
4000 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) {
4001 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 ||
4002 DestVT == MVT::i64) &&
4003 "Unexpected value type.");
4004 // Handle i8 and i16 as i32.
4005 if (DestVT == MVT::i8 || DestVT == MVT::i16)
4006 DestVT = MVT::i32;
4007
4008 if (IsZExt) {
4009 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1);
4010 assert(ResultReg && "Unexpected AND instruction emission failure.");
4011 if (DestVT == MVT::i64) {
4012 // We're zero-extending i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears
4013 // the upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
4014 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass);
4015 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
4016 TII.get(AArch64::SUBREG_TO_REG), Reg64)
4017 .addImm(0)
4018 .addReg(ResultReg)
4019 .addImm(AArch64::sub_32);
4020 ResultReg = Reg64;
4021 }
4022 return ResultReg;
4023 } else {
4024 if (DestVT == MVT::i64) {
4025 // FIXME: We're sign-extending i1 to i64.
4026 return 0; 4027 } 4028 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 4029 0, 0); 4030 } 4031 } 4032 4033 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4034 unsigned Opc, ZReg; 4035 switch (RetVT.SimpleTy) { 4036 default: return 0; 4037 case MVT::i8: 4038 case MVT::i16: 4039 case MVT::i32: 4040 RetVT = MVT::i32; 4041 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 4042 case MVT::i64: 4043 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 4044 } 4045 4046 const TargetRegisterClass *RC = 4047 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4048 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg); 4049 } 4050 4051 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4052 if (RetVT != MVT::i64) 4053 return 0; 4054 4055 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, 4056 Op0, Op1, AArch64::XZR); 4057 } 4058 4059 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4060 if (RetVT != MVT::i64) 4061 return 0; 4062 4063 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, 4064 Op0, Op1, AArch64::XZR); 4065 } 4066 4067 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, 4068 unsigned Op1Reg) { 4069 unsigned Opc = 0; 4070 bool NeedTrunc = false; 4071 uint64_t Mask = 0; 4072 switch (RetVT.SimpleTy) { 4073 default: return 0; 4074 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; 4075 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; 4076 case MVT::i32: Opc = AArch64::LSLVWr; break; 4077 case MVT::i64: Opc = AArch64::LSLVXr; break; 4078 } 4079 4080 const TargetRegisterClass *RC = 4081 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4082 if (NeedTrunc) 4083 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4084 4085 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4086 if (NeedTrunc) 4087 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4088 return ResultReg; 4089 } 4090 4091 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4092 uint64_t Shift, bool IsZExt) { 4093 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4094 "Unexpected source/return type pair."); 4095 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4096 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4097 "Unexpected source value type."); 4098 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4099 RetVT == MVT::i64) && "Unexpected return value type."); 4100 4101 bool Is64Bit = (RetVT == MVT::i64); 4102 unsigned RegSize = Is64Bit ? 64 : 32; 4103 unsigned DstBits = RetVT.getSizeInBits(); 4104 unsigned SrcBits = SrcVT.getSizeInBits(); 4105 const TargetRegisterClass *RC = 4106 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4107 4108 // Just emit a copy for "zero" shifts. 4109 if (Shift == 0) { 4110 if (RetVT == SrcVT) { 4111 Register ResultReg = createResultReg(RC); 4112 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4113 TII.get(TargetOpcode::COPY), ResultReg) 4114 .addReg(Op0); 4115 return ResultReg; 4116 } else 4117 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4118 } 4119 4120 // Don't deal with undefined shifts. 4121 if (Shift >= DstBits) 4122 return 0; 4123 4124 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
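// The ImmR/ImmS operands computed below are chosen so that a single
// bitfield-move instruction performs both the shift and the extension: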
4125 // {S|U}BFM Wd, Wn, #r, #s 4126 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 4127 4128 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4129 // %2 = shl i16 %1, 4 4130 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 4131 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 4132 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 4133 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 4134 4135 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4136 // %2 = shl i16 %1, 8 4137 // Wd<32+7-24,32-24> = Wn<7:0> 4138 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 4139 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 4140 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 4141 4142 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4143 // %2 = shl i16 %1, 12 4144 // Wd<32+3-20,32-20> = Wn<3:0> 4145 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 4146 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 4147 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 4148 4149 unsigned ImmR = RegSize - Shift; 4150 // Limit the width to the length of the source type. 4151 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 4152 static const unsigned OpcTable[2][2] = { 4153 {AArch64::SBFMWri, AArch64::SBFMXri}, 4154 {AArch64::UBFMWri, AArch64::UBFMXri} 4155 }; 4156 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4157 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4158 Register TmpReg = MRI.createVirtualRegister(RC); 4159 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4160 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4161 .addImm(0) 4162 .addReg(Op0) 4163 .addImm(AArch64::sub_32); 4164 Op0 = TmpReg; 4165 } 4166 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4167 } 4168 4169 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, 4170 unsigned Op1Reg) { 4171 unsigned Opc = 0; 4172 bool NeedTrunc = false; 4173 uint64_t Mask = 0; 4174 switch (RetVT.SimpleTy) { 4175 default: return 0; 4176 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4177 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4178 case MVT::i32: Opc = AArch64::LSRVWr; break; 4179 case MVT::i64: Opc = AArch64::LSRVXr; break; 4180 } 4181 4182 const TargetRegisterClass *RC = 4183 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4184 if (NeedTrunc) { 4185 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask); 4186 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4187 } 4188 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4189 if (NeedTrunc) 4190 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4191 return ResultReg; 4192 } 4193 4194 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4195 uint64_t Shift, bool IsZExt) { 4196 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4197 "Unexpected source/return type pair."); 4198 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4199 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4200 "Unexpected source value type."); 4201 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4202 RetVT == MVT::i64) && "Unexpected return value type."); 4203 4204 bool Is64Bit = (RetVT == MVT::i64); 4205 unsigned RegSize = Is64Bit ? 64 : 32; 4206 unsigned DstBits = RetVT.getSizeInBits(); 4207 unsigned SrcBits = SrcVT.getSizeInBits(); 4208 const TargetRegisterClass *RC = 4209 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4210 4211 // Just emit a copy for "zero" shifts. 
4212 if (Shift == 0) { 4213 if (RetVT == SrcVT) { 4214 Register ResultReg = createResultReg(RC); 4215 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4216 TII.get(TargetOpcode::COPY), ResultReg) 4217 .addReg(Op0); 4218 return ResultReg; 4219 } else 4220 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4221 } 4222 4223 // Don't deal with undefined shifts. 4224 if (Shift >= DstBits) 4225 return 0; 4226 4227 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4228 // {S|U}BFM Wd, Wn, #r, #s 4229 // Wd<s-r:0> = Wn<s:r> when r <= s 4230 4231 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4232 // %2 = lshr i16 %1, 4 4233 // Wd<7-4:0> = Wn<7:4> 4234 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4235 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4236 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4237 4238 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4239 // %2 = lshr i16 %1, 8 4240 // Wd<7-7,0> = Wn<7:7> 4241 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4242 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4243 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4244 4245 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4246 // %2 = lshr i16 %1, 12 4247 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4248 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4249 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4250 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4251 4252 if (Shift >= SrcBits && IsZExt) 4253 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4254 4255 // It is not possible to fold a sign-extend into the LShr instruction. In this 4256 // case emit a sign-extend. 4257 if (!IsZExt) { 4258 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4259 if (!Op0) 4260 return 0; 4261 SrcVT = RetVT; 4262 SrcBits = SrcVT.getSizeInBits(); 4263 IsZExt = true; 4264 } 4265 4266 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4267 unsigned ImmS = SrcBits - 1; 4268 static const unsigned OpcTable[2][2] = { 4269 {AArch64::SBFMWri, AArch64::SBFMXri}, 4270 {AArch64::UBFMWri, AArch64::UBFMXri} 4271 }; 4272 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4273 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4274 Register TmpReg = MRI.createVirtualRegister(RC); 4275 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4276 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4277 .addImm(0) 4278 .addReg(Op0) 4279 .addImm(AArch64::sub_32); 4280 Op0 = TmpReg; 4281 } 4282 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4283 } 4284 4285 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, 4286 unsigned Op1Reg) { 4287 unsigned Opc = 0; 4288 bool NeedTrunc = false; 4289 uint64_t Mask = 0; 4290 switch (RetVT.SimpleTy) { 4291 default: return 0; 4292 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4293 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4294 case MVT::i32: Opc = AArch64::ASRVWr; break; 4295 case MVT::i64: Opc = AArch64::ASRVXr; break; 4296 } 4297 4298 const TargetRegisterClass *RC = 4299 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4300 if (NeedTrunc) { 4301 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); 4302 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4303 } 4304 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4305 if (NeedTrunc) 4306 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4307 return ResultReg; 4308 } 4309 4310 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4311 uint64_t Shift, bool IsZExt) { 4312 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4313 "Unexpected source/return type pair."); 4314 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4315 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4316 "Unexpected source value type."); 4317 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4318 RetVT == MVT::i64) && "Unexpected return value type."); 4319 4320 bool Is64Bit = (RetVT == MVT::i64); 4321 unsigned RegSize = Is64Bit ? 64 : 32; 4322 unsigned DstBits = RetVT.getSizeInBits(); 4323 unsigned SrcBits = SrcVT.getSizeInBits(); 4324 const TargetRegisterClass *RC = 4325 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4326 4327 // Just emit a copy for "zero" shifts. 4328 if (Shift == 0) { 4329 if (RetVT == SrcVT) { 4330 Register ResultReg = createResultReg(RC); 4331 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4332 TII.get(TargetOpcode::COPY), ResultReg) 4333 .addReg(Op0); 4334 return ResultReg; 4335 } else 4336 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4337 } 4338 4339 // Don't deal with undefined shifts. 4340 if (Shift >= DstBits) 4341 return 0; 4342 4343 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4344 // {S|U}BFM Wd, Wn, #r, #s 4345 // Wd<s-r:0> = Wn<s:r> when r <= s 4346 4347 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4348 // %2 = ashr i16 %1, 4 4349 // Wd<7-4:0> = Wn<7:4> 4350 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4351 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4352 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4353 4354 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4355 // %2 = ashr i16 %1, 8 4356 // Wd<7-7,0> = Wn<7:7> 4357 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4358 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4359 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4360 4361 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4362 // %2 = ashr i16 %1, 12 4363 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4364 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4365 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4366 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4367 4368 if (Shift >= SrcBits && IsZExt) 4369 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4370 4371 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4372 unsigned ImmS = SrcBits - 1; 4373 static const unsigned OpcTable[2][2] = { 4374 {AArch64::SBFMWri, AArch64::SBFMXri}, 4375 {AArch64::UBFMWri, AArch64::UBFMXri} 4376 }; 4377 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4378 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4379 Register TmpReg = MRI.createVirtualRegister(RC); 4380 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4381 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4382 .addImm(0) 4383 .addReg(Op0) 4384 .addImm(AArch64::sub_32); 4385 Op0 = TmpReg; 4386 } 4387 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4388 } 4389 4390 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4391 bool IsZExt) { 4392 
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4393 4394 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4395 // DestVT are odd things, so test to make sure that they are both types we can 4396 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4397 // bail out to SelectionDAG. 4398 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4399 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4400 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4401 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4402 return 0; 4403 4404 unsigned Opc; 4405 unsigned Imm = 0; 4406 4407 switch (SrcVT.SimpleTy) { 4408 default: 4409 return 0; 4410 case MVT::i1: 4411 return emiti1Ext(SrcReg, DestVT, IsZExt); 4412 case MVT::i8: 4413 if (DestVT == MVT::i64) 4414 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4415 else 4416 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4417 Imm = 7; 4418 break; 4419 case MVT::i16: 4420 if (DestVT == MVT::i64) 4421 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4422 else 4423 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4424 Imm = 15; 4425 break; 4426 case MVT::i32: 4427 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4428 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4429 Imm = 31; 4430 break; 4431 } 4432 4433 // Handle i8 and i16 as i32. 4434 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4435 DestVT = MVT::i32; 4436 else if (DestVT == MVT::i64) { 4437 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4438 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4439 TII.get(AArch64::SUBREG_TO_REG), Src64) 4440 .addImm(0) 4441 .addReg(SrcReg) 4442 .addImm(AArch64::sub_32); 4443 SrcReg = Src64; 4444 } 4445 4446 const TargetRegisterClass *RC = 4447 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4448 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm); 4449 } 4450 4451 static bool isZExtLoad(const MachineInstr *LI) { 4452 switch (LI->getOpcode()) { 4453 default: 4454 return false; 4455 case AArch64::LDURBBi: 4456 case AArch64::LDURHHi: 4457 case AArch64::LDURWi: 4458 case AArch64::LDRBBui: 4459 case AArch64::LDRHHui: 4460 case AArch64::LDRWui: 4461 case AArch64::LDRBBroX: 4462 case AArch64::LDRHHroX: 4463 case AArch64::LDRWroX: 4464 case AArch64::LDRBBroW: 4465 case AArch64::LDRHHroW: 4466 case AArch64::LDRWroW: 4467 return true; 4468 } 4469 } 4470 4471 static bool isSExtLoad(const MachineInstr *LI) { 4472 switch (LI->getOpcode()) { 4473 default: 4474 return false; 4475 case AArch64::LDURSBWi: 4476 case AArch64::LDURSHWi: 4477 case AArch64::LDURSBXi: 4478 case AArch64::LDURSHXi: 4479 case AArch64::LDURSWi: 4480 case AArch64::LDRSBWui: 4481 case AArch64::LDRSHWui: 4482 case AArch64::LDRSBXui: 4483 case AArch64::LDRSHXui: 4484 case AArch64::LDRSWui: 4485 case AArch64::LDRSBWroX: 4486 case AArch64::LDRSHWroX: 4487 case AArch64::LDRSBXroX: 4488 case AArch64::LDRSHXroX: 4489 case AArch64::LDRSWroX: 4490 case AArch64::LDRSBWroW: 4491 case AArch64::LDRSHWroW: 4492 case AArch64::LDRSBXroW: 4493 case AArch64::LDRSHXroW: 4494 case AArch64::LDRSWroW: 4495 return true; 4496 } 4497 } 4498 4499 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4500 MVT SrcVT) { 4501 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4502 if (!LI || !LI->hasOneUse()) 4503 return false; 4504 4505 // Check if the load instruction has already been selected. 
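// (lookUpRegForValue, unlike getRegForValue, only consults the existing value
// map and returns no register if the load has not been selected yet, so
// nothing new is emitted here.)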
4506 Register Reg = lookUpRegForValue(LI); 4507 if (!Reg) 4508 return false; 4509 4510 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4511 if (!MI) 4512 return false; 4513 4514 // Check if the correct load instruction has been emitted - SelectionDAG might 4515 // have emitted a zero-extending load, but we need a sign-extending load. 4516 bool IsZExt = isa<ZExtInst>(I); 4517 const auto *LoadMI = MI; 4518 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4519 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4520 Register LoadReg = MI->getOperand(1).getReg(); 4521 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4522 assert(LoadMI && "Expected valid instruction"); 4523 } 4524 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4525 return false; 4526 4527 // Nothing to be done. 4528 if (RetVT != MVT::i64 || SrcVT > MVT::i32) { 4529 updateValueMap(I, Reg); 4530 return true; 4531 } 4532 4533 if (IsZExt) { 4534 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 4535 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4536 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4537 .addImm(0) 4538 .addReg(Reg, getKillRegState(true)) 4539 .addImm(AArch64::sub_32); 4540 Reg = Reg64; 4541 } else { 4542 assert((MI->getOpcode() == TargetOpcode::COPY && 4543 MI->getOperand(1).getSubReg() == AArch64::sub_32) && 4544 "Expected copy instruction"); 4545 Reg = MI->getOperand(1).getReg(); 4546 MachineBasicBlock::iterator I(MI); 4547 removeDeadCode(I, std::next(I)); 4548 } 4549 updateValueMap(I, Reg); 4550 return true; 4551 } 4552 4553 bool AArch64FastISel::selectIntExt(const Instruction *I) { 4554 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 4555 "Unexpected integer extend instruction."); 4556 MVT RetVT; 4557 MVT SrcVT; 4558 if (!isTypeSupported(I->getType(), RetVT)) 4559 return false; 4560 4561 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) 4562 return false; 4563 4564 // Try to optimize already sign-/zero-extended values from load instructions. 4565 if (optimizeIntExtLoad(I, RetVT, SrcVT)) 4566 return true; 4567 4568 Register SrcReg = getRegForValue(I->getOperand(0)); 4569 if (!SrcReg) 4570 return false; 4571 4572 // Try to optimize already sign-/zero-extended values from function arguments. 4573 bool IsZExt = isa<ZExtInst>(I); 4574 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) { 4575 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { 4576 if (RetVT == MVT::i64 && SrcVT != MVT::i64) { 4577 Register ResultReg = createResultReg(&AArch64::GPR64RegClass); 4578 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4579 TII.get(AArch64::SUBREG_TO_REG), ResultReg) 4580 .addImm(0) 4581 .addReg(SrcReg) 4582 .addImm(AArch64::sub_32); 4583 SrcReg = ResultReg; 4584 } 4585 4586 updateValueMap(I, SrcReg); 4587 return true; 4588 } 4589 } 4590 4591 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); 4592 if (!ResultReg) 4593 return false; 4594 4595 updateValueMap(I, ResultReg); 4596 return true; 4597 } 4598 4599 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { 4600 EVT DestEVT = TLI.getValueType(DL, I->getType(), true); 4601 if (!DestEVT.isSimple()) 4602 return false; 4603 4604 MVT DestVT = DestEVT.getSimpleVT(); 4605 if (DestVT != MVT::i64 && DestVT != MVT::i32) 4606 return false; 4607 4608 unsigned DivOpc; 4609 bool Is64bit = (DestVT == MVT::i64); 4610 switch (ISDOpcode) { 4611 default: 4612 return false; 4613 case ISD::SREM: 4614 DivOpc = Is64bit ? 
AArch64::SDIVXr : AArch64::SDIVWr; 4615 break; 4616 case ISD::UREM: 4617 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; 4618 break; 4619 } 4620 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4621 Register Src0Reg = getRegForValue(I->getOperand(0)); 4622 if (!Src0Reg) 4623 return false; 4624 4625 Register Src1Reg = getRegForValue(I->getOperand(1)); 4626 if (!Src1Reg) 4627 return false; 4628 4629 const TargetRegisterClass *RC = 4630 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4631 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg); 4632 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4633 // The remainder is computed as numerator - (quotient * denominator) using the 4634 // MSUB instruction. 4635 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg); 4636 updateValueMap(I, ResultReg); 4637 return true; 4638 } 4639 4640 bool AArch64FastISel::selectMul(const Instruction *I) { 4641 MVT VT; 4642 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4643 return false; 4644 4645 if (VT.isVector()) 4646 return selectBinaryOp(I, ISD::MUL); 4647 4648 const Value *Src0 = I->getOperand(0); 4649 const Value *Src1 = I->getOperand(1); 4650 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4651 if (C->getValue().isPowerOf2()) 4652 std::swap(Src0, Src1); 4653 4654 // Try to simplify to a shift instruction. 4655 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4656 if (C->getValue().isPowerOf2()) { 4657 uint64_t ShiftVal = C->getValue().logBase2(); 4658 MVT SrcVT = VT; 4659 bool IsZExt = true; 4660 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4661 if (!isIntExtFree(ZExt)) { 4662 MVT VT; 4663 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4664 SrcVT = VT; 4665 IsZExt = true; 4666 Src0 = ZExt->getOperand(0); 4667 } 4668 } 4669 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4670 if (!isIntExtFree(SExt)) { 4671 MVT VT; 4672 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4673 SrcVT = VT; 4674 IsZExt = false; 4675 Src0 = SExt->getOperand(0); 4676 } 4677 } 4678 } 4679 4680 Register Src0Reg = getRegForValue(Src0); 4681 if (!Src0Reg) 4682 return false; 4683 4684 unsigned ResultReg = 4685 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt); 4686 4687 if (ResultReg) { 4688 updateValueMap(I, ResultReg); 4689 return true; 4690 } 4691 } 4692 4693 Register Src0Reg = getRegForValue(I->getOperand(0)); 4694 if (!Src0Reg) 4695 return false; 4696 4697 Register Src1Reg = getRegForValue(I->getOperand(1)); 4698 if (!Src1Reg) 4699 return false; 4700 4701 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg); 4702 4703 if (!ResultReg) 4704 return false; 4705 4706 updateValueMap(I, ResultReg); 4707 return true; 4708 } 4709 4710 bool AArch64FastISel::selectShift(const Instruction *I) { 4711 MVT RetVT; 4712 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4713 return false; 4714 4715 if (RetVT.isVector()) 4716 return selectOperator(I, I->getOpcode()); 4717 4718 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4719 unsigned ResultReg = 0; 4720 uint64_t ShiftVal = C->getZExtValue(); 4721 MVT SrcVT = RetVT; 4722 bool IsZExt = I->getOpcode() != Instruction::AShr; 4723 const Value *Op0 = I->getOperand(0); 4724 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4725 if (!isIntExtFree(ZExt)) { 4726 MVT TmpVT; 4727 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4728 SrcVT = TmpVT; 4729 IsZExt = true; 
4730 Op0 = ZExt->getOperand(0); 4731 } 4732 } 4733 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4734 if (!isIntExtFree(SExt)) { 4735 MVT TmpVT; 4736 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4737 SrcVT = TmpVT; 4738 IsZExt = false; 4739 Op0 = SExt->getOperand(0); 4740 } 4741 } 4742 } 4743 4744 Register Op0Reg = getRegForValue(Op0); 4745 if (!Op0Reg) 4746 return false; 4747 4748 switch (I->getOpcode()) { 4749 default: llvm_unreachable("Unexpected instruction."); 4750 case Instruction::Shl: 4751 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4752 break; 4753 case Instruction::AShr: 4754 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4755 break; 4756 case Instruction::LShr: 4757 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4758 break; 4759 } 4760 if (!ResultReg) 4761 return false; 4762 4763 updateValueMap(I, ResultReg); 4764 return true; 4765 } 4766 4767 Register Op0Reg = getRegForValue(I->getOperand(0)); 4768 if (!Op0Reg) 4769 return false; 4770 4771 Register Op1Reg = getRegForValue(I->getOperand(1)); 4772 if (!Op1Reg) 4773 return false; 4774 4775 unsigned ResultReg = 0; 4776 switch (I->getOpcode()) { 4777 default: llvm_unreachable("Unexpected instruction."); 4778 case Instruction::Shl: 4779 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg); 4780 break; 4781 case Instruction::AShr: 4782 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg); 4783 break; 4784 case Instruction::LShr: 4785 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg); 4786 break; 4787 } 4788 4789 if (!ResultReg) 4790 return false; 4791 4792 updateValueMap(I, ResultReg); 4793 return true; 4794 } 4795 4796 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4797 MVT RetVT, SrcVT; 4798 4799 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4800 return false; 4801 if (!isTypeLegal(I->getType(), RetVT)) 4802 return false; 4803 4804 unsigned Opc; 4805 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4806 Opc = AArch64::FMOVWSr; 4807 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4808 Opc = AArch64::FMOVXDr; 4809 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4810 Opc = AArch64::FMOVSWr; 4811 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4812 Opc = AArch64::FMOVDXr; 4813 else 4814 return false; 4815 4816 const TargetRegisterClass *RC = nullptr; 4817 switch (RetVT.SimpleTy) { 4818 default: llvm_unreachable("Unexpected value type."); 4819 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4820 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4821 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4822 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4823 } 4824 Register Op0Reg = getRegForValue(I->getOperand(0)); 4825 if (!Op0Reg) 4826 return false; 4827 4828 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg); 4829 if (!ResultReg) 4830 return false; 4831 4832 updateValueMap(I, ResultReg); 4833 return true; 4834 } 4835 4836 bool AArch64FastISel::selectFRem(const Instruction *I) { 4837 MVT RetVT; 4838 if (!isTypeLegal(I->getType(), RetVT)) 4839 return false; 4840 4841 RTLIB::Libcall LC; 4842 switch (RetVT.SimpleTy) { 4843 default: 4844 return false; 4845 case MVT::f32: 4846 LC = RTLIB::REM_F32; 4847 break; 4848 case MVT::f64: 4849 LC = RTLIB::REM_F64; 4850 break; 4851 } 4852 4853 ArgListTy Args; 4854 Args.reserve(I->getNumOperands()); 4855 4856 // Populate the argument list. 
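// (Both frem operands are passed as ordinary call arguments; the
// RTLIB::REM_F32/REM_F64 libcalls normally resolve to fmodf/fmod.)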
4857 for (auto &Arg : I->operands()) { 4858 ArgListEntry Entry; 4859 Entry.Val = Arg; 4860 Entry.Ty = Arg->getType(); 4861 Args.push_back(Entry); 4862 } 4863 4864 CallLoweringInfo CLI; 4865 MCContext &Ctx = MF->getContext(); 4866 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4867 TLI.getLibcallName(LC), std::move(Args)); 4868 if (!lowerCallTo(CLI)) 4869 return false; 4870 updateValueMap(I, CLI.ResultReg); 4871 return true; 4872 } 4873 4874 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4875 MVT VT; 4876 if (!isTypeLegal(I->getType(), VT)) 4877 return false; 4878 4879 if (!isa<ConstantInt>(I->getOperand(1))) 4880 return selectBinaryOp(I, ISD::SDIV); 4881 4882 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4883 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4884 !(C.isPowerOf2() || C.isNegatedPowerOf2())) 4885 return selectBinaryOp(I, ISD::SDIV); 4886 4887 unsigned Lg2 = C.countr_zero(); 4888 Register Src0Reg = getRegForValue(I->getOperand(0)); 4889 if (!Src0Reg) 4890 return false; 4891 4892 if (cast<BinaryOperator>(I)->isExact()) { 4893 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2); 4894 if (!ResultReg) 4895 return false; 4896 updateValueMap(I, ResultReg); 4897 return true; 4898 } 4899 4900 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4901 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne); 4902 if (!AddReg) 4903 return false; 4904 4905 // (Src0 < 0) ? Pow2 - 1 : 0; 4906 if (!emitICmp_ri(VT, Src0Reg, 0)) 4907 return false; 4908 4909 unsigned SelectOpc; 4910 const TargetRegisterClass *RC; 4911 if (VT == MVT::i64) { 4912 SelectOpc = AArch64::CSELXr; 4913 RC = &AArch64::GPR64RegClass; 4914 } else { 4915 SelectOpc = AArch64::CSELWr; 4916 RC = &AArch64::GPR32RegClass; 4917 } 4918 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg, 4919 AArch64CC::LT); 4920 if (!SelectReg) 4921 return false; 4922 4923 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4924 // negate the result. 4925 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4926 unsigned ResultReg; 4927 if (C.isNegative()) 4928 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg, 4929 AArch64_AM::ASR, Lg2); 4930 else 4931 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2); 4932 4933 if (!ResultReg) 4934 return false; 4935 4936 updateValueMap(I, ResultReg); 4937 return true; 4938 } 4939 4940 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4941 /// have to duplicate it for AArch64, because otherwise we would fail during the 4942 /// sign-extend emission. 4943 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4944 Register IdxN = getRegForValue(Idx); 4945 if (IdxN == 0) 4946 // Unhandled operand. Halt "fast" selection and bail. 4947 return 0; 4948 4949 // If the index is smaller or larger than intptr_t, truncate or extend it. 4950 MVT PtrVT = TLI.getPointerTy(DL); 4951 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); 4952 if (IdxVT.bitsLT(PtrVT)) { 4953 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false); 4954 } else if (IdxVT.bitsGT(PtrVT)) 4955 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64"); 4956 return IdxN; 4957 } 4958 4959 /// This is mostly a copy of the existing FastISel GEP code, but we have to 4960 /// duplicate it for AArch64, because otherwise we would bail out even for 4961 /// simple cases. 
This is because the standard fastEmit functions don't cover
4962 /// MUL at all and ADD is lowered very inefficiently.
4963 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
4964 if (Subtarget->isTargetILP32())
4965 return false;
4966
4967 Register N = getRegForValue(I->getOperand(0));
4968 if (!N)
4969 return false;
4970
4971 // Keep a running tab of the total offset to coalesce multiple N = N + Offset
4972 // into a single N = N + TotalOffset.
4973 uint64_t TotalOffs = 0;
4974 MVT VT = TLI.getPointerTy(DL);
4975 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
4976 GTI != E; ++GTI) {
4977 const Value *Idx = GTI.getOperand();
4978 if (auto *StTy = GTI.getStructTypeOrNull()) {
4979 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
4980 // N = N + Offset
4981 if (Field)
4982 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
4983 } else {
4984 // If this is a constant subscript, handle it quickly.
4985 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
4986 if (CI->isZero())
4987 continue;
4988 // N = N + Offset
4989 TotalOffs += GTI.getSequentialElementStride(DL) *
4990 cast<ConstantInt>(CI)->getSExtValue();
4991 continue;
4992 }
4993 if (TotalOffs) {
4994 N = emitAdd_ri_(VT, N, TotalOffs);
4995 if (!N)
4996 return false;
4997 TotalOffs = 0;
4998 }
4999
5000 // N = N + Idx * ElementSize;
5001 uint64_t ElementSize = GTI.getSequentialElementStride(DL);
5002 unsigned IdxN = getRegForGEPIndex(Idx);
5003 if (!IdxN)
5004 return false;
5005
5006 if (ElementSize != 1) {
5007 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
5008 if (!C)
5009 return false;
5010 IdxN = emitMul_rr(VT, IdxN, C);
5011 if (!IdxN)
5012 return false;
5013 }
5014 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
5015 if (!N)
5016 return false;
5017 }
5018 }
5019 if (TotalOffs) {
5020 N = emitAdd_ri_(VT, N, TotalOffs);
5021 if (!N)
5022 return false;
5023 }
5024 updateValueMap(I, N);
5025 return true;
5026 }
5027
5028 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
5029 assert(TM.getOptLevel() == CodeGenOptLevel::None &&
5030 "cmpxchg survived AtomicExpand at optlevel > -O0");
5031
5032 auto *RetPairTy = cast<StructType>(I->getType());
5033 Type *RetTy = RetPairTy->getTypeAtIndex(0U);
5034 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
5035 "cmpxchg has a non-i1 status result");
5036
5037 MVT VT;
5038 if (!isTypeLegal(RetTy, VT))
5039 return false;
5040
5041 const TargetRegisterClass *ResRC;
5042 unsigned Opc, CmpOpc;
5043 // This only supports i32/i64, because i8/i16 aren't legal, and the generic
5044 // extractvalue selection doesn't support that.
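// The sequence emitted below is, roughly: a CMP_SWAP_32/64 pseudo (expanded
// to the actual atomic sequence after instruction selection), then a SUBS
// comparing the loaded value with the expected one, and a CSINC that
// materializes the i1 success flag.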
5045 if (VT == MVT::i32) { 5046 Opc = AArch64::CMP_SWAP_32; 5047 CmpOpc = AArch64::SUBSWrs; 5048 ResRC = &AArch64::GPR32RegClass; 5049 } else if (VT == MVT::i64) { 5050 Opc = AArch64::CMP_SWAP_64; 5051 CmpOpc = AArch64::SUBSXrs; 5052 ResRC = &AArch64::GPR64RegClass; 5053 } else { 5054 return false; 5055 } 5056 5057 const MCInstrDesc &II = TII.get(Opc); 5058 5059 const Register AddrReg = constrainOperandRegClass( 5060 II, getRegForValue(I->getPointerOperand()), II.getNumDefs()); 5061 const Register DesiredReg = constrainOperandRegClass( 5062 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1); 5063 const Register NewReg = constrainOperandRegClass( 5064 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2); 5065 5066 const Register ResultReg1 = createResultReg(ResRC); 5067 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass); 5068 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass); 5069 5070 // FIXME: MachineMemOperand doesn't support cmpxchg yet. 5071 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 5072 .addDef(ResultReg1) 5073 .addDef(ScratchReg) 5074 .addUse(AddrReg) 5075 .addUse(DesiredReg) 5076 .addUse(NewReg); 5077 5078 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc)) 5079 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR) 5080 .addUse(ResultReg1) 5081 .addUse(DesiredReg) 5082 .addImm(0); 5083 5084 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr)) 5085 .addDef(ResultReg2) 5086 .addUse(AArch64::WZR) 5087 .addUse(AArch64::WZR) 5088 .addImm(AArch64CC::NE); 5089 5090 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); 5091 updateValueMap(I, ResultReg1, 2); 5092 return true; 5093 } 5094 5095 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 5096 if (TLI.fallBackToDAGISel(*I)) 5097 return false; 5098 switch (I->getOpcode()) { 5099 default: 5100 break; 5101 case Instruction::Add: 5102 case Instruction::Sub: 5103 return selectAddSub(I); 5104 case Instruction::Mul: 5105 return selectMul(I); 5106 case Instruction::SDiv: 5107 return selectSDiv(I); 5108 case Instruction::SRem: 5109 if (!selectBinaryOp(I, ISD::SREM)) 5110 return selectRem(I, ISD::SREM); 5111 return true; 5112 case Instruction::URem: 5113 if (!selectBinaryOp(I, ISD::UREM)) 5114 return selectRem(I, ISD::UREM); 5115 return true; 5116 case Instruction::Shl: 5117 case Instruction::LShr: 5118 case Instruction::AShr: 5119 return selectShift(I); 5120 case Instruction::And: 5121 case Instruction::Or: 5122 case Instruction::Xor: 5123 return selectLogicalOp(I); 5124 case Instruction::Br: 5125 return selectBranch(I); 5126 case Instruction::IndirectBr: 5127 return selectIndirectBr(I); 5128 case Instruction::BitCast: 5129 if (!FastISel::selectBitCast(I)) 5130 return selectBitCast(I); 5131 return true; 5132 case Instruction::FPToSI: 5133 if (!selectCast(I, ISD::FP_TO_SINT)) 5134 return selectFPToInt(I, /*Signed=*/true); 5135 return true; 5136 case Instruction::FPToUI: 5137 return selectFPToInt(I, /*Signed=*/false); 5138 case Instruction::ZExt: 5139 case Instruction::SExt: 5140 return selectIntExt(I); 5141 case Instruction::Trunc: 5142 if (!selectCast(I, ISD::TRUNCATE)) 5143 return selectTrunc(I); 5144 return true; 5145 case Instruction::FPExt: 5146 return selectFPExt(I); 5147 case Instruction::FPTrunc: 5148 return selectFPTrunc(I); 5149 case Instruction::SIToFP: 5150 if (!selectCast(I, ISD::SINT_TO_FP)) 5151 return selectIntToFP(I, /*Signed=*/true); 5152 return true; 5153 case Instruction::UIToFP: 
5154 return selectIntToFP(I, /*Signed=*/false);
5155 case Instruction::Load:
5156 return selectLoad(I);
5157 case Instruction::Store:
5158 return selectStore(I);
5159 case Instruction::FCmp:
5160 case Instruction::ICmp:
5161 return selectCmp(I);
5162 case Instruction::Select:
5163 return selectSelect(I);
5164 case Instruction::Ret:
5165 return selectRet(I);
5166 case Instruction::FRem:
5167 return selectFRem(I);
5168 case Instruction::GetElementPtr:
5169 return selectGetElementPtr(I);
5170 case Instruction::AtomicCmpXchg:
5171 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
5172 }
5173
5174 // Fall back to target-independent instruction selection.
5175 return selectOperator(I, I->getOpcode());
5176 }
5177
5178 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
5179 const TargetLibraryInfo *LibInfo) {
5180
5181 SMEAttrs CallerAttrs(*FuncInfo.Fn);
5182 if (CallerAttrs.hasZAState() || CallerAttrs.hasStreamingInterfaceOrBody() ||
5183 CallerAttrs.hasStreamingCompatibleInterface())
5184 return nullptr;
5185 return new AArch64FastISel(FuncInfo, LibInfo);
5186 }
5187