//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg = 0;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }

    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }

    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }

    unsigned getOffsetReg() const {
      return OffsetReg;
    }

    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }

    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }

    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags,
                            unsigned ScaleFactor, MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          MaybeAlign Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  unsigned getRegForGEPIndex(const Value *Idx);

  // Emit helper routines.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
                        MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            unsigned RHSReg, uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = false);

  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

/// Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    if (LI->hasOneUse())
      return true;

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
      return true;

  return false;
}

/// Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
static unsigned getImplicitScaleFactor(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return 0; // invalid
  case MVT::i1: // fall-through
  case MVT::i8:
    return 1;
  case MVT::i16:
    return 2;
  case MVT::i32: // fall-through
  case MVT::f32:
    return 4;
  case MVT::i64: // fall-through
  case MVT::f64:
    return 8;
  }
}

CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::WebKit_JS)
    return CC_AArch64_WebKit_JS;
  if (CC == CallingConv::GHC)
    return CC_AArch64_GHC;
  if (CC == CallingConv::CFGuard_Check)
    return CC_AArch64_Win64_CFGuard_Check;
  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}

unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}

unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  Register ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}

unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  int Imm =
      Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
  if (Imm != -1) {
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // For the large code model materialize the FP constant in code.
  if (TM.getCodeModel() == CodeModel::Large) {
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    Register TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(TmpReg, getKillRegState(true));

    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}

unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
    return 0;

  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    unsigned LdrOpc;
    if (Subtarget->isTargetILP32()) {
      ResultReg = createResultReg(&AArch64::GPR32RegClass);
      LdrOpc = AArch64::LDRWui;
    } else {
      ResultReg = createResultReg(&AArch64::GPR64RegClass);
      LdrOpc = AArch64::LDRXui;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                          AArch64II::MO_NC | OpFlags);
    if (!Subtarget->isTargetILP32())
      return ResultReg;

    // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
    // so we must extend the result on ILP32.
    Register Result64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::SUBREG_TO_REG))
        .addDef(Result64)
        .addImm(0)
        .addReg(ResultReg, RegState::Kill)
        .addImm(AArch64::sub_32);
    return Result64;
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    if (OpFlags & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      //
      // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
      // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
      // are not exactly 1:1 with FastISel so we cannot easily abstract this
      // out. At some point, it would be nice to find a way to not have this
      // duplicate code.
      unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
              DstReg)
          .addReg(ADRPReg)
          .addGlobalAddress(GV, /*Offset=*/0x100000000,
                            AArch64II::MO_PREL | AArch64II::MO_G3)
          .addImm(48);
      ADRPReg = DstReg;
    }

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0,
                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
        .addImm(0);
  }
  return ResultReg;
}

unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();
  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
  // 'null' pointers need to have a somewhat special treatment.
  if (isa<ConstantPointerNull>(C)) {
    assert(VT == MVT::i64 && "Expected 64-bit pointers");
    return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
  }

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

  return 0;
}

unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return 0;

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
}

/// Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
        return true;
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
        return true;
  }
  return false;
}

// Computes the address to get to an object.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast:
    // Look through bitcasts.
    return computeAddress(U->getOperand(0), Addr, Ty);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
         GTI != E; ++GTI) {
      const Value *Op = GTI.getOperand();
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
        while (true) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
  case Instruction::Sub: {
    // Subs of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }
    break;
  }
  case Instruction::Shl: {
    if (Addr.getOffsetReg())
      break;

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
    if (!CI)
      break;

    unsigned Val = CI->getZExtValue();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
            std::swap(LHS, RHS);

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Register Reg = getRegForValue(LHS);
            if (!Reg)
              return false;
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
            Addr.setOffsetReg(Reg);
            return true;
          }
      }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::Mul: {
    if (Addr.getOffsetReg())
      break;

    if (!isMulPowOf2(U))
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize power-of-2 value to the RHS.
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())
        std::swap(LHS, RHS);

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = LHS;
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::And: {
    if (Addr.getOffsetReg())
      break;

    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
        std::swap(LHS, RHS);

    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        Register Reg = getRegForValue(LHS);
        if (!Reg)
          return false;
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
        Addr.setOffsetReg(Reg);
        return true;
      }
    break;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())
      break;

    const Value *Src = nullptr;
    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    if (!Src)
      break;

    Addr.setShift(0);
    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  } // end switch

  if (Addr.isRegBase() && !Addr.getReg()) {
    Register Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setReg(Reg);
    return true;
  }

  if (!Addr.getOffsetReg()) {
    Register Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }

  return false;
}

bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  bool InMBB = true;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}

bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
    return false;

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

/// Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign- or zero-extended to a basic operation,
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}

bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}

void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           MachineMemOperand::Flags Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
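    // For stores the value being stored is operand 0, so the address operands
    // start one slot later. Constrain the base and offset registers to the
    // register classes the instruction's operand descriptions expect.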
    Addr.setReg(
      constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    Addr.setOffsetReg(
      constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}

unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32: // fall-through
  case MVT::i64:
    break;
  }
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr)
          std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
                                WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          Register RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
                               C->getZExtValue(), SetFlags, WantResult);
        }
    Register RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
                         SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      Register RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
                                ShiftVal, SetFlags, WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          Register RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
                                    ShiftVal, SetFlags, WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
}

unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
      RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
    return 0;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
      { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
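  // If the caller only wants the flags, write the result to the zero register
  // instead of allocating a fresh virtual register.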
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg);
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        uint64_t Imm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  unsigned ShiftImm;
  if (isUInt<12>(Imm))
    ShiftImm = 0;
  else if ((Imm & 0xfff000) == Imm) {
    ShiftImm = 12;
    Imm >>= 12;
  } else
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri,  AArch64::SUBXri  },
      { AArch64::ADDWri,  AArch64::ADDXri  }  },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addImm(Imm)
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
      { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getShifterImm(ShiftType, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
         RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  if (ShiftImm >= 4)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
      { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC = nullptr;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getArithExtendImm(ExtType, ShiftImm));
  return ResultReg;
}

bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
  Type *Ty = LHS->getType();
  EVT EVT = TLI.getValueType(DL, Ty, true);
  if (!EVT.isSimple())
    return false;
  MVT VT = EVT.getSimpleVT();

  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    return emitICmp(VT, LHS, RHS, IsZExt);
  case MVT::f32:
  case MVT::f64:
    return emitFCmp(VT, LHS, RHS);
  }
}

bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
                 IsZExt) != 0;
}

bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
}

bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  if (RetVT != MVT::f32 && RetVT != MVT::f64)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  bool UseImm = false;
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->isZero() && !CFP->isNegative())
      UseImm = true;

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;

  if (UseImm) {
    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
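    // The immediate FCMP forms compare against an implicit +0.0, so only the
    // LHS register operand is needed.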
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(LHSReg);
    return true;
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return false;

  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
      .addReg(LHSReg)
      .addReg(RHSReg);
  return true;
}

unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

/// This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
  unsigned ResultReg;
  if (Imm < 0)
    ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
  else
    ResultReg = emitAddSub_ri(true, VT, Op0, Imm);

  if (ResultReg)
    return ResultReg;

  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return 0;

  ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
  return ResultReg;
}

unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg, bool WantResult) {
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
                       /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
                       ShiftImm, /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      Register RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        Register RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, uint64_t Imm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };
  const TargetRegisterClass *RC;
  unsigned Opc;
  unsigned RegSize;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32: {
    unsigned Idx = ISDOpc - ISD::AND;
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    RegSize = 32;
    break;
  }
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;
    RegSize = 64;
    break;
  }

  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return 0;

  Register ResultReg =
      fastEmitInst_ri(Opc, RC, LHSReg,
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, unsigned RHSReg,
                                           uint64_t ShiftImm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };

  // Don't deal with undefined shifts.
1722 if (ShiftImm >= RetVT.getSizeInBits()) 1723 return 0; 1724 1725 const TargetRegisterClass *RC; 1726 unsigned Opc; 1727 switch (RetVT.SimpleTy) { 1728 default: 1729 return 0; 1730 case MVT::i1: 1731 case MVT::i8: 1732 case MVT::i16: 1733 case MVT::i32: 1734 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1735 RC = &AArch64::GPR32RegClass; 1736 break; 1737 case MVT::i64: 1738 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1739 RC = &AArch64::GPR64RegClass; 1740 break; 1741 } 1742 Register ResultReg = 1743 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg, 1744 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1745 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1746 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1747 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1748 } 1749 return ResultReg; 1750 } 1751 1752 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, 1753 uint64_t Imm) { 1754 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm); 1755 } 1756 1757 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1758 bool WantZExt, MachineMemOperand *MMO) { 1759 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1760 return 0; 1761 1762 // Simplify this down to something we can handle. 1763 if (!simplifyAddress(Addr, VT)) 1764 return 0; 1765 1766 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1767 if (!ScaleFactor) 1768 llvm_unreachable("Unexpected value type."); 1769 1770 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1771 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1772 bool UseScaled = true; 1773 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1774 UseScaled = false; 1775 ScaleFactor = 1; 1776 } 1777 1778 static const unsigned GPOpcTable[2][8][4] = { 1779 // Sign-extend. 1780 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1781 AArch64::LDURXi }, 1782 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1783 AArch64::LDURXi }, 1784 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1785 AArch64::LDRXui }, 1786 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1787 AArch64::LDRXui }, 1788 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1789 AArch64::LDRXroX }, 1790 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1791 AArch64::LDRXroX }, 1792 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1793 AArch64::LDRXroW }, 1794 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1795 AArch64::LDRXroW } 1796 }, 1797 // Zero-extend. 
1798 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1799 AArch64::LDURXi }, 1800 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1801 AArch64::LDURXi }, 1802 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1803 AArch64::LDRXui }, 1804 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1805 AArch64::LDRXui }, 1806 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1807 AArch64::LDRXroX }, 1808 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1809 AArch64::LDRXroX }, 1810 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1811 AArch64::LDRXroW }, 1812 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1813 AArch64::LDRXroW } 1814 } 1815 }; 1816 1817 static const unsigned FPOpcTable[4][2] = { 1818 { AArch64::LDURSi, AArch64::LDURDi }, 1819 { AArch64::LDRSui, AArch64::LDRDui }, 1820 { AArch64::LDRSroX, AArch64::LDRDroX }, 1821 { AArch64::LDRSroW, AArch64::LDRDroW } 1822 }; 1823 1824 unsigned Opc; 1825 const TargetRegisterClass *RC; 1826 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1827 Addr.getOffsetReg(); 1828 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1829 if (Addr.getExtendType() == AArch64_AM::UXTW || 1830 Addr.getExtendType() == AArch64_AM::SXTW) 1831 Idx++; 1832 1833 bool IsRet64Bit = RetVT == MVT::i64; 1834 switch (VT.SimpleTy) { 1835 default: 1836 llvm_unreachable("Unexpected value type."); 1837 case MVT::i1: // Intentional fall-through. 1838 case MVT::i8: 1839 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1840 RC = (IsRet64Bit && !WantZExt) ? 1841 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1842 break; 1843 case MVT::i16: 1844 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1845 RC = (IsRet64Bit && !WantZExt) ? 1846 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1847 break; 1848 case MVT::i32: 1849 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1850 RC = (IsRet64Bit && !WantZExt) ? 1851 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1852 break; 1853 case MVT::i64: 1854 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1855 RC = &AArch64::GPR64RegClass; 1856 break; 1857 case MVT::f32: 1858 Opc = FPOpcTable[Idx][0]; 1859 RC = &AArch64::FPR32RegClass; 1860 break; 1861 case MVT::f64: 1862 Opc = FPOpcTable[Idx][1]; 1863 RC = &AArch64::FPR64RegClass; 1864 break; 1865 } 1866 1867 // Create the base instruction, then add the operands. 1868 Register ResultReg = createResultReg(RC); 1869 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1870 TII.get(Opc), ResultReg); 1871 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1872 1873 // Loading an i1 requires special handling. 1874 if (VT == MVT::i1) { 1875 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1); 1876 assert(ANDReg && "Unexpected AND instruction emission failure."); 1877 ResultReg = ANDReg; 1878 } 1879 1880 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1881 // the 32bit reg to a 64bit reg. 
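// This relies on the fact that a 32-bit write to a W register implicitly
// zeroes the upper 32 bits of the corresponding X register, so the
// SUBREG_TO_REG below is sufficient and no extra extend instruction is
// needed.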
1882 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1883 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 1884 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1885 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1886 .addImm(0) 1887 .addReg(ResultReg, getKillRegState(true)) 1888 .addImm(AArch64::sub_32); 1889 ResultReg = Reg64; 1890 } 1891 return ResultReg; 1892 } 1893 1894 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1895 MVT VT; 1896 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1897 return false; 1898 1899 if (VT.isVector()) 1900 return selectOperator(I, I->getOpcode()); 1901 1902 unsigned ResultReg; 1903 switch (I->getOpcode()) { 1904 default: 1905 llvm_unreachable("Unexpected instruction."); 1906 case Instruction::Add: 1907 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1908 break; 1909 case Instruction::Sub: 1910 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1911 break; 1912 } 1913 if (!ResultReg) 1914 return false; 1915 1916 updateValueMap(I, ResultReg); 1917 return true; 1918 } 1919 1920 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1921 MVT VT; 1922 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1923 return false; 1924 1925 if (VT.isVector()) 1926 return selectOperator(I, I->getOpcode()); 1927 1928 unsigned ResultReg; 1929 switch (I->getOpcode()) { 1930 default: 1931 llvm_unreachable("Unexpected instruction."); 1932 case Instruction::And: 1933 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1934 break; 1935 case Instruction::Or: 1936 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1937 break; 1938 case Instruction::Xor: 1939 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1940 break; 1941 } 1942 if (!ResultReg) 1943 return false; 1944 1945 updateValueMap(I, ResultReg); 1946 return true; 1947 } 1948 1949 bool AArch64FastISel::selectLoad(const Instruction *I) { 1950 MVT VT; 1951 // Verify we have a legal type before going any further. Currently, we handle 1952 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1953 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 1954 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1955 cast<LoadInst>(I)->isAtomic()) 1956 return false; 1957 1958 const Value *SV = I->getOperand(0); 1959 if (TLI.supportSwiftError()) { 1960 // Swifterror values can come from either a function parameter with 1961 // swifterror attribute or an alloca with swifterror attribute. 1962 if (const Argument *Arg = dyn_cast<Argument>(SV)) { 1963 if (Arg->hasSwiftErrorAttr()) 1964 return false; 1965 } 1966 1967 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { 1968 if (Alloca->isSwiftError()) 1969 return false; 1970 } 1971 } 1972 1973 // See if we can handle this address. 1974 Address Addr; 1975 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 1976 return false; 1977 1978 // Fold the following sign-/zero-extend into the load instruction. 
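// If the load's only user is a sext/zext to a supported type, select a
// widening load (e.g. LDRSB/LDRSH/LDRSW or LDRB/LDRH) so the extension
// comes for free instead of being emitted as a separate instruction.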
1979 bool WantZExt = true; 1980 MVT RetVT = VT; 1981 const Value *IntExtVal = nullptr; 1982 if (I->hasOneUse()) { 1983 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { 1984 if (isTypeSupported(ZE->getType(), RetVT)) 1985 IntExtVal = ZE; 1986 else 1987 RetVT = VT; 1988 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { 1989 if (isTypeSupported(SE->getType(), RetVT)) 1990 IntExtVal = SE; 1991 else 1992 RetVT = VT; 1993 WantZExt = false; 1994 } 1995 } 1996 1997 unsigned ResultReg = 1998 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); 1999 if (!ResultReg) 2000 return false; 2001 2002 // There are a few different cases we have to handle, because the load or the 2003 // sign-/zero-extend might not be selected by FastISel if we fall-back to 2004 // SelectionDAG. There is also an ordering issue when both instructions are in 2005 // different basic blocks. 2006 // 1.) The load instruction is selected by FastISel, but the integer extend 2007 // not. This usually happens when the integer extend is in a different 2008 // basic block and SelectionDAG took over for that basic block. 2009 // 2.) The load instruction is selected before the integer extend. This only 2010 // happens when the integer extend is in a different basic block. 2011 // 3.) The load instruction is selected by SelectionDAG and the integer extend 2012 // by FastISel. This happens if there are instructions between the load 2013 // and the integer extend that couldn't be selected by FastISel. 2014 if (IntExtVal) { 2015 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG 2016 // could select it. Emit a copy to subreg if necessary. FastISel will remove 2017 // it when it selects the integer extend. 2018 Register Reg = lookUpRegForValue(IntExtVal); 2019 auto *MI = MRI.getUniqueVRegDef(Reg); 2020 if (!MI) { 2021 if (RetVT == MVT::i64 && VT <= MVT::i32) { 2022 if (WantZExt) { 2023 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). 2024 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt)); 2025 ResultReg = std::prev(I)->getOperand(0).getReg(); 2026 removeDeadCode(I, std::next(I)); 2027 } else 2028 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, 2029 AArch64::sub_32); 2030 } 2031 updateValueMap(I, ResultReg); 2032 return true; 2033 } 2034 2035 // The integer extend has already been emitted - delete all the instructions 2036 // that have been emitted by the integer extend lowering code and use the 2037 // result from the load instruction directly. 
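// The loop below starts at the extend's defining machine instruction,
// erases it, and follows its first register operand to the next definition
// until the whole dead chain has been removed.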
2038 while (MI) { 2039 Reg = 0; 2040 for (auto &Opnd : MI->uses()) { 2041 if (Opnd.isReg()) { 2042 Reg = Opnd.getReg(); 2043 break; 2044 } 2045 } 2046 MachineBasicBlock::iterator I(MI); 2047 removeDeadCode(I, std::next(I)); 2048 MI = nullptr; 2049 if (Reg) 2050 MI = MRI.getUniqueVRegDef(Reg); 2051 } 2052 updateValueMap(IntExtVal, ResultReg); 2053 return true; 2054 } 2055 2056 updateValueMap(I, ResultReg); 2057 return true; 2058 } 2059 2060 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, 2061 unsigned AddrReg, 2062 MachineMemOperand *MMO) { 2063 unsigned Opc; 2064 switch (VT.SimpleTy) { 2065 default: return false; 2066 case MVT::i8: Opc = AArch64::STLRB; break; 2067 case MVT::i16: Opc = AArch64::STLRH; break; 2068 case MVT::i32: Opc = AArch64::STLRW; break; 2069 case MVT::i64: Opc = AArch64::STLRX; break; 2070 } 2071 2072 const MCInstrDesc &II = TII.get(Opc); 2073 SrcReg = constrainOperandRegClass(II, SrcReg, 0); 2074 AddrReg = constrainOperandRegClass(II, AddrReg, 1); 2075 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 2076 .addReg(SrcReg) 2077 .addReg(AddrReg) 2078 .addMemOperand(MMO); 2079 return true; 2080 } 2081 2082 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 2083 MachineMemOperand *MMO) { 2084 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 2085 return false; 2086 2087 // Simplify this down to something we can handle. 2088 if (!simplifyAddress(Addr, VT)) 2089 return false; 2090 2091 unsigned ScaleFactor = getImplicitScaleFactor(VT); 2092 if (!ScaleFactor) 2093 llvm_unreachable("Unexpected value type."); 2094 2095 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 2096 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 2097 bool UseScaled = true; 2098 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 2099 UseScaled = false; 2100 ScaleFactor = 1; 2101 } 2102 2103 static const unsigned OpcTable[4][6] = { 2104 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 2105 AArch64::STURSi, AArch64::STURDi }, 2106 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 2107 AArch64::STRSui, AArch64::STRDui }, 2108 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 2109 AArch64::STRSroX, AArch64::STRDroX }, 2110 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 2111 AArch64::STRSroW, AArch64::STRDroW } 2112 }; 2113 2114 unsigned Opc; 2115 bool VTIsi1 = false; 2116 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 2117 Addr.getOffsetReg(); 2118 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 2119 if (Addr.getExtendType() == AArch64_AM::UXTW || 2120 Addr.getExtendType() == AArch64_AM::SXTW) 2121 Idx++; 2122 2123 switch (VT.SimpleTy) { 2124 default: llvm_unreachable("Unexpected value type."); 2125 case MVT::i1: VTIsi1 = true; [[fallthrough]]; 2126 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2127 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2128 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2129 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2130 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2131 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2132 } 2133 2134 // Storing an i1 requires special handling. 2135 if (VTIsi1 && SrcReg != AArch64::WZR) { 2136 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1); 2137 assert(ANDReg && "Unexpected AND instruction emission failure."); 2138 SrcReg = ANDReg; 2139 } 2140 // Create the base instruction, then add the operands. 
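// constrainOperandRegClass ensures the source virtual register is in a
// register class compatible with the chosen store opcode before the
// instruction is built.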
2141 const MCInstrDesc &II = TII.get(Opc); 2142 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2143 MachineInstrBuilder MIB = 2144 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg); 2145 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2146 2147 return true; 2148 } 2149 2150 bool AArch64FastISel::selectStore(const Instruction *I) { 2151 MVT VT; 2152 const Value *Op0 = I->getOperand(0); 2153 // Verify we have a legal type before going any further. Currently, we handle 2154 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2155 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2156 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) 2157 return false; 2158 2159 const Value *PtrV = I->getOperand(1); 2160 if (TLI.supportSwiftError()) { 2161 // Swifterror values can come from either a function parameter with 2162 // swifterror attribute or an alloca with swifterror attribute. 2163 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { 2164 if (Arg->hasSwiftErrorAttr()) 2165 return false; 2166 } 2167 2168 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { 2169 if (Alloca->isSwiftError()) 2170 return false; 2171 } 2172 } 2173 2174 // Get the value to be stored into a register. Use the zero register directly 2175 // when possible to avoid an unnecessary copy and a wasted register. 2176 unsigned SrcReg = 0; 2177 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2178 if (CI->isZero()) 2179 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2180 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2181 if (CF->isZero() && !CF->isNegative()) { 2182 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2183 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2184 } 2185 } 2186 2187 if (!SrcReg) 2188 SrcReg = getRegForValue(Op0); 2189 2190 if (!SrcReg) 2191 return false; 2192 2193 auto *SI = cast<StoreInst>(I); 2194 2195 // Try to emit a STLR for seq_cst/release. 2196 if (SI->isAtomic()) { 2197 AtomicOrdering Ord = SI->getOrdering(); 2198 // The non-atomic instructions are sufficient for relaxed stores. 2199 if (isReleaseOrStronger(Ord)) { 2200 // The STLR addressing mode only supports a base reg; pass that directly. 2201 Register AddrReg = getRegForValue(PtrV); 2202 return emitStoreRelease(VT, SrcReg, AddrReg, 2203 createMachineMemOperandFor(I)); 2204 } 2205 } 2206 2207 // See if we can handle this address. 2208 Address Addr; 2209 if (!computeAddress(PtrV, Addr, Op0->getType())) 2210 return false; 2211 2212 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2213 return false; 2214 return true; 2215 } 2216 2217 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2218 switch (Pred) { 2219 case CmpInst::FCMP_ONE: 2220 case CmpInst::FCMP_UEQ: 2221 default: 2222 // AL is our "false" for now. The other two need more compares. 
2223 return AArch64CC::AL; 2224 case CmpInst::ICMP_EQ: 2225 case CmpInst::FCMP_OEQ: 2226 return AArch64CC::EQ; 2227 case CmpInst::ICMP_SGT: 2228 case CmpInst::FCMP_OGT: 2229 return AArch64CC::GT; 2230 case CmpInst::ICMP_SGE: 2231 case CmpInst::FCMP_OGE: 2232 return AArch64CC::GE; 2233 case CmpInst::ICMP_UGT: 2234 case CmpInst::FCMP_UGT: 2235 return AArch64CC::HI; 2236 case CmpInst::FCMP_OLT: 2237 return AArch64CC::MI; 2238 case CmpInst::ICMP_ULE: 2239 case CmpInst::FCMP_OLE: 2240 return AArch64CC::LS; 2241 case CmpInst::FCMP_ORD: 2242 return AArch64CC::VC; 2243 case CmpInst::FCMP_UNO: 2244 return AArch64CC::VS; 2245 case CmpInst::FCMP_UGE: 2246 return AArch64CC::PL; 2247 case CmpInst::ICMP_SLT: 2248 case CmpInst::FCMP_ULT: 2249 return AArch64CC::LT; 2250 case CmpInst::ICMP_SLE: 2251 case CmpInst::FCMP_ULE: 2252 return AArch64CC::LE; 2253 case CmpInst::FCMP_UNE: 2254 case CmpInst::ICMP_NE: 2255 return AArch64CC::NE; 2256 case CmpInst::ICMP_UGE: 2257 return AArch64CC::HS; 2258 case CmpInst::ICMP_ULT: 2259 return AArch64CC::LO; 2260 } 2261 } 2262 2263 /// Try to emit a combined compare-and-branch instruction. 2264 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2265 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions 2266 // will not be produced, as they are conditional branch instructions that do 2267 // not set flags. 2268 if (FuncInfo.MF->getFunction().hasFnAttribute( 2269 Attribute::SpeculativeLoadHardening)) 2270 return false; 2271 2272 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2273 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2274 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2275 2276 const Value *LHS = CI->getOperand(0); 2277 const Value *RHS = CI->getOperand(1); 2278 2279 MVT VT; 2280 if (!isTypeSupported(LHS->getType(), VT)) 2281 return false; 2282 2283 unsigned BW = VT.getSizeInBits(); 2284 if (BW > 64) 2285 return false; 2286 2287 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2288 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2289 2290 // Try to take advantage of fallthrough opportunities. 
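// If the true block is the layout successor, swap the successors and invert
// the predicate so the CB(N)Z/TB(N)Z emitted below targets the
// non-fallthrough block and the layout successor is reached by falling
// through.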
2291 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2292 std::swap(TBB, FBB); 2293 Predicate = CmpInst::getInversePredicate(Predicate); 2294 } 2295 2296 int TestBit = -1; 2297 bool IsCmpNE; 2298 switch (Predicate) { 2299 default: 2300 return false; 2301 case CmpInst::ICMP_EQ: 2302 case CmpInst::ICMP_NE: 2303 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2304 std::swap(LHS, RHS); 2305 2306 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2307 return false; 2308 2309 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2310 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2311 const Value *AndLHS = AI->getOperand(0); 2312 const Value *AndRHS = AI->getOperand(1); 2313 2314 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2315 if (C->getValue().isPowerOf2()) 2316 std::swap(AndLHS, AndRHS); 2317 2318 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2319 if (C->getValue().isPowerOf2()) { 2320 TestBit = C->getValue().logBase2(); 2321 LHS = AndLHS; 2322 } 2323 } 2324 2325 if (VT == MVT::i1) 2326 TestBit = 0; 2327 2328 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2329 break; 2330 case CmpInst::ICMP_SLT: 2331 case CmpInst::ICMP_SGE: 2332 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2333 return false; 2334 2335 TestBit = BW - 1; 2336 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2337 break; 2338 case CmpInst::ICMP_SGT: 2339 case CmpInst::ICMP_SLE: 2340 if (!isa<ConstantInt>(RHS)) 2341 return false; 2342 2343 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2344 return false; 2345 2346 TestBit = BW - 1; 2347 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2348 break; 2349 } // end switch 2350 2351 static const unsigned OpcTable[2][2][2] = { 2352 { {AArch64::CBZW, AArch64::CBZX }, 2353 {AArch64::CBNZW, AArch64::CBNZX} }, 2354 { {AArch64::TBZW, AArch64::TBZX }, 2355 {AArch64::TBNZW, AArch64::TBNZX} } 2356 }; 2357 2358 bool IsBitTest = TestBit != -1; 2359 bool Is64Bit = BW == 64; 2360 if (TestBit < 32 && TestBit >= 0) 2361 Is64Bit = false; 2362 2363 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2364 const MCInstrDesc &II = TII.get(Opc); 2365 2366 Register SrcReg = getRegForValue(LHS); 2367 if (!SrcReg) 2368 return false; 2369 2370 if (BW == 64 && !Is64Bit) 2371 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32); 2372 2373 if ((BW < 32) && !IsBitTest) 2374 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); 2375 2376 // Emit the combined compare and branch instruction. 2377 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2378 MachineInstrBuilder MIB = 2379 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)) 2380 .addReg(SrcReg); 2381 if (IsBitTest) 2382 MIB.addImm(TestBit); 2383 MIB.addMBB(TBB); 2384 2385 finishCondBranch(BI->getParent(), TBB, FBB); 2386 return true; 2387 } 2388 2389 bool AArch64FastISel::selectBranch(const Instruction *I) { 2390 const BranchInst *BI = cast<BranchInst>(I); 2391 if (BI->isUnconditional()) { 2392 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2393 fastEmitBranch(MSucc, BI->getDebugLoc()); 2394 return true; 2395 } 2396 2397 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2398 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2399 2400 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2401 if (CI->hasOneUse() && isValueAvailable(CI)) { 2402 // Try to optimize or fold the cmp. 
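// optimizeCmpPredicate may prove the compare always false or always true
// (e.g. when both operands are the same value); in that case the
// conditional branch degenerates into an unconditional branch, handled
// right below.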
2403 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2404 switch (Predicate) { 2405 default: 2406 break; 2407 case CmpInst::FCMP_FALSE: 2408 fastEmitBranch(FBB, MIMD.getDL()); 2409 return true; 2410 case CmpInst::FCMP_TRUE: 2411 fastEmitBranch(TBB, MIMD.getDL()); 2412 return true; 2413 } 2414 2415 // Try to emit a combined compare-and-branch first. 2416 if (emitCompareAndBranch(BI)) 2417 return true; 2418 2419 // Try to take advantage of fallthrough opportunities. 2420 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2421 std::swap(TBB, FBB); 2422 Predicate = CmpInst::getInversePredicate(Predicate); 2423 } 2424 2425 // Emit the cmp. 2426 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2427 return false; 2428 2429 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2430 // instruction. 2431 AArch64CC::CondCode CC = getCompareCC(Predicate); 2432 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2433 switch (Predicate) { 2434 default: 2435 break; 2436 case CmpInst::FCMP_UEQ: 2437 ExtraCC = AArch64CC::EQ; 2438 CC = AArch64CC::VS; 2439 break; 2440 case CmpInst::FCMP_ONE: 2441 ExtraCC = AArch64CC::MI; 2442 CC = AArch64CC::GT; 2443 break; 2444 } 2445 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2446 2447 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2448 if (ExtraCC != AArch64CC::AL) { 2449 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2450 .addImm(ExtraCC) 2451 .addMBB(TBB); 2452 } 2453 2454 // Emit the branch. 2455 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2456 .addImm(CC) 2457 .addMBB(TBB); 2458 2459 finishCondBranch(BI->getParent(), TBB, FBB); 2460 return true; 2461 } 2462 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2463 uint64_t Imm = CI->getZExtValue(); 2464 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2465 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B)) 2466 .addMBB(Target); 2467 2468 // Obtain the branch probability and add the target to the successor list. 2469 if (FuncInfo.BPI) { 2470 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2471 BI->getParent(), Target->getBasicBlock()); 2472 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2473 } else 2474 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2475 return true; 2476 } else { 2477 AArch64CC::CondCode CC = AArch64CC::NE; 2478 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2479 // Fake request the condition, otherwise the intrinsic might be completely 2480 // optimized away. 2481 Register CondReg = getRegForValue(BI->getCondition()); 2482 if (!CondReg) 2483 return false; 2484 2485 // Emit the branch. 2486 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2487 .addImm(CC) 2488 .addMBB(TBB); 2489 2490 finishCondBranch(BI->getParent(), TBB, FBB); 2491 return true; 2492 } 2493 } 2494 2495 Register CondReg = getRegForValue(BI->getCondition()); 2496 if (CondReg == 0) 2497 return false; 2498 2499 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
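// E.g. "tbnz w8, #0, <true-block>" (register illustrative). If the true
// block is the layout successor, branch to the false block with TBZ instead
// so the layout successor falls through.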
2500 unsigned Opcode = AArch64::TBNZW; 2501 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2502 std::swap(TBB, FBB); 2503 Opcode = AArch64::TBZW; 2504 } 2505 2506 const MCInstrDesc &II = TII.get(Opcode); 2507 Register ConstrainedCondReg 2508 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2509 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 2510 .addReg(ConstrainedCondReg) 2511 .addImm(0) 2512 .addMBB(TBB); 2513 2514 finishCondBranch(BI->getParent(), TBB, FBB); 2515 return true; 2516 } 2517 2518 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2519 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2520 Register AddrReg = getRegForValue(BI->getOperand(0)); 2521 if (AddrReg == 0) 2522 return false; 2523 2524 // Emit the indirect branch. 2525 const MCInstrDesc &II = TII.get(AArch64::BR); 2526 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2527 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg); 2528 2529 // Make sure the CFG is up-to-date. 2530 for (const auto *Succ : BI->successors()) 2531 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); 2532 2533 return true; 2534 } 2535 2536 bool AArch64FastISel::selectCmp(const Instruction *I) { 2537 const CmpInst *CI = cast<CmpInst>(I); 2538 2539 // Vectors of i1 are weird: bail out. 2540 if (CI->getType()->isVectorTy()) 2541 return false; 2542 2543 // Try to optimize or fold the cmp. 2544 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2545 unsigned ResultReg = 0; 2546 switch (Predicate) { 2547 default: 2548 break; 2549 case CmpInst::FCMP_FALSE: 2550 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2551 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 2552 TII.get(TargetOpcode::COPY), ResultReg) 2553 .addReg(AArch64::WZR, getKillRegState(true)); 2554 break; 2555 case CmpInst::FCMP_TRUE: 2556 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2557 break; 2558 } 2559 2560 if (ResultReg) { 2561 updateValueMap(I, ResultReg); 2562 return true; 2563 } 2564 2565 // Emit the cmp. 2566 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2567 return false; 2568 2569 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2570 2571 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2572 // condition codes are inverted, because they are used by CSINC. 2573 static unsigned CondCodeTable[2][2] = { 2574 { AArch64CC::NE, AArch64CC::VC }, 2575 { AArch64CC::PL, AArch64CC::LE } 2576 }; 2577 unsigned *CondCodes = nullptr; 2578 switch (Predicate) { 2579 default: 2580 break; 2581 case CmpInst::FCMP_UEQ: 2582 CondCodes = &CondCodeTable[0][0]; 2583 break; 2584 case CmpInst::FCMP_ONE: 2585 CondCodes = &CondCodeTable[1][0]; 2586 break; 2587 } 2588 2589 if (CondCodes) { 2590 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2591 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2592 TmpReg1) 2593 .addReg(AArch64::WZR, getKillRegState(true)) 2594 .addReg(AArch64::WZR, getKillRegState(true)) 2595 .addImm(CondCodes[0]); 2596 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2597 ResultReg) 2598 .addReg(TmpReg1, getKillRegState(true)) 2599 .addReg(AArch64::WZR, getKillRegState(true)) 2600 .addImm(CondCodes[1]); 2601 2602 updateValueMap(I, ResultReg); 2603 return true; 2604 } 2605 2606 // Now set a register based on the comparison. 
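// Materialize the boolean with CSINC against WZR using the inverted
// condition (the "cset" alias): the result register becomes 1 when the
// original condition holds and 0 otherwise.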
2607 AArch64CC::CondCode CC = getCompareCC(Predicate); 2608 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2609 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2610 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2611 ResultReg) 2612 .addReg(AArch64::WZR, getKillRegState(true)) 2613 .addReg(AArch64::WZR, getKillRegState(true)) 2614 .addImm(invertedCC); 2615 2616 updateValueMap(I, ResultReg); 2617 return true; 2618 } 2619 2620 /// Optimize selects of i1 if one of the operands has a 'true' or 'false' 2621 /// value. 2622 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2623 if (!SI->getType()->isIntegerTy(1)) 2624 return false; 2625 2626 const Value *Src1Val, *Src2Val; 2627 unsigned Opc = 0; 2628 bool NeedExtraOp = false; 2629 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2630 if (CI->isOne()) { 2631 Src1Val = SI->getCondition(); 2632 Src2Val = SI->getFalseValue(); 2633 Opc = AArch64::ORRWrr; 2634 } else { 2635 assert(CI->isZero()); 2636 Src1Val = SI->getFalseValue(); 2637 Src2Val = SI->getCondition(); 2638 Opc = AArch64::BICWrr; 2639 } 2640 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2641 if (CI->isOne()) { 2642 Src1Val = SI->getCondition(); 2643 Src2Val = SI->getTrueValue(); 2644 Opc = AArch64::ORRWrr; 2645 NeedExtraOp = true; 2646 } else { 2647 assert(CI->isZero()); 2648 Src1Val = SI->getCondition(); 2649 Src2Val = SI->getTrueValue(); 2650 Opc = AArch64::ANDWrr; 2651 } 2652 } 2653 2654 if (!Opc) 2655 return false; 2656 2657 Register Src1Reg = getRegForValue(Src1Val); 2658 if (!Src1Reg) 2659 return false; 2660 2661 Register Src2Reg = getRegForValue(Src2Val); 2662 if (!Src2Reg) 2663 return false; 2664 2665 if (NeedExtraOp) 2666 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1); 2667 2668 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2669 Src2Reg); 2670 updateValueMap(SI, ResultReg); 2671 return true; 2672 } 2673 2674 bool AArch64FastISel::selectSelect(const Instruction *I) { 2675 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2676 MVT VT; 2677 if (!isTypeSupported(I->getType(), VT)) 2678 return false; 2679 2680 unsigned Opc; 2681 const TargetRegisterClass *RC; 2682 switch (VT.SimpleTy) { 2683 default: 2684 return false; 2685 case MVT::i1: 2686 case MVT::i8: 2687 case MVT::i16: 2688 case MVT::i32: 2689 Opc = AArch64::CSELWr; 2690 RC = &AArch64::GPR32RegClass; 2691 break; 2692 case MVT::i64: 2693 Opc = AArch64::CSELXr; 2694 RC = &AArch64::GPR64RegClass; 2695 break; 2696 case MVT::f32: 2697 Opc = AArch64::FCSELSrrr; 2698 RC = &AArch64::FPR32RegClass; 2699 break; 2700 case MVT::f64: 2701 Opc = AArch64::FCSELDrrr; 2702 RC = &AArch64::FPR64RegClass; 2703 break; 2704 } 2705 2706 const SelectInst *SI = cast<SelectInst>(I); 2707 const Value *Cond = SI->getCondition(); 2708 AArch64CC::CondCode CC = AArch64CC::NE; 2709 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2710 2711 if (optimizeSelect(SI)) 2712 return true; 2713 2714 // Try to pickup the flags, so we don't have to emit another compare. 2715 if (foldXALUIntrinsic(CC, I, Cond)) { 2716 // Fake request the condition to force emission of the XALU intrinsic. 2717 Register CondReg = getRegForValue(Cond); 2718 if (!CondReg) 2719 return false; 2720 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2721 isValueAvailable(Cond)) { 2722 const auto *Cmp = cast<CmpInst>(Cond); 2723 // Try to optimize or fold the cmp. 
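// As with branches, a compare that folds to always-false/always-true lets
// the select collapse to one of its operands without emitting a compare or
// CSEL at all.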
2724 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2725 const Value *FoldSelect = nullptr; 2726 switch (Predicate) { 2727 default: 2728 break; 2729 case CmpInst::FCMP_FALSE: 2730 FoldSelect = SI->getFalseValue(); 2731 break; 2732 case CmpInst::FCMP_TRUE: 2733 FoldSelect = SI->getTrueValue(); 2734 break; 2735 } 2736 2737 if (FoldSelect) { 2738 Register SrcReg = getRegForValue(FoldSelect); 2739 if (!SrcReg) 2740 return false; 2741 2742 updateValueMap(I, SrcReg); 2743 return true; 2744 } 2745 2746 // Emit the cmp. 2747 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2748 return false; 2749 2750 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2751 CC = getCompareCC(Predicate); 2752 switch (Predicate) { 2753 default: 2754 break; 2755 case CmpInst::FCMP_UEQ: 2756 ExtraCC = AArch64CC::EQ; 2757 CC = AArch64CC::VS; 2758 break; 2759 case CmpInst::FCMP_ONE: 2760 ExtraCC = AArch64CC::MI; 2761 CC = AArch64CC::GT; 2762 break; 2763 } 2764 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2765 } else { 2766 Register CondReg = getRegForValue(Cond); 2767 if (!CondReg) 2768 return false; 2769 2770 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2771 CondReg = constrainOperandRegClass(II, CondReg, 1); 2772 2773 // Emit a TST instruction (ANDS wzr, reg, #imm). 2774 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, 2775 AArch64::WZR) 2776 .addReg(CondReg) 2777 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2778 } 2779 2780 Register Src1Reg = getRegForValue(SI->getTrueValue()); 2781 Register Src2Reg = getRegForValue(SI->getFalseValue()); 2782 2783 if (!Src1Reg || !Src2Reg) 2784 return false; 2785 2786 if (ExtraCC != AArch64CC::AL) 2787 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC); 2788 2789 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC); 2790 updateValueMap(I, ResultReg); 2791 return true; 2792 } 2793 2794 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2795 Value *V = I->getOperand(0); 2796 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2797 return false; 2798 2799 Register Op = getRegForValue(V); 2800 if (Op == 0) 2801 return false; 2802 2803 Register ResultReg = createResultReg(&AArch64::FPR64RegClass); 2804 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr), 2805 ResultReg).addReg(Op); 2806 updateValueMap(I, ResultReg); 2807 return true; 2808 } 2809 2810 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2811 Value *V = I->getOperand(0); 2812 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2813 return false; 2814 2815 Register Op = getRegForValue(V); 2816 if (Op == 0) 2817 return false; 2818 2819 Register ResultReg = createResultReg(&AArch64::FPR32RegClass); 2820 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr), 2821 ResultReg).addReg(Op); 2822 updateValueMap(I, ResultReg); 2823 return true; 2824 } 2825 2826 // FPToUI and FPToSI 2827 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2828 MVT DestVT; 2829 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2830 return false; 2831 2832 Register SrcReg = getRegForValue(I->getOperand(0)); 2833 if (SrcReg == 0) 2834 return false; 2835 2836 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2837 if (SrcVT == MVT::f128 || SrcVT == MVT::f16) 2838 return false; 2839 2840 unsigned Opc; 2841 if (SrcVT == MVT::f64) { 2842 if (Signed) 2843 Opc = (DestVT == MVT::i32) ? 
AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2844 else 2845 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2846 } else { 2847 if (Signed) 2848 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2849 else 2850 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2851 } 2852 Register ResultReg = createResultReg( 2853 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2854 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 2855 .addReg(SrcReg); 2856 updateValueMap(I, ResultReg); 2857 return true; 2858 } 2859 2860 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2861 MVT DestVT; 2862 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2863 return false; 2864 // Let regular ISEL handle FP16 2865 if (DestVT == MVT::f16) 2866 return false; 2867 2868 assert((DestVT == MVT::f32 || DestVT == MVT::f64) && 2869 "Unexpected value type."); 2870 2871 Register SrcReg = getRegForValue(I->getOperand(0)); 2872 if (!SrcReg) 2873 return false; 2874 2875 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2876 2877 // Handle sign-extension. 2878 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2879 SrcReg = 2880 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2881 if (!SrcReg) 2882 return false; 2883 } 2884 2885 unsigned Opc; 2886 if (SrcVT == MVT::i64) { 2887 if (Signed) 2888 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2889 else 2890 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2891 } else { 2892 if (Signed) 2893 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2894 else 2895 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2896 } 2897 2898 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg); 2899 updateValueMap(I, ResultReg); 2900 return true; 2901 } 2902 2903 bool AArch64FastISel::fastLowerArguments() { 2904 if (!FuncInfo.CanLowerReturn) 2905 return false; 2906 2907 const Function *F = FuncInfo.Fn; 2908 if (F->isVarArg()) 2909 return false; 2910 2911 CallingConv::ID CC = F->getCallingConv(); 2912 if (CC != CallingConv::C && CC != CallingConv::Swift) 2913 return false; 2914 2915 if (Subtarget->hasCustomCallingConv()) 2916 return false; 2917 2918 // Only handle simple cases of up to 8 GPR and FPR each. 
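// This mirrors the AAPCS64 assignment handled here: up to eight integer
// arguments in W0-W7/X0-X7 and up to eight FP/vector arguments in
// H0-H7/S0-S7/D0-D7/Q0-Q7 (see the Registers table below); everything else
// is left to SelectionDAG.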
2919 unsigned GPRCnt = 0; 2920 unsigned FPRCnt = 0; 2921 for (auto const &Arg : F->args()) { 2922 if (Arg.hasAttribute(Attribute::ByVal) || 2923 Arg.hasAttribute(Attribute::InReg) || 2924 Arg.hasAttribute(Attribute::StructRet) || 2925 Arg.hasAttribute(Attribute::SwiftSelf) || 2926 Arg.hasAttribute(Attribute::SwiftAsync) || 2927 Arg.hasAttribute(Attribute::SwiftError) || 2928 Arg.hasAttribute(Attribute::Nest)) 2929 return false; 2930 2931 Type *ArgTy = Arg.getType(); 2932 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2933 return false; 2934 2935 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2936 if (!ArgVT.isSimple()) 2937 return false; 2938 2939 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2940 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2941 return false; 2942 2943 if (VT.isVector() && 2944 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2945 return false; 2946 2947 if (VT >= MVT::i1 && VT <= MVT::i64) 2948 ++GPRCnt; 2949 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2950 VT.is128BitVector()) 2951 ++FPRCnt; 2952 else 2953 return false; 2954 2955 if (GPRCnt > 8 || FPRCnt > 8) 2956 return false; 2957 } 2958 2959 static const MCPhysReg Registers[6][8] = { 2960 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 2961 AArch64::W5, AArch64::W6, AArch64::W7 }, 2962 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 2963 AArch64::X5, AArch64::X6, AArch64::X7 }, 2964 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 2965 AArch64::H5, AArch64::H6, AArch64::H7 }, 2966 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 2967 AArch64::S5, AArch64::S6, AArch64::S7 }, 2968 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 2969 AArch64::D5, AArch64::D6, AArch64::D7 }, 2970 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 2971 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 2972 }; 2973 2974 unsigned GPRIdx = 0; 2975 unsigned FPRIdx = 0; 2976 for (auto const &Arg : F->args()) { 2977 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 2978 unsigned SrcReg; 2979 const TargetRegisterClass *RC; 2980 if (VT >= MVT::i1 && VT <= MVT::i32) { 2981 SrcReg = Registers[0][GPRIdx++]; 2982 RC = &AArch64::GPR32RegClass; 2983 VT = MVT::i32; 2984 } else if (VT == MVT::i64) { 2985 SrcReg = Registers[1][GPRIdx++]; 2986 RC = &AArch64::GPR64RegClass; 2987 } else if (VT == MVT::f16) { 2988 SrcReg = Registers[2][FPRIdx++]; 2989 RC = &AArch64::FPR16RegClass; 2990 } else if (VT == MVT::f32) { 2991 SrcReg = Registers[3][FPRIdx++]; 2992 RC = &AArch64::FPR32RegClass; 2993 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 2994 SrcReg = Registers[4][FPRIdx++]; 2995 RC = &AArch64::FPR64RegClass; 2996 } else if (VT.is128BitVector()) { 2997 SrcReg = Registers[5][FPRIdx++]; 2998 RC = &AArch64::FPR128RegClass; 2999 } else 3000 llvm_unreachable("Unexpected value type."); 3001 3002 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 3003 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 3004 // Without this, EmitLiveInCopies may eliminate the livein if its only 3005 // use is a bitcast (which isn't turned into an instruction). 
3006 Register ResultReg = createResultReg(RC); 3007 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3008 TII.get(TargetOpcode::COPY), ResultReg) 3009 .addReg(DstReg, getKillRegState(true)); 3010 updateValueMap(&Arg, ResultReg); 3011 } 3012 return true; 3013 } 3014 3015 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 3016 SmallVectorImpl<MVT> &OutVTs, 3017 unsigned &NumBytes) { 3018 CallingConv::ID CC = CLI.CallConv; 3019 SmallVector<CCValAssign, 16> ArgLocs; 3020 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 3021 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 3022 3023 // Get a count of how many bytes are to be pushed on the stack. 3024 NumBytes = CCInfo.getNextStackOffset(); 3025 3026 // Issue CALLSEQ_START 3027 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 3028 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown)) 3029 .addImm(NumBytes).addImm(0); 3030 3031 // Process the args. 3032 for (CCValAssign &VA : ArgLocs) { 3033 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 3034 MVT ArgVT = OutVTs[VA.getValNo()]; 3035 3036 Register ArgReg = getRegForValue(ArgVal); 3037 if (!ArgReg) 3038 return false; 3039 3040 // Handle arg promotion: SExt, ZExt, AExt. 3041 switch (VA.getLocInfo()) { 3042 case CCValAssign::Full: 3043 break; 3044 case CCValAssign::SExt: { 3045 MVT DestVT = VA.getLocVT(); 3046 MVT SrcVT = ArgVT; 3047 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 3048 if (!ArgReg) 3049 return false; 3050 break; 3051 } 3052 case CCValAssign::AExt: 3053 // Intentional fall-through. 3054 case CCValAssign::ZExt: { 3055 MVT DestVT = VA.getLocVT(); 3056 MVT SrcVT = ArgVT; 3057 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 3058 if (!ArgReg) 3059 return false; 3060 break; 3061 } 3062 default: 3063 llvm_unreachable("Unknown arg promotion!"); 3064 } 3065 3066 // Now copy/store arg to correct locations. 3067 if (VA.isRegLoc() && !VA.needsCustom()) { 3068 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3069 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 3070 CLI.OutRegs.push_back(VA.getLocReg()); 3071 } else if (VA.needsCustom()) { 3072 // FIXME: Handle custom args. 3073 return false; 3074 } else { 3075 assert(VA.isMemLoc() && "Assuming store on stack."); 3076 3077 // Don't emit stores for undef values. 3078 if (isa<UndefValue>(ArgVal)) 3079 continue; 3080 3081 // Need to store on the stack. 3082 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 3083 3084 unsigned BEAlign = 0; 3085 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3086 BEAlign = 8 - ArgSize; 3087 3088 Address Addr; 3089 Addr.setKind(Address::RegBase); 3090 Addr.setReg(AArch64::SP); 3091 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3092 3093 Align Alignment = DL.getABITypeAlign(ArgVal->getType()); 3094 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3095 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 3096 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3097 3098 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3099 return false; 3100 } 3101 } 3102 return true; 3103 } 3104 3105 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, 3106 unsigned NumBytes) { 3107 CallingConv::ID CC = CLI.CallConv; 3108 3109 // Issue CALLSEQ_END 3110 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3111 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp)) 3112 .addImm(NumBytes).addImm(0); 3113 3114 // Now the return value. 
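// Only a single return value in a single register is handled here; it is
// copied out of the location assigned by the calling convention (e.g.
// W0/X0 or S0/D0) into a fresh virtual register.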
3115 if (RetVT != MVT::isVoid) { 3116 SmallVector<CCValAssign, 16> RVLocs; 3117 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3118 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); 3119 3120 // Only handle a single return value. 3121 if (RVLocs.size() != 1) 3122 return false; 3123 3124 // Copy all of the result registers out of their specified physreg. 3125 MVT CopyVT = RVLocs[0].getValVT(); 3126 3127 // TODO: Handle big-endian results 3128 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3129 return false; 3130 3131 Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); 3132 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3133 TII.get(TargetOpcode::COPY), ResultReg) 3134 .addReg(RVLocs[0].getLocReg()); 3135 CLI.InRegs.push_back(RVLocs[0].getLocReg()); 3136 3137 CLI.ResultReg = ResultReg; 3138 CLI.NumResultRegs = 1; 3139 } 3140 3141 return true; 3142 } 3143 3144 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3145 CallingConv::ID CC = CLI.CallConv; 3146 bool IsTailCall = CLI.IsTailCall; 3147 bool IsVarArg = CLI.IsVarArg; 3148 const Value *Callee = CLI.Callee; 3149 MCSymbol *Symbol = CLI.Symbol; 3150 3151 if (!Callee && !Symbol) 3152 return false; 3153 3154 // Allow SelectionDAG isel to handle calls to functions like setjmp that need 3155 // a bti instruction following the call. 3156 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) && 3157 !Subtarget->noBTIAtReturnTwice() && 3158 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) 3159 return false; 3160 3161 // Allow SelectionDAG isel to handle indirect calls with KCFI checks. 3162 if (CLI.CB && CLI.CB->isIndirectCall() && 3163 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi)) 3164 return false; 3165 3166 // Allow SelectionDAG isel to handle tail calls. 3167 if (IsTailCall) 3168 return false; 3169 3170 // FIXME: we could and should support this, but for now correctness at -O0 is 3171 // more important. 3172 if (Subtarget->isTargetILP32()) 3173 return false; 3174 3175 CodeModel::Model CM = TM.getCodeModel(); 3176 // Only support the small-addressing and large code models. 3177 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) 3178 return false; 3179 3180 // FIXME: Add large code model support for ELF. 3181 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3182 return false; 3183 3184 // Let SDISel handle vararg functions. 3185 if (IsVarArg) 3186 return false; 3187 3188 // FIXME: Only handle *simple* calls for now. 3189 MVT RetVT; 3190 if (CLI.RetTy->isVoidTy()) 3191 RetVT = MVT::isVoid; 3192 else if (!isTypeLegal(CLI.RetTy, RetVT)) 3193 return false; 3194 3195 for (auto Flag : CLI.OutFlags) 3196 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || 3197 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError()) 3198 return false; 3199 3200 // Set up the argument vectors. 3201 SmallVector<MVT, 16> OutVTs; 3202 OutVTs.reserve(CLI.OutVals.size()); 3203 3204 for (auto *Val : CLI.OutVals) { 3205 MVT VT; 3206 if (!isTypeLegal(Val->getType(), VT) && 3207 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3208 return false; 3209 3210 // We don't handle vector parameters yet. 
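// Also reject anything wider than 64 bits (e.g. i128/f128); such calls are
// left to SelectionDAG.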
3211 if (VT.isVector() || VT.getSizeInBits() > 64) 3212 return false; 3213 3214 OutVTs.push_back(VT); 3215 } 3216 3217 Address Addr; 3218 if (Callee && !computeCallAddress(Callee, Addr)) 3219 return false; 3220 3221 // The weak function target may be zero; in that case we must use indirect 3222 // addressing via a stub on windows as it may be out of range for a 3223 // PC-relative jump. 3224 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() && 3225 Addr.getGlobalValue()->hasExternalWeakLinkage()) 3226 return false; 3227 3228 // Handle the arguments now that we've gotten them. 3229 unsigned NumBytes; 3230 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3231 return false; 3232 3233 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3234 if (RegInfo->isAnyArgRegReserved(*MF)) 3235 RegInfo->emitReservedArgRegCallError(*MF); 3236 3237 // Issue the call. 3238 MachineInstrBuilder MIB; 3239 if (Subtarget->useSmallAddressing()) { 3240 const MCInstrDesc &II = 3241 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL); 3242 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II); 3243 if (Symbol) 3244 MIB.addSym(Symbol, 0); 3245 else if (Addr.getGlobalValue()) 3246 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3247 else if (Addr.getReg()) { 3248 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3249 MIB.addReg(Reg); 3250 } else 3251 return false; 3252 } else { 3253 unsigned CallReg = 0; 3254 if (Symbol) { 3255 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3256 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP), 3257 ADRPReg) 3258 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3259 3260 CallReg = createResultReg(&AArch64::GPR64RegClass); 3261 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3262 TII.get(AArch64::LDRXui), CallReg) 3263 .addReg(ADRPReg) 3264 .addSym(Symbol, 3265 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3266 } else if (Addr.getGlobalValue()) 3267 CallReg = materializeGV(Addr.getGlobalValue()); 3268 else if (Addr.getReg()) 3269 CallReg = Addr.getReg(); 3270 3271 if (!CallReg) 3272 return false; 3273 3274 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF)); 3275 CallReg = constrainOperandRegClass(II, CallReg, 0); 3276 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg); 3277 } 3278 3279 // Add implicit physical register uses to the call. 3280 for (auto Reg : CLI.OutRegs) 3281 MIB.addReg(Reg, RegState::Implicit); 3282 3283 // Add a register mask with the call-preserved registers. 3284 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3285 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3286 3287 CLI.Call = MIB; 3288 3289 // Finish off the call including any return values. 3290 return finishCall(CLI, RetVT, NumBytes); 3291 } 3292 3293 bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) { 3294 if (Alignment) 3295 return Len / Alignment->value() <= 4; 3296 else 3297 return Len < 32; 3298 } 3299 3300 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3301 uint64_t Len, MaybeAlign Alignment) { 3302 // Make sure we don't bloat code by inlining very large memcpy's. 
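// The copy is expanded into a short sequence of loads and stores, each
// iteration picking the widest type (i64 down to i8) that the remaining
// length and the known alignment allow.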
3303 if (!isMemCpySmall(Len, Alignment)) 3304 return false; 3305 3306 int64_t UnscaledOffset = 0; 3307 Address OrigDest = Dest; 3308 Address OrigSrc = Src; 3309 3310 while (Len) { 3311 MVT VT; 3312 if (!Alignment || *Alignment >= 8) { 3313 if (Len >= 8) 3314 VT = MVT::i64; 3315 else if (Len >= 4) 3316 VT = MVT::i32; 3317 else if (Len >= 2) 3318 VT = MVT::i16; 3319 else { 3320 VT = MVT::i8; 3321 } 3322 } else { 3323 assert(Alignment && "Alignment is set in this branch"); 3324 // Bound based on alignment. 3325 if (Len >= 4 && *Alignment == 4) 3326 VT = MVT::i32; 3327 else if (Len >= 2 && *Alignment == 2) 3328 VT = MVT::i16; 3329 else { 3330 VT = MVT::i8; 3331 } 3332 } 3333 3334 unsigned ResultReg = emitLoad(VT, VT, Src); 3335 if (!ResultReg) 3336 return false; 3337 3338 if (!emitStore(VT, ResultReg, Dest)) 3339 return false; 3340 3341 int64_t Size = VT.getSizeInBits() / 8; 3342 Len -= Size; 3343 UnscaledOffset += Size; 3344 3345 // We need to recompute the unscaled offset for each iteration. 3346 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3347 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3348 } 3349 3350 return true; 3351 } 3352 3353 /// Check if it is possible to fold the condition from the XALU intrinsic 3354 /// into the user. The condition code will only be updated on success. 3355 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3356 const Instruction *I, 3357 const Value *Cond) { 3358 if (!isa<ExtractValueInst>(Cond)) 3359 return false; 3360 3361 const auto *EV = cast<ExtractValueInst>(Cond); 3362 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3363 return false; 3364 3365 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3366 MVT RetVT; 3367 const Function *Callee = II->getCalledFunction(); 3368 Type *RetTy = 3369 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3370 if (!isTypeLegal(RetTy, RetVT)) 3371 return false; 3372 3373 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3374 return false; 3375 3376 const Value *LHS = II->getArgOperand(0); 3377 const Value *RHS = II->getArgOperand(1); 3378 3379 // Canonicalize immediate to the RHS. 3380 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3381 std::swap(LHS, RHS); 3382 3383 // Simplify multiplies. 3384 Intrinsic::ID IID = II->getIntrinsicID(); 3385 switch (IID) { 3386 default: 3387 break; 3388 case Intrinsic::smul_with_overflow: 3389 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3390 if (C->getValue() == 2) 3391 IID = Intrinsic::sadd_with_overflow; 3392 break; 3393 case Intrinsic::umul_with_overflow: 3394 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3395 if (C->getValue() == 2) 3396 IID = Intrinsic::uadd_with_overflow; 3397 break; 3398 } 3399 3400 AArch64CC::CondCode TmpCC; 3401 switch (IID) { 3402 default: 3403 return false; 3404 case Intrinsic::sadd_with_overflow: 3405 case Intrinsic::ssub_with_overflow: 3406 TmpCC = AArch64CC::VS; 3407 break; 3408 case Intrinsic::uadd_with_overflow: 3409 TmpCC = AArch64CC::HS; 3410 break; 3411 case Intrinsic::usub_with_overflow: 3412 TmpCC = AArch64CC::LO; 3413 break; 3414 case Intrinsic::smul_with_overflow: 3415 case Intrinsic::umul_with_overflow: 3416 TmpCC = AArch64CC::NE; 3417 break; 3418 } 3419 3420 // Check if both instructions are in the same basic block. 
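// The overflow flag can only be reused if nothing between the intrinsic and
// its user can clobber NZCV, so the walk below only tolerates extractvalue
// instructions in between.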
3421 if (!isValueAvailable(II)) 3422 return false; 3423 3424 // Make sure nothing is in the way 3425 BasicBlock::const_iterator Start(I); 3426 BasicBlock::const_iterator End(II); 3427 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3428 // We only expect extractvalue instructions between the intrinsic and the 3429 // instruction to be selected. 3430 if (!isa<ExtractValueInst>(Itr)) 3431 return false; 3432 3433 // Check that the extractvalue operand comes from the intrinsic. 3434 const auto *EVI = cast<ExtractValueInst>(Itr); 3435 if (EVI->getAggregateOperand() != II) 3436 return false; 3437 } 3438 3439 CC = TmpCC; 3440 return true; 3441 } 3442 3443 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3444 // FIXME: Handle more intrinsics. 3445 switch (II->getIntrinsicID()) { 3446 default: return false; 3447 case Intrinsic::frameaddress: { 3448 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3449 MFI.setFrameAddressIsTaken(true); 3450 3451 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3452 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3453 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3454 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3455 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3456 // Recursively load frame address 3457 // ldr x0, [fp] 3458 // ldr x0, [x0] 3459 // ldr x0, [x0] 3460 // ... 3461 unsigned DestReg; 3462 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3463 while (Depth--) { 3464 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3465 SrcReg, 0); 3466 assert(DestReg && "Unexpected LDR instruction emission failure."); 3467 SrcReg = DestReg; 3468 } 3469 3470 updateValueMap(II, SrcReg); 3471 return true; 3472 } 3473 case Intrinsic::sponentry: { 3474 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3475 3476 // SP = FP + Fixed Object + 16 3477 int FI = MFI.CreateFixedObject(4, 0, false); 3478 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 3479 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3480 TII.get(AArch64::ADDXri), ResultReg) 3481 .addFrameIndex(FI) 3482 .addImm(0) 3483 .addImm(0); 3484 3485 updateValueMap(II, ResultReg); 3486 return true; 3487 } 3488 case Intrinsic::memcpy: 3489 case Intrinsic::memmove: { 3490 const auto *MTI = cast<MemTransferInst>(II); 3491 // Don't handle volatile. 3492 if (MTI->isVolatile()) 3493 return false; 3494 3495 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3496 // we would emit dead code because we don't currently handle memmoves. 3497 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3498 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3499 // Small memcpy's are common enough that we want to do them without a call 3500 // if possible. 
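// E.g. a 16-byte copy with 8-byte alignment becomes two 64-bit loads and
// stores instead of a libcall.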
3501 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3502 MaybeAlign Alignment; 3503 if (MTI->getDestAlign() || MTI->getSourceAlign()) 3504 Alignment = std::min(MTI->getDestAlign().valueOrOne(), 3505 MTI->getSourceAlign().valueOrOne()); 3506 if (isMemCpySmall(Len, Alignment)) { 3507 Address Dest, Src; 3508 if (!computeAddress(MTI->getRawDest(), Dest) || 3509 !computeAddress(MTI->getRawSource(), Src)) 3510 return false; 3511 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3512 return true; 3513 } 3514 } 3515 3516 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3517 return false; 3518 3519 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3520 // Fast instruction selection doesn't support the special 3521 // address spaces. 3522 return false; 3523 3524 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3525 return lowerCallTo(II, IntrMemName, II->arg_size() - 1); 3526 } 3527 case Intrinsic::memset: { 3528 const MemSetInst *MSI = cast<MemSetInst>(II); 3529 // Don't handle volatile. 3530 if (MSI->isVolatile()) 3531 return false; 3532 3533 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3534 return false; 3535 3536 if (MSI->getDestAddressSpace() > 255) 3537 // Fast instruction selection doesn't support the special 3538 // address spaces. 3539 return false; 3540 3541 return lowerCallTo(II, "memset", II->arg_size() - 1); 3542 } 3543 case Intrinsic::sin: 3544 case Intrinsic::cos: 3545 case Intrinsic::pow: { 3546 MVT RetVT; 3547 if (!isTypeLegal(II->getType(), RetVT)) 3548 return false; 3549 3550 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3551 return false; 3552 3553 static const RTLIB::Libcall LibCallTable[3][2] = { 3554 { RTLIB::SIN_F32, RTLIB::SIN_F64 }, 3555 { RTLIB::COS_F32, RTLIB::COS_F64 }, 3556 { RTLIB::POW_F32, RTLIB::POW_F64 } 3557 }; 3558 RTLIB::Libcall LC; 3559 bool Is64Bit = RetVT == MVT::f64; 3560 switch (II->getIntrinsicID()) { 3561 default: 3562 llvm_unreachable("Unexpected intrinsic."); 3563 case Intrinsic::sin: 3564 LC = LibCallTable[0][Is64Bit]; 3565 break; 3566 case Intrinsic::cos: 3567 LC = LibCallTable[1][Is64Bit]; 3568 break; 3569 case Intrinsic::pow: 3570 LC = LibCallTable[2][Is64Bit]; 3571 break; 3572 } 3573 3574 ArgListTy Args; 3575 Args.reserve(II->arg_size()); 3576 3577 // Populate the argument list. 
3578 for (auto &Arg : II->args()) { 3579 ArgListEntry Entry; 3580 Entry.Val = Arg; 3581 Entry.Ty = Arg->getType(); 3582 Args.push_back(Entry); 3583 } 3584 3585 CallLoweringInfo CLI; 3586 MCContext &Ctx = MF->getContext(); 3587 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3588 TLI.getLibcallName(LC), std::move(Args)); 3589 if (!lowerCallTo(CLI)) 3590 return false; 3591 updateValueMap(II, CLI.ResultReg); 3592 return true; 3593 } 3594 case Intrinsic::fabs: { 3595 MVT VT; 3596 if (!isTypeLegal(II->getType(), VT)) 3597 return false; 3598 3599 unsigned Opc; 3600 switch (VT.SimpleTy) { 3601 default: 3602 return false; 3603 case MVT::f32: 3604 Opc = AArch64::FABSSr; 3605 break; 3606 case MVT::f64: 3607 Opc = AArch64::FABSDr; 3608 break; 3609 } 3610 Register SrcReg = getRegForValue(II->getOperand(0)); 3611 if (!SrcReg) 3612 return false; 3613 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3614 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 3615 .addReg(SrcReg); 3616 updateValueMap(II, ResultReg); 3617 return true; 3618 } 3619 case Intrinsic::trap: 3620 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK)) 3621 .addImm(1); 3622 return true; 3623 case Intrinsic::debugtrap: 3624 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK)) 3625 .addImm(0xF000); 3626 return true; 3627 3628 case Intrinsic::sqrt: { 3629 Type *RetTy = II->getCalledFunction()->getReturnType(); 3630 3631 MVT VT; 3632 if (!isTypeLegal(RetTy, VT)) 3633 return false; 3634 3635 Register Op0Reg = getRegForValue(II->getOperand(0)); 3636 if (!Op0Reg) 3637 return false; 3638 3639 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg); 3640 if (!ResultReg) 3641 return false; 3642 3643 updateValueMap(II, ResultReg); 3644 return true; 3645 } 3646 case Intrinsic::sadd_with_overflow: 3647 case Intrinsic::uadd_with_overflow: 3648 case Intrinsic::ssub_with_overflow: 3649 case Intrinsic::usub_with_overflow: 3650 case Intrinsic::smul_with_overflow: 3651 case Intrinsic::umul_with_overflow: { 3652 // This implements the basic lowering of the xalu with overflow intrinsics. 3653 const Function *Callee = II->getCalledFunction(); 3654 auto *Ty = cast<StructType>(Callee->getReturnType()); 3655 Type *RetTy = Ty->getTypeAtIndex(0U); 3656 3657 MVT VT; 3658 if (!isTypeLegal(RetTy, VT)) 3659 return false; 3660 3661 if (VT != MVT::i32 && VT != MVT::i64) 3662 return false; 3663 3664 const Value *LHS = II->getArgOperand(0); 3665 const Value *RHS = II->getArgOperand(1); 3666 // Canonicalize immediate to the RHS. 3667 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3668 std::swap(LHS, RHS); 3669 3670 // Simplify multiplies. 
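    // For example, smul.with.overflow(%a, 2) has the same result and
    // overflow bit as sadd.with.overflow(%a, %a), so the rewrite below lets
    // us emit a single ADDS instead of a multiply plus overflow check.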
3671 Intrinsic::ID IID = II->getIntrinsicID(); 3672 switch (IID) { 3673 default: 3674 break; 3675 case Intrinsic::smul_with_overflow: 3676 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3677 if (C->getValue() == 2) { 3678 IID = Intrinsic::sadd_with_overflow; 3679 RHS = LHS; 3680 } 3681 break; 3682 case Intrinsic::umul_with_overflow: 3683 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3684 if (C->getValue() == 2) { 3685 IID = Intrinsic::uadd_with_overflow; 3686 RHS = LHS; 3687 } 3688 break; 3689 } 3690 3691 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3692 AArch64CC::CondCode CC = AArch64CC::Invalid; 3693 switch (IID) { 3694 default: llvm_unreachable("Unexpected intrinsic!"); 3695 case Intrinsic::sadd_with_overflow: 3696 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3697 CC = AArch64CC::VS; 3698 break; 3699 case Intrinsic::uadd_with_overflow: 3700 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3701 CC = AArch64CC::HS; 3702 break; 3703 case Intrinsic::ssub_with_overflow: 3704 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3705 CC = AArch64CC::VS; 3706 break; 3707 case Intrinsic::usub_with_overflow: 3708 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3709 CC = AArch64CC::LO; 3710 break; 3711 case Intrinsic::smul_with_overflow: { 3712 CC = AArch64CC::NE; 3713 Register LHSReg = getRegForValue(LHS); 3714 if (!LHSReg) 3715 return false; 3716 3717 Register RHSReg = getRegForValue(RHS); 3718 if (!RHSReg) 3719 return false; 3720 3721 if (VT == MVT::i32) { 3722 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg); 3723 Register MulSubReg = 3724 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3725 // cmp xreg, wreg, sxtw 3726 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg, 3727 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true, 3728 /*WantResult=*/false); 3729 MulReg = MulSubReg; 3730 } else { 3731 assert(VT == MVT::i64 && "Unexpected value type."); 3732 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3733 // reused in the next instruction. 3734 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3735 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg); 3736 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63, 3737 /*WantResult=*/false); 3738 } 3739 break; 3740 } 3741 case Intrinsic::umul_with_overflow: { 3742 CC = AArch64CC::NE; 3743 Register LHSReg = getRegForValue(LHS); 3744 if (!LHSReg) 3745 return false; 3746 3747 Register RHSReg = getRegForValue(RHS); 3748 if (!RHSReg) 3749 return false; 3750 3751 if (VT == MVT::i32) { 3752 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg); 3753 // tst xreg, #0xffffffff00000000 3754 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3755 TII.get(AArch64::ANDSXri), AArch64::XZR) 3756 .addReg(MulReg) 3757 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64)); 3758 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3759 } else { 3760 assert(VT == MVT::i64 && "Unexpected value type."); 3761 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3762 // reused in the next instruction. 
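        // The emitted i64 sequence is roughly (register names illustrative):
        //   mul   x8, x0, x1       ; low 64 bits of the product
        //   umulh x9, x0, x1       ; high 64 bits of the product
        //   subs  xzr, xzr, x9     ; NE (overflow) iff the high half != 0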
3763 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3764 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg); 3765 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false); 3766 } 3767 break; 3768 } 3769 } 3770 3771 if (MulReg) { 3772 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3773 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3774 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3775 } 3776 3777 if (!ResultReg1) 3778 return false; 3779 3780 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3781 AArch64::WZR, AArch64::WZR, 3782 getInvertedCondCode(CC)); 3783 (void)ResultReg2; 3784 assert((ResultReg1 + 1) == ResultReg2 && 3785 "Nonconsecutive result registers."); 3786 updateValueMap(II, ResultReg1, 2); 3787 return true; 3788 } 3789 } 3790 return false; 3791 } 3792 3793 bool AArch64FastISel::selectRet(const Instruction *I) { 3794 const ReturnInst *Ret = cast<ReturnInst>(I); 3795 const Function &F = *I->getParent()->getParent(); 3796 3797 if (!FuncInfo.CanLowerReturn) 3798 return false; 3799 3800 if (F.isVarArg()) 3801 return false; 3802 3803 if (TLI.supportSwiftError() && 3804 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 3805 return false; 3806 3807 if (TLI.supportSplitCSR(FuncInfo.MF)) 3808 return false; 3809 3810 // Build a list of return value registers. 3811 SmallVector<unsigned, 4> RetRegs; 3812 3813 if (Ret->getNumOperands() > 0) { 3814 CallingConv::ID CC = F.getCallingConv(); 3815 SmallVector<ISD::OutputArg, 4> Outs; 3816 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3817 3818 // Analyze operands of the call, assigning locations to each operand. 3819 SmallVector<CCValAssign, 16> ValLocs; 3820 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3821 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS 3822 : RetCC_AArch64_AAPCS; 3823 CCInfo.AnalyzeReturn(Outs, RetCC); 3824 3825 // Only handle a single return value for now. 3826 if (ValLocs.size() != 1) 3827 return false; 3828 3829 CCValAssign &VA = ValLocs[0]; 3830 const Value *RV = Ret->getOperand(0); 3831 3832 // Don't bother handling odd stuff for now. 3833 if ((VA.getLocInfo() != CCValAssign::Full) && 3834 (VA.getLocInfo() != CCValAssign::BCvt)) 3835 return false; 3836 3837 // Only handle register returns for now. 3838 if (!VA.isRegLoc()) 3839 return false; 3840 3841 Register Reg = getRegForValue(RV); 3842 if (Reg == 0) 3843 return false; 3844 3845 unsigned SrcReg = Reg + VA.getValNo(); 3846 Register DestReg = VA.getLocReg(); 3847 // Avoid a cross-class copy. This is very unlikely. 3848 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 3849 return false; 3850 3851 EVT RVEVT = TLI.getValueType(DL, RV->getType()); 3852 if (!RVEVT.isSimple()) 3853 return false; 3854 3855 // Vectors (of > 1 lane) in big endian need tricky handling. 3856 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() && 3857 !Subtarget->isLittleEndian()) 3858 return false; 3859 3860 MVT RVVT = RVEVT.getSimpleVT(); 3861 if (RVVT == MVT::f128) 3862 return false; 3863 3864 MVT DestVT = VA.getValVT(); 3865 // Special handling for extended integers. 3866 if (RVVT != DestVT) { 3867 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) 3868 return false; 3869 3870 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 3871 return false; 3872 3873 bool IsZExt = Outs[0].Flags.isZExt(); 3874 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); 3875 if (SrcReg == 0) 3876 return false; 3877 } 3878 3879 // "Callee" (i.e. 
value producer) zero extends pointers at function 3880 // boundary. 3881 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy()) 3882 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff); 3883 3884 // Make the copy. 3885 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3886 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); 3887 3888 // Add register to return instruction. 3889 RetRegs.push_back(VA.getLocReg()); 3890 } 3891 3892 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3893 TII.get(AArch64::RET_ReallyLR)); 3894 for (unsigned RetReg : RetRegs) 3895 MIB.addReg(RetReg, RegState::Implicit); 3896 return true; 3897 } 3898 3899 bool AArch64FastISel::selectTrunc(const Instruction *I) { 3900 Type *DestTy = I->getType(); 3901 Value *Op = I->getOperand(0); 3902 Type *SrcTy = Op->getType(); 3903 3904 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); 3905 EVT DestEVT = TLI.getValueType(DL, DestTy, true); 3906 if (!SrcEVT.isSimple()) 3907 return false; 3908 if (!DestEVT.isSimple()) 3909 return false; 3910 3911 MVT SrcVT = SrcEVT.getSimpleVT(); 3912 MVT DestVT = DestEVT.getSimpleVT(); 3913 3914 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && 3915 SrcVT != MVT::i8) 3916 return false; 3917 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && 3918 DestVT != MVT::i1) 3919 return false; 3920 3921 Register SrcReg = getRegForValue(Op); 3922 if (!SrcReg) 3923 return false; 3924 3925 // If we're truncating from i64 to a smaller non-legal type, then generate an 3926 // AND. Otherwise, we know the high bits are undefined and a truncate only 3927 // generates a COPY. We cannot mark the source register also as result 3928 // register, because this can incorrectly transfer the kill flag onto the 3929 // source register. 3930 unsigned ResultReg; 3931 if (SrcVT == MVT::i64) { 3932 uint64_t Mask = 0; 3933 switch (DestVT.SimpleTy) { 3934 default: 3935 // Trunc i64 to i32 is handled by the target-independent fast-isel. 3936 return false; 3937 case MVT::i1: 3938 Mask = 0x1; 3939 break; 3940 case MVT::i8: 3941 Mask = 0xff; 3942 break; 3943 case MVT::i16: 3944 Mask = 0xffff; 3945 break; 3946 } 3947 // Issue an extract_subreg to get the lower 32 bits. 3948 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, 3949 AArch64::sub_32); 3950 // Create the AND instruction which performs the actual truncation. 3951 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask); 3952 assert(ResultReg && "Unexpected AND instruction emission failure."); 3953 } else { 3954 ResultReg = createResultReg(&AArch64::GPR32RegClass); 3955 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3956 TII.get(TargetOpcode::COPY), ResultReg) 3957 .addReg(SrcReg); 3958 } 3959 3960 updateValueMap(I, ResultReg); 3961 return true; 3962 } 3963 3964 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { 3965 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || 3966 DestVT == MVT::i64) && 3967 "Unexpected value type."); 3968 // Handle i8 and i16 as i32. 3969 if (DestVT == MVT::i8 || DestVT == MVT::i16) 3970 DestVT = MVT::i32; 3971 3972 if (IsZExt) { 3973 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1); 3974 assert(ResultReg && "Unexpected AND instruction emission failure."); 3975 if (DestVT == MVT::i64) { 3976 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the 3977 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd.
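      // Roughly (illustrative): "and w8, w8, #0x1" already leaves the upper
      // 32 bits of x8 clear, so the SUBREG_TO_REG below only re-tags the
      // value at 64 bits and normally emits no extra code.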
3978 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3979 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3980 TII.get(AArch64::SUBREG_TO_REG), Reg64) 3981 .addImm(0) 3982 .addReg(ResultReg) 3983 .addImm(AArch64::sub_32); 3984 ResultReg = Reg64; 3985 } 3986 return ResultReg; 3987 } else { 3988 if (DestVT == MVT::i64) { 3989 // FIXME: We're SExt i1 to i64. 3990 return 0; 3991 } 3992 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 3993 0, 0); 3994 } 3995 } 3996 3997 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 3998 unsigned Opc, ZReg; 3999 switch (RetVT.SimpleTy) { 4000 default: return 0; 4001 case MVT::i8: 4002 case MVT::i16: 4003 case MVT::i32: 4004 RetVT = MVT::i32; 4005 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 4006 case MVT::i64: 4007 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 4008 } 4009 4010 const TargetRegisterClass *RC = 4011 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4012 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg); 4013 } 4014 4015 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4016 if (RetVT != MVT::i64) 4017 return 0; 4018 4019 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, 4020 Op0, Op1, AArch64::XZR); 4021 } 4022 4023 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4024 if (RetVT != MVT::i64) 4025 return 0; 4026 4027 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, 4028 Op0, Op1, AArch64::XZR); 4029 } 4030 4031 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, 4032 unsigned Op1Reg) { 4033 unsigned Opc = 0; 4034 bool NeedTrunc = false; 4035 uint64_t Mask = 0; 4036 switch (RetVT.SimpleTy) { 4037 default: return 0; 4038 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; 4039 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; 4040 case MVT::i32: Opc = AArch64::LSLVWr; break; 4041 case MVT::i64: Opc = AArch64::LSLVXr; break; 4042 } 4043 4044 const TargetRegisterClass *RC = 4045 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4046 if (NeedTrunc) 4047 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4048 4049 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4050 if (NeedTrunc) 4051 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4052 return ResultReg; 4053 } 4054 4055 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4056 uint64_t Shift, bool IsZExt) { 4057 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4058 "Unexpected source/return type pair."); 4059 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4060 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4061 "Unexpected source value type."); 4062 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4063 RetVT == MVT::i64) && "Unexpected return value type."); 4064 4065 bool Is64Bit = (RetVT == MVT::i64); 4066 unsigned RegSize = Is64Bit ? 64 : 32; 4067 unsigned DstBits = RetVT.getSizeInBits(); 4068 unsigned SrcBits = SrcVT.getSizeInBits(); 4069 const TargetRegisterClass *RC = 4070 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4071 4072 // Just emit a copy for "zero" shifts. 
4073 if (Shift == 0) { 4074 if (RetVT == SrcVT) { 4075 Register ResultReg = createResultReg(RC); 4076 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4077 TII.get(TargetOpcode::COPY), ResultReg) 4078 .addReg(Op0); 4079 return ResultReg; 4080 } else 4081 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4082 } 4083 4084 // Don't deal with undefined shifts. 4085 if (Shift >= DstBits) 4086 return 0; 4087 4088 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4089 // {S|U}BFM Wd, Wn, #r, #s 4090 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 4091 4092 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4093 // %2 = shl i16 %1, 4 4094 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 4095 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 4096 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 4097 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 4098 4099 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4100 // %2 = shl i16 %1, 8 4101 // Wd<32+7-24,32-24> = Wn<7:0> 4102 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 4103 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 4104 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 4105 4106 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4107 // %2 = shl i16 %1, 12 4108 // Wd<32+3-20,32-20> = Wn<3:0> 4109 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 4110 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 4111 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 4112 4113 unsigned ImmR = RegSize - Shift; 4114 // Limit the width to the length of the source type. 4115 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 4116 static const unsigned OpcTable[2][2] = { 4117 {AArch64::SBFMWri, AArch64::SBFMXri}, 4118 {AArch64::UBFMWri, AArch64::UBFMXri} 4119 }; 4120 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4121 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4122 Register TmpReg = MRI.createVirtualRegister(RC); 4123 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4124 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4125 .addImm(0) 4126 .addReg(Op0) 4127 .addImm(AArch64::sub_32); 4128 Op0 = TmpReg; 4129 } 4130 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4131 } 4132 4133 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, 4134 unsigned Op1Reg) { 4135 unsigned Opc = 0; 4136 bool NeedTrunc = false; 4137 uint64_t Mask = 0; 4138 switch (RetVT.SimpleTy) { 4139 default: return 0; 4140 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4141 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4142 case MVT::i32: Opc = AArch64::LSRVWr; break; 4143 case MVT::i64: Opc = AArch64::LSRVXr; break; 4144 } 4145 4146 const TargetRegisterClass *RC = 4147 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4148 if (NeedTrunc) { 4149 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask); 4150 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4151 } 4152 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4153 if (NeedTrunc) 4154 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4155 return ResultReg; 4156 } 4157 4158 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4159 uint64_t Shift, bool IsZExt) { 4160 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4161 "Unexpected source/return type pair."); 4162 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4163 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4164 "Unexpected source value type."); 4165 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4166 RetVT == MVT::i64) && "Unexpected return value type."); 4167 4168 bool Is64Bit = (RetVT == MVT::i64); 4169 unsigned RegSize = Is64Bit ? 64 : 32; 4170 unsigned DstBits = RetVT.getSizeInBits(); 4171 unsigned SrcBits = SrcVT.getSizeInBits(); 4172 const TargetRegisterClass *RC = 4173 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4174 4175 // Just emit a copy for "zero" shifts. 4176 if (Shift == 0) { 4177 if (RetVT == SrcVT) { 4178 Register ResultReg = createResultReg(RC); 4179 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4180 TII.get(TargetOpcode::COPY), ResultReg) 4181 .addReg(Op0); 4182 return ResultReg; 4183 } else 4184 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4185 } 4186 4187 // Don't deal with undefined shifts. 4188 if (Shift >= DstBits) 4189 return 0; 4190 4191 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4192 // {S|U}BFM Wd, Wn, #r, #s 4193 // Wd<s-r:0> = Wn<s:r> when r <= s 4194 4195 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4196 // %2 = lshr i16 %1, 4 4197 // Wd<7-4:0> = Wn<7:4> 4198 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4199 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4200 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4201 4202 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4203 // %2 = lshr i16 %1, 8 4204 // Wd<7-7,0> = Wn<7:7> 4205 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4206 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4207 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4208 4209 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4210 // %2 = lshr i16 %1, 12 4211 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4212 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4213 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4214 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4215 4216 if (Shift >= SrcBits && IsZExt) 4217 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4218 4219 // It is not possible to fold a sign-extend into the LShr instruction. In this 4220 // case emit a sign-extend. 
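  // (A single UBFM cannot both replicate the sign bits into the middle of the
  // result and zero everything above them, so we widen with a real
  // sign-extend first and then perform the shift as if it were zero-extended.)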
4221 if (!IsZExt) { 4222 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4223 if (!Op0) 4224 return 0; 4225 SrcVT = RetVT; 4226 SrcBits = SrcVT.getSizeInBits(); 4227 IsZExt = true; 4228 } 4229 4230 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4231 unsigned ImmS = SrcBits - 1; 4232 static const unsigned OpcTable[2][2] = { 4233 {AArch64::SBFMWri, AArch64::SBFMXri}, 4234 {AArch64::UBFMWri, AArch64::UBFMXri} 4235 }; 4236 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4237 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4238 Register TmpReg = MRI.createVirtualRegister(RC); 4239 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4240 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4241 .addImm(0) 4242 .addReg(Op0) 4243 .addImm(AArch64::sub_32); 4244 Op0 = TmpReg; 4245 } 4246 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4247 } 4248 4249 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, 4250 unsigned Op1Reg) { 4251 unsigned Opc = 0; 4252 bool NeedTrunc = false; 4253 uint64_t Mask = 0; 4254 switch (RetVT.SimpleTy) { 4255 default: return 0; 4256 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4257 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4258 case MVT::i32: Opc = AArch64::ASRVWr; break; 4259 case MVT::i64: Opc = AArch64::ASRVXr; break; 4260 } 4261 4262 const TargetRegisterClass *RC = 4263 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4264 if (NeedTrunc) { 4265 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); 4266 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4267 } 4268 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4269 if (NeedTrunc) 4270 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4271 return ResultReg; 4272 } 4273 4274 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4275 uint64_t Shift, bool IsZExt) { 4276 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4277 "Unexpected source/return type pair."); 4278 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4279 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4280 "Unexpected source value type."); 4281 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4282 RetVT == MVT::i64) && "Unexpected return value type."); 4283 4284 bool Is64Bit = (RetVT == MVT::i64); 4285 unsigned RegSize = Is64Bit ? 64 : 32; 4286 unsigned DstBits = RetVT.getSizeInBits(); 4287 unsigned SrcBits = SrcVT.getSizeInBits(); 4288 const TargetRegisterClass *RC = 4289 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4290 4291 // Just emit a copy for "zero" shifts. 4292 if (Shift == 0) { 4293 if (RetVT == SrcVT) { 4294 Register ResultReg = createResultReg(RC); 4295 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4296 TII.get(TargetOpcode::COPY), ResultReg) 4297 .addReg(Op0); 4298 return ResultReg; 4299 } else 4300 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4301 } 4302 4303 // Don't deal with undefined shifts. 4304 if (Shift >= DstBits) 4305 return 0; 4306 4307 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
4308 // {S|U}BFM Wd, Wn, #r, #s 4309 // Wd<s-r:0> = Wn<s:r> when r <= s 4310 4311 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4312 // %2 = ashr i16 %1, 4 4313 // Wd<7-4:0> = Wn<7:4> 4314 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4315 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4316 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4317 4318 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4319 // %2 = ashr i16 %1, 8 4320 // Wd<7-7,0> = Wn<7:7> 4321 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4322 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4323 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4324 4325 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4326 // %2 = ashr i16 %1, 12 4327 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4328 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4329 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4330 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4331 4332 if (Shift >= SrcBits && IsZExt) 4333 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4334 4335 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4336 unsigned ImmS = SrcBits - 1; 4337 static const unsigned OpcTable[2][2] = { 4338 {AArch64::SBFMWri, AArch64::SBFMXri}, 4339 {AArch64::UBFMWri, AArch64::UBFMXri} 4340 }; 4341 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4342 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4343 Register TmpReg = MRI.createVirtualRegister(RC); 4344 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4345 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4346 .addImm(0) 4347 .addReg(Op0) 4348 .addImm(AArch64::sub_32); 4349 Op0 = TmpReg; 4350 } 4351 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4352 } 4353 4354 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4355 bool IsZExt) { 4356 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4357 4358 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4359 // DestVT are odd things, so test to make sure that they are both types we can 4360 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4361 // bail out to SelectionDAG. 4362 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4363 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4364 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4365 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4366 return 0; 4367 4368 unsigned Opc; 4369 unsigned Imm = 0; 4370 4371 switch (SrcVT.SimpleTy) { 4372 default: 4373 return 0; 4374 case MVT::i1: 4375 return emiti1Ext(SrcReg, DestVT, IsZExt); 4376 case MVT::i8: 4377 if (DestVT == MVT::i64) 4378 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4379 else 4380 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4381 Imm = 7; 4382 break; 4383 case MVT::i16: 4384 if (DestVT == MVT::i64) 4385 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4386 else 4387 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4388 Imm = 15; 4389 break; 4390 case MVT::i32: 4391 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4392 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4393 Imm = 31; 4394 break; 4395 } 4396 4397 // Handle i8 and i16 as i32. 
4398 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4399 DestVT = MVT::i32; 4400 else if (DestVT == MVT::i64) { 4401 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4402 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4403 TII.get(AArch64::SUBREG_TO_REG), Src64) 4404 .addImm(0) 4405 .addReg(SrcReg) 4406 .addImm(AArch64::sub_32); 4407 SrcReg = Src64; 4408 } 4409 4410 const TargetRegisterClass *RC = 4411 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4412 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm); 4413 } 4414 4415 static bool isZExtLoad(const MachineInstr *LI) { 4416 switch (LI->getOpcode()) { 4417 default: 4418 return false; 4419 case AArch64::LDURBBi: 4420 case AArch64::LDURHHi: 4421 case AArch64::LDURWi: 4422 case AArch64::LDRBBui: 4423 case AArch64::LDRHHui: 4424 case AArch64::LDRWui: 4425 case AArch64::LDRBBroX: 4426 case AArch64::LDRHHroX: 4427 case AArch64::LDRWroX: 4428 case AArch64::LDRBBroW: 4429 case AArch64::LDRHHroW: 4430 case AArch64::LDRWroW: 4431 return true; 4432 } 4433 } 4434 4435 static bool isSExtLoad(const MachineInstr *LI) { 4436 switch (LI->getOpcode()) { 4437 default: 4438 return false; 4439 case AArch64::LDURSBWi: 4440 case AArch64::LDURSHWi: 4441 case AArch64::LDURSBXi: 4442 case AArch64::LDURSHXi: 4443 case AArch64::LDURSWi: 4444 case AArch64::LDRSBWui: 4445 case AArch64::LDRSHWui: 4446 case AArch64::LDRSBXui: 4447 case AArch64::LDRSHXui: 4448 case AArch64::LDRSWui: 4449 case AArch64::LDRSBWroX: 4450 case AArch64::LDRSHWroX: 4451 case AArch64::LDRSBXroX: 4452 case AArch64::LDRSHXroX: 4453 case AArch64::LDRSWroX: 4454 case AArch64::LDRSBWroW: 4455 case AArch64::LDRSHWroW: 4456 case AArch64::LDRSBXroW: 4457 case AArch64::LDRSHXroW: 4458 case AArch64::LDRSWroW: 4459 return true; 4460 } 4461 } 4462 4463 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4464 MVT SrcVT) { 4465 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4466 if (!LI || !LI->hasOneUse()) 4467 return false; 4468 4469 // Check if the load instruction has already been selected. 4470 Register Reg = lookUpRegForValue(LI); 4471 if (!Reg) 4472 return false; 4473 4474 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4475 if (!MI) 4476 return false; 4477 4478 // Check if the correct load instruction has been emitted - SelectionDAG might 4479 // have emitted a zero-extending load, but we need a sign-extending load. 4480 bool IsZExt = isa<ZExtInst>(I); 4481 const auto *LoadMI = MI; 4482 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4483 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4484 Register LoadReg = MI->getOperand(1).getReg(); 4485 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4486 assert(LoadMI && "Expected valid instruction"); 4487 } 4488 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4489 return false; 4490 4491 // Nothing to be done. 
4492 if (RetVT != MVT::i64 || SrcVT > MVT::i32) { 4493 updateValueMap(I, Reg); 4494 return true; 4495 } 4496 4497 if (IsZExt) { 4498 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 4499 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4500 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4501 .addImm(0) 4502 .addReg(Reg, getKillRegState(true)) 4503 .addImm(AArch64::sub_32); 4504 Reg = Reg64; 4505 } else { 4506 assert((MI->getOpcode() == TargetOpcode::COPY && 4507 MI->getOperand(1).getSubReg() == AArch64::sub_32) && 4508 "Expected copy instruction"); 4509 Reg = MI->getOperand(1).getReg(); 4510 MachineBasicBlock::iterator I(MI); 4511 removeDeadCode(I, std::next(I)); 4512 } 4513 updateValueMap(I, Reg); 4514 return true; 4515 } 4516 4517 bool AArch64FastISel::selectIntExt(const Instruction *I) { 4518 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 4519 "Unexpected integer extend instruction."); 4520 MVT RetVT; 4521 MVT SrcVT; 4522 if (!isTypeSupported(I->getType(), RetVT)) 4523 return false; 4524 4525 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) 4526 return false; 4527 4528 // Try to optimize already sign-/zero-extended values from load instructions. 4529 if (optimizeIntExtLoad(I, RetVT, SrcVT)) 4530 return true; 4531 4532 Register SrcReg = getRegForValue(I->getOperand(0)); 4533 if (!SrcReg) 4534 return false; 4535 4536 // Try to optimize already sign-/zero-extended values from function arguments. 4537 bool IsZExt = isa<ZExtInst>(I); 4538 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) { 4539 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { 4540 if (RetVT == MVT::i64 && SrcVT != MVT::i64) { 4541 Register ResultReg = createResultReg(&AArch64::GPR64RegClass); 4542 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4543 TII.get(AArch64::SUBREG_TO_REG), ResultReg) 4544 .addImm(0) 4545 .addReg(SrcReg) 4546 .addImm(AArch64::sub_32); 4547 SrcReg = ResultReg; 4548 } 4549 4550 updateValueMap(I, SrcReg); 4551 return true; 4552 } 4553 } 4554 4555 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); 4556 if (!ResultReg) 4557 return false; 4558 4559 updateValueMap(I, ResultReg); 4560 return true; 4561 } 4562 4563 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { 4564 EVT DestEVT = TLI.getValueType(DL, I->getType(), true); 4565 if (!DestEVT.isSimple()) 4566 return false; 4567 4568 MVT DestVT = DestEVT.getSimpleVT(); 4569 if (DestVT != MVT::i64 && DestVT != MVT::i32) 4570 return false; 4571 4572 unsigned DivOpc; 4573 bool Is64bit = (DestVT == MVT::i64); 4574 switch (ISDOpcode) { 4575 default: 4576 return false; 4577 case ISD::SREM: 4578 DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr; 4579 break; 4580 case ISD::UREM: 4581 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; 4582 break; 4583 } 4584 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4585 Register Src0Reg = getRegForValue(I->getOperand(0)); 4586 if (!Src0Reg) 4587 return false; 4588 4589 Register Src1Reg = getRegForValue(I->getOperand(1)); 4590 if (!Src1Reg) 4591 return false; 4592 4593 const TargetRegisterClass *RC = 4594 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4595 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg); 4596 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4597 // The remainder is computed as numerator - (quotient * denominator) using the 4598 // MSUB instruction. 
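  // For a 32-bit srem this is roughly (register names illustrative):
  //   sdiv w8, w0, w1
  //   msub w9, w8, w1, w0    ; w9 = w0 - (w8 * w1)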
4599 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg); 4600 updateValueMap(I, ResultReg); 4601 return true; 4602 } 4603 4604 bool AArch64FastISel::selectMul(const Instruction *I) { 4605 MVT VT; 4606 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4607 return false; 4608 4609 if (VT.isVector()) 4610 return selectBinaryOp(I, ISD::MUL); 4611 4612 const Value *Src0 = I->getOperand(0); 4613 const Value *Src1 = I->getOperand(1); 4614 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4615 if (C->getValue().isPowerOf2()) 4616 std::swap(Src0, Src1); 4617 4618 // Try to simplify to a shift instruction. 4619 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4620 if (C->getValue().isPowerOf2()) { 4621 uint64_t ShiftVal = C->getValue().logBase2(); 4622 MVT SrcVT = VT; 4623 bool IsZExt = true; 4624 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4625 if (!isIntExtFree(ZExt)) { 4626 MVT VT; 4627 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4628 SrcVT = VT; 4629 IsZExt = true; 4630 Src0 = ZExt->getOperand(0); 4631 } 4632 } 4633 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4634 if (!isIntExtFree(SExt)) { 4635 MVT VT; 4636 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4637 SrcVT = VT; 4638 IsZExt = false; 4639 Src0 = SExt->getOperand(0); 4640 } 4641 } 4642 } 4643 4644 Register Src0Reg = getRegForValue(Src0); 4645 if (!Src0Reg) 4646 return false; 4647 4648 unsigned ResultReg = 4649 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt); 4650 4651 if (ResultReg) { 4652 updateValueMap(I, ResultReg); 4653 return true; 4654 } 4655 } 4656 4657 Register Src0Reg = getRegForValue(I->getOperand(0)); 4658 if (!Src0Reg) 4659 return false; 4660 4661 Register Src1Reg = getRegForValue(I->getOperand(1)); 4662 if (!Src1Reg) 4663 return false; 4664 4665 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg); 4666 4667 if (!ResultReg) 4668 return false; 4669 4670 updateValueMap(I, ResultReg); 4671 return true; 4672 } 4673 4674 bool AArch64FastISel::selectShift(const Instruction *I) { 4675 MVT RetVT; 4676 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4677 return false; 4678 4679 if (RetVT.isVector()) 4680 return selectOperator(I, I->getOpcode()); 4681 4682 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4683 unsigned ResultReg = 0; 4684 uint64_t ShiftVal = C->getZExtValue(); 4685 MVT SrcVT = RetVT; 4686 bool IsZExt = I->getOpcode() != Instruction::AShr; 4687 const Value *Op0 = I->getOperand(0); 4688 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4689 if (!isIntExtFree(ZExt)) { 4690 MVT TmpVT; 4691 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4692 SrcVT = TmpVT; 4693 IsZExt = true; 4694 Op0 = ZExt->getOperand(0); 4695 } 4696 } 4697 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4698 if (!isIntExtFree(SExt)) { 4699 MVT TmpVT; 4700 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4701 SrcVT = TmpVT; 4702 IsZExt = false; 4703 Op0 = SExt->getOperand(0); 4704 } 4705 } 4706 } 4707 4708 Register Op0Reg = getRegForValue(Op0); 4709 if (!Op0Reg) 4710 return false; 4711 4712 switch (I->getOpcode()) { 4713 default: llvm_unreachable("Unexpected instruction."); 4714 case Instruction::Shl: 4715 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4716 break; 4717 case Instruction::AShr: 4718 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4719 break; 4720 case Instruction::LShr: 4721 ResultReg = 
emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4722 break; 4723 } 4724 if (!ResultReg) 4725 return false; 4726 4727 updateValueMap(I, ResultReg); 4728 return true; 4729 } 4730 4731 Register Op0Reg = getRegForValue(I->getOperand(0)); 4732 if (!Op0Reg) 4733 return false; 4734 4735 Register Op1Reg = getRegForValue(I->getOperand(1)); 4736 if (!Op1Reg) 4737 return false; 4738 4739 unsigned ResultReg = 0; 4740 switch (I->getOpcode()) { 4741 default: llvm_unreachable("Unexpected instruction."); 4742 case Instruction::Shl: 4743 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg); 4744 break; 4745 case Instruction::AShr: 4746 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg); 4747 break; 4748 case Instruction::LShr: 4749 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg); 4750 break; 4751 } 4752 4753 if (!ResultReg) 4754 return false; 4755 4756 updateValueMap(I, ResultReg); 4757 return true; 4758 } 4759 4760 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4761 MVT RetVT, SrcVT; 4762 4763 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4764 return false; 4765 if (!isTypeLegal(I->getType(), RetVT)) 4766 return false; 4767 4768 unsigned Opc; 4769 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4770 Opc = AArch64::FMOVWSr; 4771 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4772 Opc = AArch64::FMOVXDr; 4773 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4774 Opc = AArch64::FMOVSWr; 4775 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4776 Opc = AArch64::FMOVDXr; 4777 else 4778 return false; 4779 4780 const TargetRegisterClass *RC = nullptr; 4781 switch (RetVT.SimpleTy) { 4782 default: llvm_unreachable("Unexpected value type."); 4783 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4784 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4785 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4786 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4787 } 4788 Register Op0Reg = getRegForValue(I->getOperand(0)); 4789 if (!Op0Reg) 4790 return false; 4791 4792 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg); 4793 if (!ResultReg) 4794 return false; 4795 4796 updateValueMap(I, ResultReg); 4797 return true; 4798 } 4799 4800 bool AArch64FastISel::selectFRem(const Instruction *I) { 4801 MVT RetVT; 4802 if (!isTypeLegal(I->getType(), RetVT)) 4803 return false; 4804 4805 RTLIB::Libcall LC; 4806 switch (RetVT.SimpleTy) { 4807 default: 4808 return false; 4809 case MVT::f32: 4810 LC = RTLIB::REM_F32; 4811 break; 4812 case MVT::f64: 4813 LC = RTLIB::REM_F64; 4814 break; 4815 } 4816 4817 ArgListTy Args; 4818 Args.reserve(I->getNumOperands()); 4819 4820 // Populate the argument list. 
4821 for (auto &Arg : I->operands()) { 4822 ArgListEntry Entry; 4823 Entry.Val = Arg; 4824 Entry.Ty = Arg->getType(); 4825 Args.push_back(Entry); 4826 } 4827 4828 CallLoweringInfo CLI; 4829 MCContext &Ctx = MF->getContext(); 4830 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4831 TLI.getLibcallName(LC), std::move(Args)); 4832 if (!lowerCallTo(CLI)) 4833 return false; 4834 updateValueMap(I, CLI.ResultReg); 4835 return true; 4836 } 4837 4838 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4839 MVT VT; 4840 if (!isTypeLegal(I->getType(), VT)) 4841 return false; 4842 4843 if (!isa<ConstantInt>(I->getOperand(1))) 4844 return selectBinaryOp(I, ISD::SDIV); 4845 4846 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4847 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4848 !(C.isPowerOf2() || C.isNegatedPowerOf2())) 4849 return selectBinaryOp(I, ISD::SDIV); 4850 4851 unsigned Lg2 = C.countTrailingZeros(); 4852 Register Src0Reg = getRegForValue(I->getOperand(0)); 4853 if (!Src0Reg) 4854 return false; 4855 4856 if (cast<BinaryOperator>(I)->isExact()) { 4857 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2); 4858 if (!ResultReg) 4859 return false; 4860 updateValueMap(I, ResultReg); 4861 return true; 4862 } 4863 4864 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4865 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne); 4866 if (!AddReg) 4867 return false; 4868 4869 // (Src0 < 0) ? Pow2 - 1 : 0; 4870 if (!emitICmp_ri(VT, Src0Reg, 0)) 4871 return false; 4872 4873 unsigned SelectOpc; 4874 const TargetRegisterClass *RC; 4875 if (VT == MVT::i64) { 4876 SelectOpc = AArch64::CSELXr; 4877 RC = &AArch64::GPR64RegClass; 4878 } else { 4879 SelectOpc = AArch64::CSELWr; 4880 RC = &AArch64::GPR32RegClass; 4881 } 4882 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg, 4883 AArch64CC::LT); 4884 if (!SelectReg) 4885 return false; 4886 4887 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4888 // negate the result. 4889 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4890 unsigned ResultReg; 4891 if (C.isNegative()) 4892 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg, 4893 AArch64_AM::ASR, Lg2); 4894 else 4895 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2); 4896 4897 if (!ResultReg) 4898 return false; 4899 4900 updateValueMap(I, ResultReg); 4901 return true; 4902 } 4903 4904 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4905 /// have to duplicate it for AArch64, because otherwise we would fail during the 4906 /// sign-extend emission. 4907 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4908 Register IdxN = getRegForValue(Idx); 4909 if (IdxN == 0) 4910 // Unhandled operand. Halt "fast" selection and bail. 4911 return 0; 4912 4913 // If the index is smaller or larger than intptr_t, truncate or extend it. 4914 MVT PtrVT = TLI.getPointerTy(DL); 4915 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); 4916 if (IdxVT.bitsLT(PtrVT)) { 4917 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false); 4918 } else if (IdxVT.bitsGT(PtrVT)) 4919 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64"); 4920 return IdxN; 4921 } 4922 4923 /// This is mostly a copy of the existing FastISel GEP code, but we have to 4924 /// duplicate it for AArch64, because otherwise we would bail out even for 4925 /// simple cases. 
This is because the standard fastEmit functions don't cover 4926 /// MUL at all and ADD is lowered very inefficiently. 4927 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { 4928 if (Subtarget->isTargetILP32()) 4929 return false; 4930 4931 Register N = getRegForValue(I->getOperand(0)); 4932 if (!N) 4933 return false; 4934 4935 // Keep a running tab of the total offset to coalesce multiple N = N + Offset 4936 // into a single N = N + TotalOffset. 4937 uint64_t TotalOffs = 0; 4938 MVT VT = TLI.getPointerTy(DL); 4939 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I); 4940 GTI != E; ++GTI) { 4941 const Value *Idx = GTI.getOperand(); 4942 if (auto *StTy = GTI.getStructTypeOrNull()) { 4943 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); 4944 // N = N + Offset 4945 if (Field) 4946 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); 4947 } else { 4948 Type *Ty = GTI.getIndexedType(); 4949 4950 // If this is a constant subscript, handle it quickly. 4951 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { 4952 if (CI->isZero()) 4953 continue; 4954 // N = N + Offset 4955 TotalOffs += 4956 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); 4957 continue; 4958 } 4959 if (TotalOffs) { 4960 N = emitAdd_ri_(VT, N, TotalOffs); 4961 if (!N) 4962 return false; 4963 TotalOffs = 0; 4964 } 4965 4966 // N = N + Idx * ElementSize; 4967 uint64_t ElementSize = DL.getTypeAllocSize(Ty); 4968 unsigned IdxN = getRegForGEPIndex(Idx); 4969 if (!IdxN) 4970 return false; 4971 4972 if (ElementSize != 1) { 4973 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); 4974 if (!C) 4975 return false; 4976 IdxN = emitMul_rr(VT, IdxN, C); 4977 if (!IdxN) 4978 return false; 4979 } 4980 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN); 4981 if (!N) 4982 return false; 4983 } 4984 } 4985 if (TotalOffs) { 4986 N = emitAdd_ri_(VT, N, TotalOffs); 4987 if (!N) 4988 return false; 4989 } 4990 updateValueMap(I, N); 4991 return true; 4992 } 4993 4994 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) { 4995 assert(TM.getOptLevel() == CodeGenOpt::None && 4996 "cmpxchg survived AtomicExpand at optlevel > -O0"); 4997 4998 auto *RetPairTy = cast<StructType>(I->getType()); 4999 Type *RetTy = RetPairTy->getTypeAtIndex(0U); 5000 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) && 5001 "cmpxchg has a non-i1 status result"); 5002 5003 MVT VT; 5004 if (!isTypeLegal(RetTy, VT)) 5005 return false; 5006 5007 const TargetRegisterClass *ResRC; 5008 unsigned Opc, CmpOpc; 5009 // This only supports i32/i64, because i8/i16 aren't legal, and the generic 5010 // extractvalue selection doesn't support that.
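  // After the CMP_SWAP_* pseudo yields the old memory value, the i1 status
  // result is rebuilt below as (illustrative for the i32 case):
  //   subs  wzr, <old>, <desired>    ; compare the loaded value with expected
  //   csinc <status>, wzr, wzr, ne   ; status = 1 iff the compare was equal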
5011 if (VT == MVT::i32) { 5012 Opc = AArch64::CMP_SWAP_32; 5013 CmpOpc = AArch64::SUBSWrs; 5014 ResRC = &AArch64::GPR32RegClass; 5015 } else if (VT == MVT::i64) { 5016 Opc = AArch64::CMP_SWAP_64; 5017 CmpOpc = AArch64::SUBSXrs; 5018 ResRC = &AArch64::GPR64RegClass; 5019 } else { 5020 return false; 5021 } 5022 5023 const MCInstrDesc &II = TII.get(Opc); 5024 5025 const Register AddrReg = constrainOperandRegClass( 5026 II, getRegForValue(I->getPointerOperand()), II.getNumDefs()); 5027 const Register DesiredReg = constrainOperandRegClass( 5028 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1); 5029 const Register NewReg = constrainOperandRegClass( 5030 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2); 5031 5032 const Register ResultReg1 = createResultReg(ResRC); 5033 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass); 5034 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass); 5035 5036 // FIXME: MachineMemOperand doesn't support cmpxchg yet. 5037 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 5038 .addDef(ResultReg1) 5039 .addDef(ScratchReg) 5040 .addUse(AddrReg) 5041 .addUse(DesiredReg) 5042 .addUse(NewReg); 5043 5044 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc)) 5045 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR) 5046 .addUse(ResultReg1) 5047 .addUse(DesiredReg) 5048 .addImm(0); 5049 5050 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr)) 5051 .addDef(ResultReg2) 5052 .addUse(AArch64::WZR) 5053 .addUse(AArch64::WZR) 5054 .addImm(AArch64CC::NE); 5055 5056 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); 5057 updateValueMap(I, ResultReg1, 2); 5058 return true; 5059 } 5060 5061 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 5062 if (TLI.fallBackToDAGISel(*I)) 5063 return false; 5064 switch (I->getOpcode()) { 5065 default: 5066 break; 5067 case Instruction::Add: 5068 case Instruction::Sub: 5069 return selectAddSub(I); 5070 case Instruction::Mul: 5071 return selectMul(I); 5072 case Instruction::SDiv: 5073 return selectSDiv(I); 5074 case Instruction::SRem: 5075 if (!selectBinaryOp(I, ISD::SREM)) 5076 return selectRem(I, ISD::SREM); 5077 return true; 5078 case Instruction::URem: 5079 if (!selectBinaryOp(I, ISD::UREM)) 5080 return selectRem(I, ISD::UREM); 5081 return true; 5082 case Instruction::Shl: 5083 case Instruction::LShr: 5084 case Instruction::AShr: 5085 return selectShift(I); 5086 case Instruction::And: 5087 case Instruction::Or: 5088 case Instruction::Xor: 5089 return selectLogicalOp(I); 5090 case Instruction::Br: 5091 return selectBranch(I); 5092 case Instruction::IndirectBr: 5093 return selectIndirectBr(I); 5094 case Instruction::BitCast: 5095 if (!FastISel::selectBitCast(I)) 5096 return selectBitCast(I); 5097 return true; 5098 case Instruction::FPToSI: 5099 if (!selectCast(I, ISD::FP_TO_SINT)) 5100 return selectFPToInt(I, /*Signed=*/true); 5101 return true; 5102 case Instruction::FPToUI: 5103 return selectFPToInt(I, /*Signed=*/false); 5104 case Instruction::ZExt: 5105 case Instruction::SExt: 5106 return selectIntExt(I); 5107 case Instruction::Trunc: 5108 if (!selectCast(I, ISD::TRUNCATE)) 5109 return selectTrunc(I); 5110 return true; 5111 case Instruction::FPExt: 5112 return selectFPExt(I); 5113 case Instruction::FPTrunc: 5114 return selectFPTrunc(I); 5115 case Instruction::SIToFP: 5116 if (!selectCast(I, ISD::SINT_TO_FP)) 5117 return selectIntToFP(I, /*Signed=*/true); 5118 return true; 5119 case Instruction::UIToFP: 
5120 return selectIntToFP(I, /*Signed=*/false); 5121 case Instruction::Load: 5122 return selectLoad(I); 5123 case Instruction::Store: 5124 return selectStore(I); 5125 case Instruction::FCmp: 5126 case Instruction::ICmp: 5127 return selectCmp(I); 5128 case Instruction::Select: 5129 return selectSelect(I); 5130 case Instruction::Ret: 5131 return selectRet(I); 5132 case Instruction::FRem: 5133 return selectFRem(I); 5134 case Instruction::GetElementPtr: 5135 return selectGetElementPtr(I); 5136 case Instruction::AtomicCmpXchg: 5137 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I)); 5138 } 5139 5140 // fall-back to target-independent instruction selection. 5141 return selectOperator(I, I->getOpcode()); 5142 } 5143 5144 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, 5145 const TargetLibraryInfo *LibInfo) { 5146 5147 SMEAttrs CallerAttrs(*FuncInfo.Fn); 5148 if (CallerAttrs.hasZAState() || 5149 (!CallerAttrs.hasStreamingInterface() && CallerAttrs.hasStreamingBody())) 5150 return nullptr; 5151 return new AArch64FastISel(FuncInfo, LibInfo); 5152 } 5153