//===- AArch64FastISel.cpp - AArch64 FastISel implementation ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg = 0;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }

    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }

    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }

    unsigned getOffsetReg() const {
      return OffsetReg;
    }

    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }

    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }

    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags,
                            unsigned ScaleFactor, MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, MaybeAlign Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          MaybeAlign Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  unsigned getRegForGEPIndex(const Value *Idx);

  // Emit helper routines.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
                        MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            unsigned RHSReg, uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = false);

  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP* CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

/// Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    if (LI->hasOneUse())
      return true;

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
      return true;

  return false;
}

/// Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
static unsigned getImplicitScaleFactor(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return 0; // invalid
  case MVT::i1: // fall-through
  case MVT::i8:
    return 1;
  case MVT::i16:
    return 2;
  case MVT::i32: // fall-through
  case MVT::f32:
    return 4;
  case MVT::i64: // fall-through
  case MVT::f64:
    return 8;
  }
}

CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::GHC)
    return CC_AArch64_GHC;
  if (CC == CallingConv::CFGuard_Check)
    return CC_AArch64_Win64_CFGuard_Check;
  if (Subtarget->isTargetDarwin())
    return CC_AArch64_DarwinPCS;
  if (Subtarget->isTargetWindows())
    return CC_AArch64_Win64PCS;
  return CC_AArch64_AAPCS;
}

unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}

unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  Register ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}

unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  int Imm =
      Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
  if (Imm != -1) {
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // For the large code model materialize the FP constant in code.
  if (TM.getCodeModel() == CodeModel::Large) {
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    Register TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc1), TmpReg)
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(TmpReg, getKillRegState(true));

    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}
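
// A rough, illustrative summary of what materializeGV (below) emits; the
// register names and assembler syntax here are only examples. For a
// GOT-accessed global the result is an ADRP + GOT load, e.g.
//   adrp x8, :got:var
//   ldr  x0, [x8, :got_lo12:var]
// and for a directly addressed global an ADRP + ADD, e.g.
//   adrp x8, var
//   add  x0, x8, :lo12:var
// On ILP32 the GOT load is 32 bits wide and is widened back to a 64-bit
// register with SUBREG_TO_REG, matching the code below.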
unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
    return 0;

  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    unsigned LdrOpc;
    if (Subtarget->isTargetILP32()) {
      ResultReg = createResultReg(&AArch64::GPR32RegClass);
      LdrOpc = AArch64::LDRWui;
    } else {
      ResultReg = createResultReg(&AArch64::GPR64RegClass);
      LdrOpc = AArch64::LDRXui;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(LdrOpc),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                          AArch64II::MO_NC | OpFlags);
    if (!Subtarget->isTargetILP32())
      return ResultReg;

    // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
    // so we must extend the result on ILP32.
    Register Result64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD,
            TII.get(TargetOpcode::SUBREG_TO_REG))
        .addDef(Result64)
        .addImm(0)
        .addReg(ResultReg, RegState::Kill)
        .addImm(AArch64::sub_32);
    return Result64;
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    if (OpFlags & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also be
      // loaded into address range [0, 2^48). Both of these properties need to
      // be ensured at runtime when using tagged addresses.
      //
      // TODO: There is duplicate logic in AArch64ExpandPseudoInsts.cpp that
      // also uses BuildMI for making an ADRP (+ MOVK) + ADD, but the operands
      // are not exactly 1:1 with FastISel so we cannot easily abstract this
      // out. At some point, it would be nice to find a way to not have this
      // duplicate code.
      unsigned DstReg = createResultReg(&AArch64::GPR64commonRegClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::MOVKXi),
              DstReg)
          .addReg(ADRPReg)
          .addGlobalAddress(GV, /*Offset=*/0x100000000,
                            AArch64II::MO_PREL | AArch64II::MO_G3)
          .addImm(48);
      ADRPReg = DstReg;
    }

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0,
                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
        .addImm(0);
  }
  return ResultReg;
}

unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();
  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
  // 'null' pointers need to have a somewhat special treatment.
  if (isa<ConstantPointerNull>(C)) {
    assert(VT == MVT::i64 && "Expected 64-bit pointers");
    return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
  }

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

  return 0;
}

unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return 0;

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
}

/// Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
        return true;
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
        return true;
  }
  return false;
}
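
// An illustrative example of the folding computeAddress (below) performs;
// the value names and register names are placeholders, not part of the
// original source. IR of the form
//   %idxprom = sext i32 %idx to i64
//   %offs    = shl i64 %idxprom, 2
//   %addr    = add i64 %base, %offs
// feeding a 4-byte access can typically be folded into a single
// register-offset access such as
//   ldr wN, [xBase, wIdx, sxtw #2]
// with the base register, offset register, extend type and shift amount all
// recorded in the Address object rather than emitted as separate arithmetic.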
// Computes the address to get to an object.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast:
    // Look through bitcasts.
    return computeAddress(U->getOperand(0), Addr, Ty);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
         GTI != E; ++GTI) {
      const Value *Op = GTI.getOperand();
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = GTI.getSequentialElementStride(DL);
        while (true) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
  case Instruction::Sub: {
    // Subs of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }
    break;
  }
  case Instruction::Shl: {
    if (Addr.getOffsetReg())
      break;

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
    if (!CI)
      break;

    unsigned Val = CI->getZExtValue();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
            std::swap(LHS, RHS);

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Register Reg = getRegForValue(LHS);
            if (!Reg)
              return false;
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
            Addr.setOffsetReg(Reg);
            return true;
          }
      }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::Mul: {
    if (Addr.getOffsetReg())
      break;

    if (!isMulPowOf2(U))
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize power-of-2 value to the RHS.
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())
        std::swap(LHS, RHS);

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = LHS;
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::And: {
    if (Addr.getOffsetReg())
      break;

    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
        std::swap(LHS, RHS);

    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        Register Reg = getRegForValue(LHS);
        if (!Reg)
          return false;
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
        Addr.setOffsetReg(Reg);
        return true;
      }
    break;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())
      break;

    const Value *Src = nullptr;
    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    if (!Src)
      break;

    Addr.setShift(0);
    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  } // end switch

  if (Addr.isRegBase() && !Addr.getReg()) {
    Register Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setReg(Reg);
    return true;
  }

  if (!Addr.getOffsetReg()) {
    Register Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }

  return false;
}

bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  bool InMBB = true;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}

bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
    return false;

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

/// Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign- or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}
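
// A short reminder of the immediate-offset forms that simplifyAddress (below)
// has to fit; this is a summary of the checks in the code, with placeholder
// register names:
//   * scaled, unsigned 12-bit immediate:  ldr x0, [x1, #off]   where
//     off = Offset / sizeof(type) and 0 <= off <= 4095;
//   * unscaled, signed 9-bit immediate:   ldur x0, [x1, #off]  where
//     -256 <= off <= 255;
//   * register offset (optionally extended/shifted), which cannot be combined
//     with an immediate offset in the same instruction.
// Offsets that fit none of these are lowered here with an extra ADD/LSL
// before the memory access.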
bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW )
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}
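
// A note on the register-offset operand order used by addLoadStoreOperands
// (below): after the base and offset registers, the machine instruction takes
// two boolean immediates, roughly "is the index sign-extended (SXTW/SXTX)?"
// and "is the index shifted by the access size?". This is only a summary of
// what the code adds; the authoritative operand lists are in the AArch64
// instruction definitions.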
void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           MachineMemOperand::Flags Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    Addr.setReg(
        constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx));
    Addr.setOffsetReg(
        constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}
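
// emitAddSub (below) is the common worker for adds, subtracts and compares.
// Roughly, it tries the cheaper encodings first: a 12-bit (possibly
// LSL #12-shifted) immediate via emitAddSub_ri, an extended register
// (UXTB/UXTH/SXTB/SXTH) via emitAddSub_rx for sub-word operands, a shifted
// register via emitAddSub_rs when the RHS is a single-use shift or a multiply
// by a power of two, and finally the plain register-register form via
// emitAddSub_rr.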
unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32: // fall-through
  case MVT::i64:
    break;
  }
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr )
          std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
                                WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    Register RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
                         SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      Register RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
                                ShiftVal, SetFlags, WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          Register RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
                                    ShiftVal, SetFlags, WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
}
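
// Naming convention used by the helpers below (a summary, matching the code):
//   _rr  register + register            e.g. ADDWrr / SUBSXrr
//   _ri  register + 12-bit immediate    e.g. ADDWri, optionally LSL #12
//   _rs  register + shifted register    e.g. ADDXrs with LSL/LSR/ASR #imm
//   _rx  register + extended register   e.g. ADDXrx with UXTB/SXTH/... #imm
// Each OpcTable is indexed as [SetFlags][UseAdd][Is64Bit].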
unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
      RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
    return 0;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr, AArch64::SUBXrr },
      { AArch64::ADDWrr, AArch64::ADDXrr } },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg);
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        uint64_t Imm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  unsigned ShiftImm;
  if (isUInt<12>(Imm))
    ShiftImm = 0;
  else if ((Imm & 0xfff000) == Imm) {
    ShiftImm = 12;
    Imm >>= 12;
  } else
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri, AArch64::SUBXri },
      { AArch64::ADDWri, AArch64::ADDXri } },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addImm(Imm)
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs, AArch64::SUBXrs },
      { AArch64::ADDWrs, AArch64::ADDXrs } },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getShifterImm(ShiftType, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
         RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  if (ShiftImm >= 4)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx, AArch64::SUBXrx },
      { AArch64::ADDWrx, AArch64::ADDXrx } },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx } }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC = nullptr;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getArithExtendImm(ExtType, ShiftImm));
  return ResultReg;
}

bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
  Type *Ty = LHS->getType();
  EVT EVT = TLI.getValueType(DL, Ty, true);
  if (!EVT.isSimple())
    return false;
  MVT VT = EVT.getSimpleVT();

  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    return emitICmp(VT, LHS, RHS, IsZExt);
  case MVT::f32:
  case MVT::f64:
    return emitFCmp(VT, LHS, RHS);
  }
}

bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
                 IsZExt) != 0;
}

bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
}

bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  if (RetVT != MVT::f32 && RetVT != MVT::f64)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  bool UseImm = false;
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->isZero() && !CFP->isNegative())
      UseImm = true;

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;

  if (UseImm) {
    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
        .addReg(LHSReg);
    return true;
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return false;

  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc))
      .addReg(LHSReg)
      .addReg(RHSReg);
  return true;
}

unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

/// This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
  unsigned ResultReg;
  if (Imm < 0)
    ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
  else
    ResultReg = emitAddSub_ri(true, VT, Op0, Imm);

  if (ResultReg)
    return ResultReg;

  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return 0;

  ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
  return ResultReg;
}

unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg, bool WantResult) {
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
                       /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
                       ShiftImm, /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      Register RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        Register RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, uint64_t Imm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };
  const TargetRegisterClass *RC;
  unsigned Opc;
  unsigned RegSize;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32: {
    unsigned Idx = ISDOpc - ISD::AND;
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    RegSize = 32;
    break;
  }
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;
    RegSize = 64;
    break;
  }

  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return 0;

  Register ResultReg =
      fastEmitInst_ri(Opc, RC, LHSReg,
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, unsigned RHSReg,
                                           uint64_t ShiftImm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };

  // Don't deal with undefined shifts.
1716 if (ShiftImm >= RetVT.getSizeInBits()) 1717 return 0; 1718 1719 const TargetRegisterClass *RC; 1720 unsigned Opc; 1721 switch (RetVT.SimpleTy) { 1722 default: 1723 return 0; 1724 case MVT::i1: 1725 case MVT::i8: 1726 case MVT::i16: 1727 case MVT::i32: 1728 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1729 RC = &AArch64::GPR32RegClass; 1730 break; 1731 case MVT::i64: 1732 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1733 RC = &AArch64::GPR64RegClass; 1734 break; 1735 } 1736 Register ResultReg = 1737 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg, 1738 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1739 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1740 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1741 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1742 } 1743 return ResultReg; 1744 } 1745 1746 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, 1747 uint64_t Imm) { 1748 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm); 1749 } 1750 1751 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1752 bool WantZExt, MachineMemOperand *MMO) { 1753 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1754 return 0; 1755 1756 // Simplify this down to something we can handle. 1757 if (!simplifyAddress(Addr, VT)) 1758 return 0; 1759 1760 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1761 if (!ScaleFactor) 1762 llvm_unreachable("Unexpected value type."); 1763 1764 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1765 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1766 bool UseScaled = true; 1767 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1768 UseScaled = false; 1769 ScaleFactor = 1; 1770 } 1771 1772 static const unsigned GPOpcTable[2][8][4] = { 1773 // Sign-extend. 1774 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1775 AArch64::LDURXi }, 1776 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1777 AArch64::LDURXi }, 1778 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1779 AArch64::LDRXui }, 1780 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1781 AArch64::LDRXui }, 1782 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1783 AArch64::LDRXroX }, 1784 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1785 AArch64::LDRXroX }, 1786 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1787 AArch64::LDRXroW }, 1788 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1789 AArch64::LDRXroW } 1790 }, 1791 // Zero-extend. 
1792 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1793 AArch64::LDURXi }, 1794 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1795 AArch64::LDURXi }, 1796 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1797 AArch64::LDRXui }, 1798 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1799 AArch64::LDRXui }, 1800 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1801 AArch64::LDRXroX }, 1802 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1803 AArch64::LDRXroX }, 1804 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1805 AArch64::LDRXroW }, 1806 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1807 AArch64::LDRXroW } 1808 } 1809 }; 1810 1811 static const unsigned FPOpcTable[4][2] = { 1812 { AArch64::LDURSi, AArch64::LDURDi }, 1813 { AArch64::LDRSui, AArch64::LDRDui }, 1814 { AArch64::LDRSroX, AArch64::LDRDroX }, 1815 { AArch64::LDRSroW, AArch64::LDRDroW } 1816 }; 1817 1818 unsigned Opc; 1819 const TargetRegisterClass *RC; 1820 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1821 Addr.getOffsetReg(); 1822 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1823 if (Addr.getExtendType() == AArch64_AM::UXTW || 1824 Addr.getExtendType() == AArch64_AM::SXTW) 1825 Idx++; 1826 1827 bool IsRet64Bit = RetVT == MVT::i64; 1828 switch (VT.SimpleTy) { 1829 default: 1830 llvm_unreachable("Unexpected value type."); 1831 case MVT::i1: // Intentional fall-through. 1832 case MVT::i8: 1833 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1834 RC = (IsRet64Bit && !WantZExt) ? 1835 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1836 break; 1837 case MVT::i16: 1838 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1839 RC = (IsRet64Bit && !WantZExt) ? 1840 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1841 break; 1842 case MVT::i32: 1843 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1844 RC = (IsRet64Bit && !WantZExt) ? 1845 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1846 break; 1847 case MVT::i64: 1848 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1849 RC = &AArch64::GPR64RegClass; 1850 break; 1851 case MVT::f32: 1852 Opc = FPOpcTable[Idx][0]; 1853 RC = &AArch64::FPR32RegClass; 1854 break; 1855 case MVT::f64: 1856 Opc = FPOpcTable[Idx][1]; 1857 RC = &AArch64::FPR64RegClass; 1858 break; 1859 } 1860 1861 // Create the base instruction, then add the operands. 1862 Register ResultReg = createResultReg(RC); 1863 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1864 TII.get(Opc), ResultReg); 1865 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1866 1867 // Loading an i1 requires special handling. 1868 if (VT == MVT::i1) { 1869 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1); 1870 assert(ANDReg && "Unexpected AND instruction emission failure."); 1871 ResultReg = ANDReg; 1872 } 1873 1874 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1875 // the 32bit reg to a 64bit reg. 
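  // Writing a W register implicitly zeroes the upper 32 bits of the corresponding
  // X register, so a SUBREG_TO_REG is all that is needed below; no UBFM/UXTW.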
1876 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1877 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 1878 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 1879 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1880 .addImm(0) 1881 .addReg(ResultReg, getKillRegState(true)) 1882 .addImm(AArch64::sub_32); 1883 ResultReg = Reg64; 1884 } 1885 return ResultReg; 1886 } 1887 1888 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1889 MVT VT; 1890 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1891 return false; 1892 1893 if (VT.isVector()) 1894 return selectOperator(I, I->getOpcode()); 1895 1896 unsigned ResultReg; 1897 switch (I->getOpcode()) { 1898 default: 1899 llvm_unreachable("Unexpected instruction."); 1900 case Instruction::Add: 1901 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1902 break; 1903 case Instruction::Sub: 1904 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1905 break; 1906 } 1907 if (!ResultReg) 1908 return false; 1909 1910 updateValueMap(I, ResultReg); 1911 return true; 1912 } 1913 1914 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1915 MVT VT; 1916 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1917 return false; 1918 1919 if (VT.isVector()) 1920 return selectOperator(I, I->getOpcode()); 1921 1922 unsigned ResultReg; 1923 switch (I->getOpcode()) { 1924 default: 1925 llvm_unreachable("Unexpected instruction."); 1926 case Instruction::And: 1927 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1928 break; 1929 case Instruction::Or: 1930 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1931 break; 1932 case Instruction::Xor: 1933 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1934 break; 1935 } 1936 if (!ResultReg) 1937 return false; 1938 1939 updateValueMap(I, ResultReg); 1940 return true; 1941 } 1942 1943 bool AArch64FastISel::selectLoad(const Instruction *I) { 1944 MVT VT; 1945 // Verify we have a legal type before going any further. Currently, we handle 1946 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1947 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 1948 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1949 cast<LoadInst>(I)->isAtomic()) 1950 return false; 1951 1952 const Value *SV = I->getOperand(0); 1953 if (TLI.supportSwiftError()) { 1954 // Swifterror values can come from either a function parameter with 1955 // swifterror attribute or an alloca with swifterror attribute. 1956 if (const Argument *Arg = dyn_cast<Argument>(SV)) { 1957 if (Arg->hasSwiftErrorAttr()) 1958 return false; 1959 } 1960 1961 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { 1962 if (Alloca->isSwiftError()) 1963 return false; 1964 } 1965 } 1966 1967 // See if we can handle this address. 1968 Address Addr; 1969 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 1970 return false; 1971 1972 // Fold the following sign-/zero-extend into the load instruction. 
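  // For example, '%c = load i8, ptr %p' followed by 'zext i8 %c to i32' can be
  // covered by the single extending load picked in emitLoad (LDRB/LDRSB and
  // friends), so the extend need not be selected separately.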
1973 bool WantZExt = true; 1974 MVT RetVT = VT; 1975 const Value *IntExtVal = nullptr; 1976 if (I->hasOneUse()) { 1977 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { 1978 if (isTypeSupported(ZE->getType(), RetVT)) 1979 IntExtVal = ZE; 1980 else 1981 RetVT = VT; 1982 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { 1983 if (isTypeSupported(SE->getType(), RetVT)) 1984 IntExtVal = SE; 1985 else 1986 RetVT = VT; 1987 WantZExt = false; 1988 } 1989 } 1990 1991 unsigned ResultReg = 1992 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); 1993 if (!ResultReg) 1994 return false; 1995 1996 // There are a few different cases we have to handle, because the load or the 1997 // sign-/zero-extend might not be selected by FastISel if we fall-back to 1998 // SelectionDAG. There is also an ordering issue when both instructions are in 1999 // different basic blocks. 2000 // 1.) The load instruction is selected by FastISel, but the integer extend 2001 // not. This usually happens when the integer extend is in a different 2002 // basic block and SelectionDAG took over for that basic block. 2003 // 2.) The load instruction is selected before the integer extend. This only 2004 // happens when the integer extend is in a different basic block. 2005 // 3.) The load instruction is selected by SelectionDAG and the integer extend 2006 // by FastISel. This happens if there are instructions between the load 2007 // and the integer extend that couldn't be selected by FastISel. 2008 if (IntExtVal) { 2009 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG 2010 // could select it. Emit a copy to subreg if necessary. FastISel will remove 2011 // it when it selects the integer extend. 2012 Register Reg = lookUpRegForValue(IntExtVal); 2013 auto *MI = MRI.getUniqueVRegDef(Reg); 2014 if (!MI) { 2015 if (RetVT == MVT::i64 && VT <= MVT::i32) { 2016 if (WantZExt) { 2017 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). 2018 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt)); 2019 ResultReg = std::prev(I)->getOperand(0).getReg(); 2020 removeDeadCode(I, std::next(I)); 2021 } else 2022 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, 2023 AArch64::sub_32); 2024 } 2025 updateValueMap(I, ResultReg); 2026 return true; 2027 } 2028 2029 // The integer extend has already been emitted - delete all the instructions 2030 // that have been emitted by the integer extend lowering code and use the 2031 // result from the load instruction directly. 
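  // (This walks the short def chain the extend lowering produced, typically a
  // SUBREG_TO_REG or SBFM/UBFM plus copies, deleting each now-dead instruction.)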
2032 while (MI) { 2033 Reg = 0; 2034 for (auto &Opnd : MI->uses()) { 2035 if (Opnd.isReg()) { 2036 Reg = Opnd.getReg(); 2037 break; 2038 } 2039 } 2040 MachineBasicBlock::iterator I(MI); 2041 removeDeadCode(I, std::next(I)); 2042 MI = nullptr; 2043 if (Reg) 2044 MI = MRI.getUniqueVRegDef(Reg); 2045 } 2046 updateValueMap(IntExtVal, ResultReg); 2047 return true; 2048 } 2049 2050 updateValueMap(I, ResultReg); 2051 return true; 2052 } 2053 2054 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, 2055 unsigned AddrReg, 2056 MachineMemOperand *MMO) { 2057 unsigned Opc; 2058 switch (VT.SimpleTy) { 2059 default: return false; 2060 case MVT::i8: Opc = AArch64::STLRB; break; 2061 case MVT::i16: Opc = AArch64::STLRH; break; 2062 case MVT::i32: Opc = AArch64::STLRW; break; 2063 case MVT::i64: Opc = AArch64::STLRX; break; 2064 } 2065 2066 const MCInstrDesc &II = TII.get(Opc); 2067 SrcReg = constrainOperandRegClass(II, SrcReg, 0); 2068 AddrReg = constrainOperandRegClass(II, AddrReg, 1); 2069 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 2070 .addReg(SrcReg) 2071 .addReg(AddrReg) 2072 .addMemOperand(MMO); 2073 return true; 2074 } 2075 2076 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 2077 MachineMemOperand *MMO) { 2078 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 2079 return false; 2080 2081 // Simplify this down to something we can handle. 2082 if (!simplifyAddress(Addr, VT)) 2083 return false; 2084 2085 unsigned ScaleFactor = getImplicitScaleFactor(VT); 2086 if (!ScaleFactor) 2087 llvm_unreachable("Unexpected value type."); 2088 2089 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 2090 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 2091 bool UseScaled = true; 2092 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 2093 UseScaled = false; 2094 ScaleFactor = 1; 2095 } 2096 2097 static const unsigned OpcTable[4][6] = { 2098 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 2099 AArch64::STURSi, AArch64::STURDi }, 2100 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 2101 AArch64::STRSui, AArch64::STRDui }, 2102 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 2103 AArch64::STRSroX, AArch64::STRDroX }, 2104 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 2105 AArch64::STRSroW, AArch64::STRDroW } 2106 }; 2107 2108 unsigned Opc; 2109 bool VTIsi1 = false; 2110 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 2111 Addr.getOffsetReg(); 2112 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 2113 if (Addr.getExtendType() == AArch64_AM::UXTW || 2114 Addr.getExtendType() == AArch64_AM::SXTW) 2115 Idx++; 2116 2117 switch (VT.SimpleTy) { 2118 default: llvm_unreachable("Unexpected value type."); 2119 case MVT::i1: VTIsi1 = true; [[fallthrough]]; 2120 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2121 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2122 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2123 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2124 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2125 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2126 } 2127 2128 // Storing an i1 requires special handling. 2129 if (VTIsi1 && SrcReg != AArch64::WZR) { 2130 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1); 2131 assert(ANDReg && "Unexpected AND instruction emission failure."); 2132 SrcReg = ANDReg; 2133 } 2134 // Create the base instruction, then add the operands. 
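  // constrainOperandRegClass makes sure SrcReg is in a register class the chosen
  // store opcode accepts; addLoadStoreOperands then appends the base (register or
  // frame index) and the offset computed by simplifyAddress.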
2135 const MCInstrDesc &II = TII.get(Opc); 2136 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2137 MachineInstrBuilder MIB = 2138 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(SrcReg); 2139 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2140 2141 return true; 2142 } 2143 2144 bool AArch64FastISel::selectStore(const Instruction *I) { 2145 MVT VT; 2146 const Value *Op0 = I->getOperand(0); 2147 // Verify we have a legal type before going any further. Currently, we handle 2148 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2149 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2150 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) 2151 return false; 2152 2153 const Value *PtrV = I->getOperand(1); 2154 if (TLI.supportSwiftError()) { 2155 // Swifterror values can come from either a function parameter with 2156 // swifterror attribute or an alloca with swifterror attribute. 2157 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { 2158 if (Arg->hasSwiftErrorAttr()) 2159 return false; 2160 } 2161 2162 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { 2163 if (Alloca->isSwiftError()) 2164 return false; 2165 } 2166 } 2167 2168 // Get the value to be stored into a register. Use the zero register directly 2169 // when possible to avoid an unnecessary copy and a wasted register. 2170 unsigned SrcReg = 0; 2171 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2172 if (CI->isZero()) 2173 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2174 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2175 if (CF->isZero() && !CF->isNegative()) { 2176 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2177 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2178 } 2179 } 2180 2181 if (!SrcReg) 2182 SrcReg = getRegForValue(Op0); 2183 2184 if (!SrcReg) 2185 return false; 2186 2187 auto *SI = cast<StoreInst>(I); 2188 2189 // Try to emit a STLR for seq_cst/release. 2190 if (SI->isAtomic()) { 2191 AtomicOrdering Ord = SI->getOrdering(); 2192 // The non-atomic instructions are sufficient for relaxed stores. 2193 if (isReleaseOrStronger(Ord)) { 2194 // The STLR addressing mode only supports a base reg; pass that directly. 2195 Register AddrReg = getRegForValue(PtrV); 2196 return emitStoreRelease(VT, SrcReg, AddrReg, 2197 createMachineMemOperandFor(I)); 2198 } 2199 } 2200 2201 // See if we can handle this address. 2202 Address Addr; 2203 if (!computeAddress(PtrV, Addr, Op0->getType())) 2204 return false; 2205 2206 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2207 return false; 2208 return true; 2209 } 2210 2211 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2212 switch (Pred) { 2213 case CmpInst::FCMP_ONE: 2214 case CmpInst::FCMP_UEQ: 2215 default: 2216 // AL is our "false" for now. The other two need more compares. 
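    // (FCMP_ONE is handled elsewhere as GT plus an extra MI check, and FCMP_UEQ
    // as VS plus an extra EQ check; see selectBranch and selectSelect.)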
2217 return AArch64CC::AL; 2218 case CmpInst::ICMP_EQ: 2219 case CmpInst::FCMP_OEQ: 2220 return AArch64CC::EQ; 2221 case CmpInst::ICMP_SGT: 2222 case CmpInst::FCMP_OGT: 2223 return AArch64CC::GT; 2224 case CmpInst::ICMP_SGE: 2225 case CmpInst::FCMP_OGE: 2226 return AArch64CC::GE; 2227 case CmpInst::ICMP_UGT: 2228 case CmpInst::FCMP_UGT: 2229 return AArch64CC::HI; 2230 case CmpInst::FCMP_OLT: 2231 return AArch64CC::MI; 2232 case CmpInst::ICMP_ULE: 2233 case CmpInst::FCMP_OLE: 2234 return AArch64CC::LS; 2235 case CmpInst::FCMP_ORD: 2236 return AArch64CC::VC; 2237 case CmpInst::FCMP_UNO: 2238 return AArch64CC::VS; 2239 case CmpInst::FCMP_UGE: 2240 return AArch64CC::PL; 2241 case CmpInst::ICMP_SLT: 2242 case CmpInst::FCMP_ULT: 2243 return AArch64CC::LT; 2244 case CmpInst::ICMP_SLE: 2245 case CmpInst::FCMP_ULE: 2246 return AArch64CC::LE; 2247 case CmpInst::FCMP_UNE: 2248 case CmpInst::ICMP_NE: 2249 return AArch64CC::NE; 2250 case CmpInst::ICMP_UGE: 2251 return AArch64CC::HS; 2252 case CmpInst::ICMP_ULT: 2253 return AArch64CC::LO; 2254 } 2255 } 2256 2257 /// Try to emit a combined compare-and-branch instruction. 2258 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2259 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions 2260 // will not be produced, as they are conditional branch instructions that do 2261 // not set flags. 2262 if (FuncInfo.MF->getFunction().hasFnAttribute( 2263 Attribute::SpeculativeLoadHardening)) 2264 return false; 2265 2266 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2267 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2268 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2269 2270 const Value *LHS = CI->getOperand(0); 2271 const Value *RHS = CI->getOperand(1); 2272 2273 MVT VT; 2274 if (!isTypeSupported(LHS->getType(), VT)) 2275 return false; 2276 2277 unsigned BW = VT.getSizeInBits(); 2278 if (BW > 64) 2279 return false; 2280 2281 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2282 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2283 2284 // Try to take advantage of fallthrough opportunities. 
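  // If the true block is also the next block in layout, swap the successors and
  // invert the predicate so the emitted CB(N)Z/TB(N)Z targets the out-of-line
  // block and the common case simply falls through.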
2285 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2286 std::swap(TBB, FBB); 2287 Predicate = CmpInst::getInversePredicate(Predicate); 2288 } 2289 2290 int TestBit = -1; 2291 bool IsCmpNE; 2292 switch (Predicate) { 2293 default: 2294 return false; 2295 case CmpInst::ICMP_EQ: 2296 case CmpInst::ICMP_NE: 2297 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2298 std::swap(LHS, RHS); 2299 2300 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2301 return false; 2302 2303 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2304 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2305 const Value *AndLHS = AI->getOperand(0); 2306 const Value *AndRHS = AI->getOperand(1); 2307 2308 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2309 if (C->getValue().isPowerOf2()) 2310 std::swap(AndLHS, AndRHS); 2311 2312 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2313 if (C->getValue().isPowerOf2()) { 2314 TestBit = C->getValue().logBase2(); 2315 LHS = AndLHS; 2316 } 2317 } 2318 2319 if (VT == MVT::i1) 2320 TestBit = 0; 2321 2322 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2323 break; 2324 case CmpInst::ICMP_SLT: 2325 case CmpInst::ICMP_SGE: 2326 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2327 return false; 2328 2329 TestBit = BW - 1; 2330 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2331 break; 2332 case CmpInst::ICMP_SGT: 2333 case CmpInst::ICMP_SLE: 2334 if (!isa<ConstantInt>(RHS)) 2335 return false; 2336 2337 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2338 return false; 2339 2340 TestBit = BW - 1; 2341 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2342 break; 2343 } // end switch 2344 2345 static const unsigned OpcTable[2][2][2] = { 2346 { {AArch64::CBZW, AArch64::CBZX }, 2347 {AArch64::CBNZW, AArch64::CBNZX} }, 2348 { {AArch64::TBZW, AArch64::TBZX }, 2349 {AArch64::TBNZW, AArch64::TBNZX} } 2350 }; 2351 2352 bool IsBitTest = TestBit != -1; 2353 bool Is64Bit = BW == 64; 2354 if (TestBit < 32 && TestBit >= 0) 2355 Is64Bit = false; 2356 2357 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2358 const MCInstrDesc &II = TII.get(Opc); 2359 2360 Register SrcReg = getRegForValue(LHS); 2361 if (!SrcReg) 2362 return false; 2363 2364 if (BW == 64 && !Is64Bit) 2365 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32); 2366 2367 if ((BW < 32) && !IsBitTest) 2368 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); 2369 2370 // Emit the combined compare and branch instruction. 2371 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2372 MachineInstrBuilder MIB = 2373 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc)) 2374 .addReg(SrcReg); 2375 if (IsBitTest) 2376 MIB.addImm(TestBit); 2377 MIB.addMBB(TBB); 2378 2379 finishCondBranch(BI->getParent(), TBB, FBB); 2380 return true; 2381 } 2382 2383 bool AArch64FastISel::selectBranch(const Instruction *I) { 2384 const BranchInst *BI = cast<BranchInst>(I); 2385 if (BI->isUnconditional()) { 2386 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2387 fastEmitBranch(MSucc, BI->getDebugLoc()); 2388 return true; 2389 } 2390 2391 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2392 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2393 2394 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2395 if (CI->hasOneUse() && isValueAvailable(CI)) { 2396 // Try to optimize or fold the cmp. 
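      // optimizeCmpPredicate may fold the compare into FCMP_TRUE/FCMP_FALSE (for
      // example when both operands are the same value), turning the conditional
      // branch into one of the unconditional branches handled just below.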
2397 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2398 switch (Predicate) { 2399 default: 2400 break; 2401 case CmpInst::FCMP_FALSE: 2402 fastEmitBranch(FBB, MIMD.getDL()); 2403 return true; 2404 case CmpInst::FCMP_TRUE: 2405 fastEmitBranch(TBB, MIMD.getDL()); 2406 return true; 2407 } 2408 2409 // Try to emit a combined compare-and-branch first. 2410 if (emitCompareAndBranch(BI)) 2411 return true; 2412 2413 // Try to take advantage of fallthrough opportunities. 2414 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2415 std::swap(TBB, FBB); 2416 Predicate = CmpInst::getInversePredicate(Predicate); 2417 } 2418 2419 // Emit the cmp. 2420 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2421 return false; 2422 2423 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2424 // instruction. 2425 AArch64CC::CondCode CC = getCompareCC(Predicate); 2426 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2427 switch (Predicate) { 2428 default: 2429 break; 2430 case CmpInst::FCMP_UEQ: 2431 ExtraCC = AArch64CC::EQ; 2432 CC = AArch64CC::VS; 2433 break; 2434 case CmpInst::FCMP_ONE: 2435 ExtraCC = AArch64CC::MI; 2436 CC = AArch64CC::GT; 2437 break; 2438 } 2439 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2440 2441 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2442 if (ExtraCC != AArch64CC::AL) { 2443 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2444 .addImm(ExtraCC) 2445 .addMBB(TBB); 2446 } 2447 2448 // Emit the branch. 2449 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2450 .addImm(CC) 2451 .addMBB(TBB); 2452 2453 finishCondBranch(BI->getParent(), TBB, FBB); 2454 return true; 2455 } 2456 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2457 uint64_t Imm = CI->getZExtValue(); 2458 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::B)) 2460 .addMBB(Target); 2461 2462 // Obtain the branch probability and add the target to the successor list. 2463 if (FuncInfo.BPI) { 2464 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2465 BI->getParent(), Target->getBasicBlock()); 2466 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2467 } else 2468 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2469 return true; 2470 } else { 2471 AArch64CC::CondCode CC = AArch64CC::NE; 2472 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2473 // Fake request the condition, otherwise the intrinsic might be completely 2474 // optimized away. 2475 Register CondReg = getRegForValue(BI->getCondition()); 2476 if (!CondReg) 2477 return false; 2478 2479 // Emit the branch. 2480 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::Bcc)) 2481 .addImm(CC) 2482 .addMBB(TBB); 2483 2484 finishCondBranch(BI->getParent(), TBB, FBB); 2485 return true; 2486 } 2487 } 2488 2489 Register CondReg = getRegForValue(BI->getCondition()); 2490 if (CondReg == 0) 2491 return false; 2492 2493 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
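  // Only bit 0 of the materialized i1 is meaningful, so a single TB(N)Z on bit #0
  // implements the branch; as elsewhere, prefer the form whose false edge is the
  // layout fallthrough.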
2494 unsigned Opcode = AArch64::TBNZW; 2495 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2496 std::swap(TBB, FBB); 2497 Opcode = AArch64::TBZW; 2498 } 2499 2500 const MCInstrDesc &II = TII.get(Opcode); 2501 Register ConstrainedCondReg 2502 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2503 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 2504 .addReg(ConstrainedCondReg) 2505 .addImm(0) 2506 .addMBB(TBB); 2507 2508 finishCondBranch(BI->getParent(), TBB, FBB); 2509 return true; 2510 } 2511 2512 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2513 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2514 Register AddrReg = getRegForValue(BI->getOperand(0)); 2515 if (AddrReg == 0) 2516 return false; 2517 2518 // Emit the indirect branch. 2519 const MCInstrDesc &II = TII.get(AArch64::BR); 2520 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2521 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(AddrReg); 2522 2523 // Make sure the CFG is up-to-date. 2524 for (const auto *Succ : BI->successors()) 2525 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); 2526 2527 return true; 2528 } 2529 2530 bool AArch64FastISel::selectCmp(const Instruction *I) { 2531 const CmpInst *CI = cast<CmpInst>(I); 2532 2533 // Vectors of i1 are weird: bail out. 2534 if (CI->getType()->isVectorTy()) 2535 return false; 2536 2537 // Try to optimize or fold the cmp. 2538 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2539 unsigned ResultReg = 0; 2540 switch (Predicate) { 2541 default: 2542 break; 2543 case CmpInst::FCMP_FALSE: 2544 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2545 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 2546 TII.get(TargetOpcode::COPY), ResultReg) 2547 .addReg(AArch64::WZR, getKillRegState(true)); 2548 break; 2549 case CmpInst::FCMP_TRUE: 2550 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2551 break; 2552 } 2553 2554 if (ResultReg) { 2555 updateValueMap(I, ResultReg); 2556 return true; 2557 } 2558 2559 // Emit the cmp. 2560 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2561 return false; 2562 2563 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2564 2565 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2566 // condition codes are inverted, because they are used by CSINC. 2567 static unsigned CondCodeTable[2][2] = { 2568 { AArch64CC::NE, AArch64CC::VC }, 2569 { AArch64CC::PL, AArch64CC::LE } 2570 }; 2571 unsigned *CondCodes = nullptr; 2572 switch (Predicate) { 2573 default: 2574 break; 2575 case CmpInst::FCMP_UEQ: 2576 CondCodes = &CondCodeTable[0][0]; 2577 break; 2578 case CmpInst::FCMP_ONE: 2579 CondCodes = &CondCodeTable[1][0]; 2580 break; 2581 } 2582 2583 if (CondCodes) { 2584 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2585 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2586 TmpReg1) 2587 .addReg(AArch64::WZR, getKillRegState(true)) 2588 .addReg(AArch64::WZR, getKillRegState(true)) 2589 .addImm(CondCodes[0]); 2590 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2591 ResultReg) 2592 .addReg(TmpReg1, getKillRegState(true)) 2593 .addReg(AArch64::WZR, getKillRegState(true)) 2594 .addImm(CondCodes[1]); 2595 2596 updateValueMap(I, ResultReg); 2597 return true; 2598 } 2599 2600 // Now set a register based on the comparison. 
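  // CSINC Wd, WZR, WZR, invert(CC) is the canonical CSET: it produces 1 when CC
  // holds and 0 otherwise.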
2601 AArch64CC::CondCode CC = getCompareCC(Predicate); 2602 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2603 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2604 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr), 2605 ResultReg) 2606 .addReg(AArch64::WZR, getKillRegState(true)) 2607 .addReg(AArch64::WZR, getKillRegState(true)) 2608 .addImm(invertedCC); 2609 2610 updateValueMap(I, ResultReg); 2611 return true; 2612 } 2613 2614 /// Optimize selects of i1 if one of the operands has a 'true' or 'false' 2615 /// value. 2616 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2617 if (!SI->getType()->isIntegerTy(1)) 2618 return false; 2619 2620 const Value *Src1Val, *Src2Val; 2621 unsigned Opc = 0; 2622 bool NeedExtraOp = false; 2623 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2624 if (CI->isOne()) { 2625 Src1Val = SI->getCondition(); 2626 Src2Val = SI->getFalseValue(); 2627 Opc = AArch64::ORRWrr; 2628 } else { 2629 assert(CI->isZero()); 2630 Src1Val = SI->getFalseValue(); 2631 Src2Val = SI->getCondition(); 2632 Opc = AArch64::BICWrr; 2633 } 2634 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2635 if (CI->isOne()) { 2636 Src1Val = SI->getCondition(); 2637 Src2Val = SI->getTrueValue(); 2638 Opc = AArch64::ORRWrr; 2639 NeedExtraOp = true; 2640 } else { 2641 assert(CI->isZero()); 2642 Src1Val = SI->getCondition(); 2643 Src2Val = SI->getTrueValue(); 2644 Opc = AArch64::ANDWrr; 2645 } 2646 } 2647 2648 if (!Opc) 2649 return false; 2650 2651 Register Src1Reg = getRegForValue(Src1Val); 2652 if (!Src1Reg) 2653 return false; 2654 2655 Register Src2Reg = getRegForValue(Src2Val); 2656 if (!Src2Reg) 2657 return false; 2658 2659 if (NeedExtraOp) 2660 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1); 2661 2662 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2663 Src2Reg); 2664 updateValueMap(SI, ResultReg); 2665 return true; 2666 } 2667 2668 bool AArch64FastISel::selectSelect(const Instruction *I) { 2669 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2670 MVT VT; 2671 if (!isTypeSupported(I->getType(), VT)) 2672 return false; 2673 2674 unsigned Opc; 2675 const TargetRegisterClass *RC; 2676 switch (VT.SimpleTy) { 2677 default: 2678 return false; 2679 case MVT::i1: 2680 case MVT::i8: 2681 case MVT::i16: 2682 case MVT::i32: 2683 Opc = AArch64::CSELWr; 2684 RC = &AArch64::GPR32RegClass; 2685 break; 2686 case MVT::i64: 2687 Opc = AArch64::CSELXr; 2688 RC = &AArch64::GPR64RegClass; 2689 break; 2690 case MVT::f32: 2691 Opc = AArch64::FCSELSrrr; 2692 RC = &AArch64::FPR32RegClass; 2693 break; 2694 case MVT::f64: 2695 Opc = AArch64::FCSELDrrr; 2696 RC = &AArch64::FPR64RegClass; 2697 break; 2698 } 2699 2700 const SelectInst *SI = cast<SelectInst>(I); 2701 const Value *Cond = SI->getCondition(); 2702 AArch64CC::CondCode CC = AArch64CC::NE; 2703 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2704 2705 if (optimizeSelect(SI)) 2706 return true; 2707 2708 // Try to pickup the flags, so we don't have to emit another compare. 2709 if (foldXALUIntrinsic(CC, I, Cond)) { 2710 // Fake request the condition to force emission of the XALU intrinsic. 2711 Register CondReg = getRegForValue(Cond); 2712 if (!CondReg) 2713 return false; 2714 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2715 isValueAvailable(Cond)) { 2716 const auto *Cmp = cast<CmpInst>(Cond); 2717 // Try to optimize or fold the cmp. 
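    // As in selectBranch: a compare folded to FCMP_TRUE/FCMP_FALSE lets us skip
    // the CSEL entirely and simply forward the corresponding select operand.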
2718 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2719 const Value *FoldSelect = nullptr; 2720 switch (Predicate) { 2721 default: 2722 break; 2723 case CmpInst::FCMP_FALSE: 2724 FoldSelect = SI->getFalseValue(); 2725 break; 2726 case CmpInst::FCMP_TRUE: 2727 FoldSelect = SI->getTrueValue(); 2728 break; 2729 } 2730 2731 if (FoldSelect) { 2732 Register SrcReg = getRegForValue(FoldSelect); 2733 if (!SrcReg) 2734 return false; 2735 2736 updateValueMap(I, SrcReg); 2737 return true; 2738 } 2739 2740 // Emit the cmp. 2741 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2742 return false; 2743 2744 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2745 CC = getCompareCC(Predicate); 2746 switch (Predicate) { 2747 default: 2748 break; 2749 case CmpInst::FCMP_UEQ: 2750 ExtraCC = AArch64CC::EQ; 2751 CC = AArch64CC::VS; 2752 break; 2753 case CmpInst::FCMP_ONE: 2754 ExtraCC = AArch64CC::MI; 2755 CC = AArch64CC::GT; 2756 break; 2757 } 2758 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2759 } else { 2760 Register CondReg = getRegForValue(Cond); 2761 if (!CondReg) 2762 return false; 2763 2764 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2765 CondReg = constrainOperandRegClass(II, CondReg, 1); 2766 2767 // Emit a TST instruction (ANDS wzr, reg, #imm). 2768 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II, 2769 AArch64::WZR) 2770 .addReg(CondReg) 2771 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2772 } 2773 2774 Register Src1Reg = getRegForValue(SI->getTrueValue()); 2775 Register Src2Reg = getRegForValue(SI->getFalseValue()); 2776 2777 if (!Src1Reg || !Src2Reg) 2778 return false; 2779 2780 if (ExtraCC != AArch64CC::AL) 2781 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC); 2782 2783 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC); 2784 updateValueMap(I, ResultReg); 2785 return true; 2786 } 2787 2788 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2789 Value *V = I->getOperand(0); 2790 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2791 return false; 2792 2793 Register Op = getRegForValue(V); 2794 if (Op == 0) 2795 return false; 2796 2797 Register ResultReg = createResultReg(&AArch64::FPR64RegClass); 2798 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTDSr), 2799 ResultReg).addReg(Op); 2800 updateValueMap(I, ResultReg); 2801 return true; 2802 } 2803 2804 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2805 Value *V = I->getOperand(0); 2806 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2807 return false; 2808 2809 Register Op = getRegForValue(V); 2810 if (Op == 0) 2811 return false; 2812 2813 Register ResultReg = createResultReg(&AArch64::FPR32RegClass); 2814 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::FCVTSDr), 2815 ResultReg).addReg(Op); 2816 updateValueMap(I, ResultReg); 2817 return true; 2818 } 2819 2820 // FPToUI and FPToSI 2821 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2822 MVT DestVT; 2823 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2824 return false; 2825 2826 Register SrcReg = getRegForValue(I->getOperand(0)); 2827 if (SrcReg == 0) 2828 return false; 2829 2830 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2831 if (SrcVT == MVT::f128 || SrcVT == MVT::f16) 2832 return false; 2833 2834 unsigned Opc; 2835 if (SrcVT == MVT::f64) { 2836 if (Signed) 2837 Opc = (DestVT == MVT::i32) ? 
AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2838 else 2839 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2840 } else { 2841 if (Signed) 2842 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2843 else 2844 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2845 } 2846 Register ResultReg = createResultReg( 2847 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2848 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 2849 .addReg(SrcReg); 2850 updateValueMap(I, ResultReg); 2851 return true; 2852 } 2853 2854 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2855 MVT DestVT; 2856 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2857 return false; 2858 // Let regular ISEL handle FP16 2859 if (DestVT == MVT::f16) 2860 return false; 2861 2862 assert((DestVT == MVT::f32 || DestVT == MVT::f64) && 2863 "Unexpected value type."); 2864 2865 Register SrcReg = getRegForValue(I->getOperand(0)); 2866 if (!SrcReg) 2867 return false; 2868 2869 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2870 2871 // Handle sign-extension. 2872 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2873 SrcReg = 2874 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2875 if (!SrcReg) 2876 return false; 2877 } 2878 2879 unsigned Opc; 2880 if (SrcVT == MVT::i64) { 2881 if (Signed) 2882 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2883 else 2884 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2885 } else { 2886 if (Signed) 2887 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2888 else 2889 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2890 } 2891 2892 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg); 2893 updateValueMap(I, ResultReg); 2894 return true; 2895 } 2896 2897 bool AArch64FastISel::fastLowerArguments() { 2898 if (!FuncInfo.CanLowerReturn) 2899 return false; 2900 2901 const Function *F = FuncInfo.Fn; 2902 if (F->isVarArg()) 2903 return false; 2904 2905 CallingConv::ID CC = F->getCallingConv(); 2906 if (CC != CallingConv::C && CC != CallingConv::Swift) 2907 return false; 2908 2909 if (Subtarget->hasCustomCallingConv()) 2910 return false; 2911 2912 // Only handle simple cases of up to 8 GPR and FPR each. 
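  // That is, arguments passed entirely in X0-X7 / V0-V7 under the standard
  // AArch64 calling convention; byval, sret, swift-specific, aggregate, and
  // stack-passed arguments all fall back to SelectionDAG.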
2913 unsigned GPRCnt = 0; 2914 unsigned FPRCnt = 0; 2915 for (auto const &Arg : F->args()) { 2916 if (Arg.hasAttribute(Attribute::ByVal) || 2917 Arg.hasAttribute(Attribute::InReg) || 2918 Arg.hasAttribute(Attribute::StructRet) || 2919 Arg.hasAttribute(Attribute::SwiftSelf) || 2920 Arg.hasAttribute(Attribute::SwiftAsync) || 2921 Arg.hasAttribute(Attribute::SwiftError) || 2922 Arg.hasAttribute(Attribute::Nest)) 2923 return false; 2924 2925 Type *ArgTy = Arg.getType(); 2926 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2927 return false; 2928 2929 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2930 if (!ArgVT.isSimple()) 2931 return false; 2932 2933 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2934 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2935 return false; 2936 2937 if (VT.isVector() && 2938 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2939 return false; 2940 2941 if (VT >= MVT::i1 && VT <= MVT::i64) 2942 ++GPRCnt; 2943 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2944 VT.is128BitVector()) 2945 ++FPRCnt; 2946 else 2947 return false; 2948 2949 if (GPRCnt > 8 || FPRCnt > 8) 2950 return false; 2951 } 2952 2953 static const MCPhysReg Registers[6][8] = { 2954 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 2955 AArch64::W5, AArch64::W6, AArch64::W7 }, 2956 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 2957 AArch64::X5, AArch64::X6, AArch64::X7 }, 2958 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 2959 AArch64::H5, AArch64::H6, AArch64::H7 }, 2960 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 2961 AArch64::S5, AArch64::S6, AArch64::S7 }, 2962 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 2963 AArch64::D5, AArch64::D6, AArch64::D7 }, 2964 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 2965 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 2966 }; 2967 2968 unsigned GPRIdx = 0; 2969 unsigned FPRIdx = 0; 2970 for (auto const &Arg : F->args()) { 2971 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 2972 unsigned SrcReg; 2973 const TargetRegisterClass *RC; 2974 if (VT >= MVT::i1 && VT <= MVT::i32) { 2975 SrcReg = Registers[0][GPRIdx++]; 2976 RC = &AArch64::GPR32RegClass; 2977 VT = MVT::i32; 2978 } else if (VT == MVT::i64) { 2979 SrcReg = Registers[1][GPRIdx++]; 2980 RC = &AArch64::GPR64RegClass; 2981 } else if (VT == MVT::f16) { 2982 SrcReg = Registers[2][FPRIdx++]; 2983 RC = &AArch64::FPR16RegClass; 2984 } else if (VT == MVT::f32) { 2985 SrcReg = Registers[3][FPRIdx++]; 2986 RC = &AArch64::FPR32RegClass; 2987 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 2988 SrcReg = Registers[4][FPRIdx++]; 2989 RC = &AArch64::FPR64RegClass; 2990 } else if (VT.is128BitVector()) { 2991 SrcReg = Registers[5][FPRIdx++]; 2992 RC = &AArch64::FPR128RegClass; 2993 } else 2994 llvm_unreachable("Unexpected value type."); 2995 2996 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 2997 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 2998 // Without this, EmitLiveInCopies may eliminate the livein if its only 2999 // use is a bitcast (which isn't turned into an instruction). 
3000 Register ResultReg = createResultReg(RC); 3001 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3002 TII.get(TargetOpcode::COPY), ResultReg) 3003 .addReg(DstReg, getKillRegState(true)); 3004 updateValueMap(&Arg, ResultReg); 3005 } 3006 return true; 3007 } 3008 3009 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 3010 SmallVectorImpl<MVT> &OutVTs, 3011 unsigned &NumBytes) { 3012 CallingConv::ID CC = CLI.CallConv; 3013 SmallVector<CCValAssign, 16> ArgLocs; 3014 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 3015 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 3016 3017 // Get a count of how many bytes are to be pushed on the stack. 3018 NumBytes = CCInfo.getStackSize(); 3019 3020 // Issue CALLSEQ_START 3021 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 3022 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackDown)) 3023 .addImm(NumBytes).addImm(0); 3024 3025 // Process the args. 3026 for (CCValAssign &VA : ArgLocs) { 3027 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 3028 MVT ArgVT = OutVTs[VA.getValNo()]; 3029 3030 Register ArgReg = getRegForValue(ArgVal); 3031 if (!ArgReg) 3032 return false; 3033 3034 // Handle arg promotion: SExt, ZExt, AExt. 3035 switch (VA.getLocInfo()) { 3036 case CCValAssign::Full: 3037 break; 3038 case CCValAssign::SExt: { 3039 MVT DestVT = VA.getLocVT(); 3040 MVT SrcVT = ArgVT; 3041 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 3042 if (!ArgReg) 3043 return false; 3044 break; 3045 } 3046 case CCValAssign::AExt: 3047 // Intentional fall-through. 3048 case CCValAssign::ZExt: { 3049 MVT DestVT = VA.getLocVT(); 3050 MVT SrcVT = ArgVT; 3051 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 3052 if (!ArgReg) 3053 return false; 3054 break; 3055 } 3056 default: 3057 llvm_unreachable("Unknown arg promotion!"); 3058 } 3059 3060 // Now copy/store arg to correct locations. 3061 if (VA.isRegLoc() && !VA.needsCustom()) { 3062 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3063 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 3064 CLI.OutRegs.push_back(VA.getLocReg()); 3065 } else if (VA.needsCustom()) { 3066 // FIXME: Handle custom args. 3067 return false; 3068 } else { 3069 assert(VA.isMemLoc() && "Assuming store on stack."); 3070 3071 // Don't emit stores for undef values. 3072 if (isa<UndefValue>(ArgVal)) 3073 continue; 3074 3075 // Need to store on the stack. 3076 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 3077 3078 unsigned BEAlign = 0; 3079 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3080 BEAlign = 8 - ArgSize; 3081 3082 Address Addr; 3083 Addr.setKind(Address::RegBase); 3084 Addr.setReg(AArch64::SP); 3085 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3086 3087 Align Alignment = DL.getABITypeAlign(ArgVal->getType()); 3088 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3089 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 3090 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3091 3092 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3093 return false; 3094 } 3095 } 3096 return true; 3097 } 3098 3099 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, unsigned NumBytes) { 3100 CallingConv::ID CC = CLI.CallConv; 3101 3102 // Issue CALLSEQ_END 3103 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3104 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AdjStackUp)) 3105 .addImm(NumBytes).addImm(0); 3106 3107 // Now the return values. 
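  // CreateRegs reserves consecutive virtual registers for the legal pieces of
  // CLI.RetTy; each CCValAssign below copies one piece out of the physical
  // return register the calling convention assigned to it.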
3108 SmallVector<CCValAssign, 16> RVLocs; 3109 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3110 CCInfo.AnalyzeCallResult(CLI.Ins, CCAssignFnForCall(CC)); 3111 3112 Register ResultReg = FuncInfo.CreateRegs(CLI.RetTy); 3113 for (unsigned i = 0; i != RVLocs.size(); ++i) { 3114 CCValAssign &VA = RVLocs[i]; 3115 MVT CopyVT = VA.getValVT(); 3116 unsigned CopyReg = ResultReg + i; 3117 3118 // TODO: Handle big-endian results 3119 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3120 return false; 3121 3122 // Copy result out of their specified physreg. 3123 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(TargetOpcode::COPY), 3124 CopyReg) 3125 .addReg(VA.getLocReg()); 3126 CLI.InRegs.push_back(VA.getLocReg()); 3127 } 3128 3129 CLI.ResultReg = ResultReg; 3130 CLI.NumResultRegs = RVLocs.size(); 3131 3132 return true; 3133 } 3134 3135 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3136 CallingConv::ID CC = CLI.CallConv; 3137 bool IsTailCall = CLI.IsTailCall; 3138 bool IsVarArg = CLI.IsVarArg; 3139 const Value *Callee = CLI.Callee; 3140 MCSymbol *Symbol = CLI.Symbol; 3141 3142 if (!Callee && !Symbol) 3143 return false; 3144 3145 // Allow SelectionDAG isel to handle calls to functions like setjmp that need 3146 // a bti instruction following the call. 3147 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) && 3148 !Subtarget->noBTIAtReturnTwice() && 3149 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) 3150 return false; 3151 3152 // Allow SelectionDAG isel to handle indirect calls with KCFI checks. 3153 if (CLI.CB && CLI.CB->isIndirectCall() && 3154 CLI.CB->getOperandBundle(LLVMContext::OB_kcfi)) 3155 return false; 3156 3157 // Allow SelectionDAG isel to handle tail calls. 3158 if (IsTailCall) 3159 return false; 3160 3161 // FIXME: we could and should support this, but for now correctness at -O0 is 3162 // more important. 3163 if (Subtarget->isTargetILP32()) 3164 return false; 3165 3166 CodeModel::Model CM = TM.getCodeModel(); 3167 // Only support the small-addressing and large code models. 3168 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) 3169 return false; 3170 3171 // FIXME: Add large code model support for ELF. 3172 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3173 return false; 3174 3175 // Let SDISel handle vararg functions. 3176 if (IsVarArg) 3177 return false; 3178 3179 for (auto Flag : CLI.OutFlags) 3180 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || 3181 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError()) 3182 return false; 3183 3184 // Set up the argument vectors. 3185 SmallVector<MVT, 16> OutVTs; 3186 OutVTs.reserve(CLI.OutVals.size()); 3187 3188 for (auto *Val : CLI.OutVals) { 3189 MVT VT; 3190 if (!isTypeLegal(Val->getType(), VT) && 3191 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3192 return false; 3193 3194 // We don't handle vector parameters yet. 3195 if (VT.isVector() || VT.getSizeInBits() > 64) 3196 return false; 3197 3198 OutVTs.push_back(VT); 3199 } 3200 3201 Address Addr; 3202 if (Callee && !computeCallAddress(Callee, Addr)) 3203 return false; 3204 3205 // The weak function target may be zero; in that case we must use indirect 3206 // addressing via a stub on windows as it may be out of range for a 3207 // PC-relative jump. 3208 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() && 3209 Addr.getGlobalValue()->hasExternalWeakLinkage()) 3210 return false; 3211 3212 // Handle the arguments now that we've gotten them. 
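  // processCallArgs emits CALLSEQ_START, copies register arguments into their
  // assigned physregs and stores stack arguments at their SP-relative offsets;
  // finishCall pairs it with CALLSEQ_END after the call itself.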
3213 unsigned NumBytes; 3214 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3215 return false; 3216 3217 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3218 if (RegInfo->isAnyArgRegReserved(*MF)) 3219 RegInfo->emitReservedArgRegCallError(*MF); 3220 3221 // Issue the call. 3222 MachineInstrBuilder MIB; 3223 if (Subtarget->useSmallAddressing()) { 3224 const MCInstrDesc &II = 3225 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL); 3226 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II); 3227 if (Symbol) 3228 MIB.addSym(Symbol, 0); 3229 else if (Addr.getGlobalValue()) 3230 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3231 else if (Addr.getReg()) { 3232 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3233 MIB.addReg(Reg); 3234 } else 3235 return false; 3236 } else { 3237 unsigned CallReg = 0; 3238 if (Symbol) { 3239 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3240 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::ADRP), 3241 ADRPReg) 3242 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3243 3244 CallReg = createResultReg(&AArch64::GPR64RegClass); 3245 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3246 TII.get(AArch64::LDRXui), CallReg) 3247 .addReg(ADRPReg) 3248 .addSym(Symbol, 3249 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3250 } else if (Addr.getGlobalValue()) 3251 CallReg = materializeGV(Addr.getGlobalValue()); 3252 else if (Addr.getReg()) 3253 CallReg = Addr.getReg(); 3254 3255 if (!CallReg) 3256 return false; 3257 3258 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF)); 3259 CallReg = constrainOperandRegClass(II, CallReg, 0); 3260 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II).addReg(CallReg); 3261 } 3262 3263 // Add implicit physical register uses to the call. 3264 for (auto Reg : CLI.OutRegs) 3265 MIB.addReg(Reg, RegState::Implicit); 3266 3267 // Add a register mask with the call-preserved registers. 3268 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3269 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3270 3271 CLI.Call = MIB; 3272 3273 // Finish off the call including any return values. 3274 return finishCall(CLI, NumBytes); 3275 } 3276 3277 bool AArch64FastISel::isMemCpySmall(uint64_t Len, MaybeAlign Alignment) { 3278 if (Alignment) 3279 return Len / Alignment->value() <= 4; 3280 else 3281 return Len < 32; 3282 } 3283 3284 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3285 uint64_t Len, MaybeAlign Alignment) { 3286 // Make sure we don't bloat code by inlining very large memcpy's. 3287 if (!isMemCpySmall(Len, Alignment)) 3288 return false; 3289 3290 int64_t UnscaledOffset = 0; 3291 Address OrigDest = Dest; 3292 Address OrigSrc = Src; 3293 3294 while (Len) { 3295 MVT VT; 3296 if (!Alignment || *Alignment >= 8) { 3297 if (Len >= 8) 3298 VT = MVT::i64; 3299 else if (Len >= 4) 3300 VT = MVT::i32; 3301 else if (Len >= 2) 3302 VT = MVT::i16; 3303 else { 3304 VT = MVT::i8; 3305 } 3306 } else { 3307 assert(Alignment && "Alignment is set in this branch"); 3308 // Bound based on alignment. 
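      // For example, with these bounds a 7-byte copy at 4-byte alignment becomes
      // one 32-bit and three byte-sized load/store pairs.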
3309 if (Len >= 4 && *Alignment == 4) 3310 VT = MVT::i32; 3311 else if (Len >= 2 && *Alignment == 2) 3312 VT = MVT::i16; 3313 else { 3314 VT = MVT::i8; 3315 } 3316 } 3317 3318 unsigned ResultReg = emitLoad(VT, VT, Src); 3319 if (!ResultReg) 3320 return false; 3321 3322 if (!emitStore(VT, ResultReg, Dest)) 3323 return false; 3324 3325 int64_t Size = VT.getSizeInBits() / 8; 3326 Len -= Size; 3327 UnscaledOffset += Size; 3328 3329 // We need to recompute the unscaled offset for each iteration. 3330 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3331 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3332 } 3333 3334 return true; 3335 } 3336 3337 /// Check if it is possible to fold the condition from the XALU intrinsic 3338 /// into the user. The condition code will only be updated on success. 3339 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3340 const Instruction *I, 3341 const Value *Cond) { 3342 if (!isa<ExtractValueInst>(Cond)) 3343 return false; 3344 3345 const auto *EV = cast<ExtractValueInst>(Cond); 3346 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3347 return false; 3348 3349 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3350 MVT RetVT; 3351 const Function *Callee = II->getCalledFunction(); 3352 Type *RetTy = 3353 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3354 if (!isTypeLegal(RetTy, RetVT)) 3355 return false; 3356 3357 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3358 return false; 3359 3360 const Value *LHS = II->getArgOperand(0); 3361 const Value *RHS = II->getArgOperand(1); 3362 3363 // Canonicalize immediate to the RHS. 3364 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3365 std::swap(LHS, RHS); 3366 3367 // Simplify multiplies. 3368 Intrinsic::ID IID = II->getIntrinsicID(); 3369 switch (IID) { 3370 default: 3371 break; 3372 case Intrinsic::smul_with_overflow: 3373 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3374 if (C->getValue() == 2) 3375 IID = Intrinsic::sadd_with_overflow; 3376 break; 3377 case Intrinsic::umul_with_overflow: 3378 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3379 if (C->getValue() == 2) 3380 IID = Intrinsic::uadd_with_overflow; 3381 break; 3382 } 3383 3384 AArch64CC::CondCode TmpCC; 3385 switch (IID) { 3386 default: 3387 return false; 3388 case Intrinsic::sadd_with_overflow: 3389 case Intrinsic::ssub_with_overflow: 3390 TmpCC = AArch64CC::VS; 3391 break; 3392 case Intrinsic::uadd_with_overflow: 3393 TmpCC = AArch64CC::HS; 3394 break; 3395 case Intrinsic::usub_with_overflow: 3396 TmpCC = AArch64CC::LO; 3397 break; 3398 case Intrinsic::smul_with_overflow: 3399 case Intrinsic::umul_with_overflow: 3400 TmpCC = AArch64CC::NE; 3401 break; 3402 } 3403 3404 // Check if both instructions are in the same basic block. 3405 if (!isValueAvailable(II)) 3406 return false; 3407 3408 // Make sure nothing is in the way 3409 BasicBlock::const_iterator Start(I); 3410 BasicBlock::const_iterator End(II); 3411 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3412 // We only expect extractvalue instructions between the intrinsic and the 3413 // instruction to be selected. 3414 if (!isa<ExtractValueInst>(Itr)) 3415 return false; 3416 3417 // Check that the extractvalue operand comes from the intrinsic. 
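    // Extracts of the result (index 0) and of the overflow bit (index 1) are
    // harmless to skip over; anything else in between could clobber NZCV before
    // the user of the condition reads it, so bail out.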
3418 const auto *EVI = cast<ExtractValueInst>(Itr); 3419 if (EVI->getAggregateOperand() != II) 3420 return false; 3421 } 3422 3423 CC = TmpCC; 3424 return true; 3425 } 3426 3427 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3428 // FIXME: Handle more intrinsics. 3429 switch (II->getIntrinsicID()) { 3430 default: return false; 3431 case Intrinsic::frameaddress: { 3432 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3433 MFI.setFrameAddressIsTaken(true); 3434 3435 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3436 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3437 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3438 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3439 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3440 // Recursively load frame address 3441 // ldr x0, [fp] 3442 // ldr x0, [x0] 3443 // ldr x0, [x0] 3444 // ... 3445 unsigned DestReg; 3446 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3447 while (Depth--) { 3448 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3449 SrcReg, 0); 3450 assert(DestReg && "Unexpected LDR instruction emission failure."); 3451 SrcReg = DestReg; 3452 } 3453 3454 updateValueMap(II, SrcReg); 3455 return true; 3456 } 3457 case Intrinsic::sponentry: { 3458 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3459 3460 // SP = FP + Fixed Object + 16 3461 int FI = MFI.CreateFixedObject(4, 0, false); 3462 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 3463 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3464 TII.get(AArch64::ADDXri), ResultReg) 3465 .addFrameIndex(FI) 3466 .addImm(0) 3467 .addImm(0); 3468 3469 updateValueMap(II, ResultReg); 3470 return true; 3471 } 3472 case Intrinsic::memcpy: 3473 case Intrinsic::memmove: { 3474 const auto *MTI = cast<MemTransferInst>(II); 3475 // Don't handle volatile. 3476 if (MTI->isVolatile()) 3477 return false; 3478 3479 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3480 // we would emit dead code because we don't currently handle memmoves. 3481 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3482 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3483 // Small memcpy's are common enough that we want to do them without a call 3484 // if possible. 3485 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3486 MaybeAlign Alignment; 3487 if (MTI->getDestAlign() || MTI->getSourceAlign()) 3488 Alignment = std::min(MTI->getDestAlign().valueOrOne(), 3489 MTI->getSourceAlign().valueOrOne()); 3490 if (isMemCpySmall(Len, Alignment)) { 3491 Address Dest, Src; 3492 if (!computeAddress(MTI->getRawDest(), Dest) || 3493 !computeAddress(MTI->getRawSource(), Src)) 3494 return false; 3495 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3496 return true; 3497 } 3498 } 3499 3500 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3501 return false; 3502 3503 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3504 // Fast instruction selection doesn't support the special 3505 // address spaces. 3506 return false; 3507 3508 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3509 return lowerCallTo(II, IntrMemName, II->arg_size() - 1); 3510 } 3511 case Intrinsic::memset: { 3512 const MemSetInst *MSI = cast<MemSetInst>(II); 3513 // Don't handle volatile. 
3514 if (MSI->isVolatile()) 3515 return false; 3516 3517 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3518 return false; 3519 3520 if (MSI->getDestAddressSpace() > 255) 3521 // Fast instruction selection doesn't support the special 3522 // address spaces. 3523 return false; 3524 3525 return lowerCallTo(II, "memset", II->arg_size() - 1); 3526 } 3527 case Intrinsic::sin: 3528 case Intrinsic::cos: 3529 case Intrinsic::pow: { 3530 MVT RetVT; 3531 if (!isTypeLegal(II->getType(), RetVT)) 3532 return false; 3533 3534 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3535 return false; 3536 3537 static const RTLIB::Libcall LibCallTable[3][2] = { 3538 { RTLIB::SIN_F32, RTLIB::SIN_F64 }, 3539 { RTLIB::COS_F32, RTLIB::COS_F64 }, 3540 { RTLIB::POW_F32, RTLIB::POW_F64 } 3541 }; 3542 RTLIB::Libcall LC; 3543 bool Is64Bit = RetVT == MVT::f64; 3544 switch (II->getIntrinsicID()) { 3545 default: 3546 llvm_unreachable("Unexpected intrinsic."); 3547 case Intrinsic::sin: 3548 LC = LibCallTable[0][Is64Bit]; 3549 break; 3550 case Intrinsic::cos: 3551 LC = LibCallTable[1][Is64Bit]; 3552 break; 3553 case Intrinsic::pow: 3554 LC = LibCallTable[2][Is64Bit]; 3555 break; 3556 } 3557 3558 ArgListTy Args; 3559 Args.reserve(II->arg_size()); 3560 3561 // Populate the argument list. 3562 for (auto &Arg : II->args()) { 3563 ArgListEntry Entry; 3564 Entry.Val = Arg; 3565 Entry.Ty = Arg->getType(); 3566 Args.push_back(Entry); 3567 } 3568 3569 CallLoweringInfo CLI; 3570 MCContext &Ctx = MF->getContext(); 3571 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3572 TLI.getLibcallName(LC), std::move(Args)); 3573 if (!lowerCallTo(CLI)) 3574 return false; 3575 updateValueMap(II, CLI.ResultReg); 3576 return true; 3577 } 3578 case Intrinsic::fabs: { 3579 MVT VT; 3580 if (!isTypeLegal(II->getType(), VT)) 3581 return false; 3582 3583 unsigned Opc; 3584 switch (VT.SimpleTy) { 3585 default: 3586 return false; 3587 case MVT::f32: 3588 Opc = AArch64::FABSSr; 3589 break; 3590 case MVT::f64: 3591 Opc = AArch64::FABSDr; 3592 break; 3593 } 3594 Register SrcReg = getRegForValue(II->getOperand(0)); 3595 if (!SrcReg) 3596 return false; 3597 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3598 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(Opc), ResultReg) 3599 .addReg(SrcReg); 3600 updateValueMap(II, ResultReg); 3601 return true; 3602 } 3603 case Intrinsic::trap: 3604 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK)) 3605 .addImm(1); 3606 return true; 3607 case Intrinsic::debugtrap: 3608 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::BRK)) 3609 .addImm(0xF000); 3610 return true; 3611 3612 case Intrinsic::sqrt: { 3613 Type *RetTy = II->getCalledFunction()->getReturnType(); 3614 3615 MVT VT; 3616 if (!isTypeLegal(RetTy, VT)) 3617 return false; 3618 3619 Register Op0Reg = getRegForValue(II->getOperand(0)); 3620 if (!Op0Reg) 3621 return false; 3622 3623 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg); 3624 if (!ResultReg) 3625 return false; 3626 3627 updateValueMap(II, ResultReg); 3628 return true; 3629 } 3630 case Intrinsic::sadd_with_overflow: 3631 case Intrinsic::uadd_with_overflow: 3632 case Intrinsic::ssub_with_overflow: 3633 case Intrinsic::usub_with_overflow: 3634 case Intrinsic::smul_with_overflow: 3635 case Intrinsic::umul_with_overflow: { 3636 // This implements the basic lowering of the xalu with overflow intrinsics. 
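// Each of these intrinsics produces a {result, i1 overflow} pair: the result
// comes from a flag-setting add/sub (or from a multiply followed by an
// explicit overflow check), and the overflow bit is then materialized from
// NZCV with a CSINC on the inverted condition. For example,
// @llvm.sadd.with.overflow.i32 becomes roughly:
//   adds wreg, wreg, wreg
//   cset wreg, vs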
3637 const Function *Callee = II->getCalledFunction(); 3638 auto *Ty = cast<StructType>(Callee->getReturnType()); 3639 Type *RetTy = Ty->getTypeAtIndex(0U); 3640 3641 MVT VT; 3642 if (!isTypeLegal(RetTy, VT)) 3643 return false; 3644 3645 if (VT != MVT::i32 && VT != MVT::i64) 3646 return false; 3647 3648 const Value *LHS = II->getArgOperand(0); 3649 const Value *RHS = II->getArgOperand(1); 3650 // Canonicalize immediate to the RHS. 3651 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3652 std::swap(LHS, RHS); 3653 3654 // Simplify multiplies. 3655 Intrinsic::ID IID = II->getIntrinsicID(); 3656 switch (IID) { 3657 default: 3658 break; 3659 case Intrinsic::smul_with_overflow: 3660 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3661 if (C->getValue() == 2) { 3662 IID = Intrinsic::sadd_with_overflow; 3663 RHS = LHS; 3664 } 3665 break; 3666 case Intrinsic::umul_with_overflow: 3667 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3668 if (C->getValue() == 2) { 3669 IID = Intrinsic::uadd_with_overflow; 3670 RHS = LHS; 3671 } 3672 break; 3673 } 3674 3675 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3676 AArch64CC::CondCode CC = AArch64CC::Invalid; 3677 switch (IID) { 3678 default: llvm_unreachable("Unexpected intrinsic!"); 3679 case Intrinsic::sadd_with_overflow: 3680 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3681 CC = AArch64CC::VS; 3682 break; 3683 case Intrinsic::uadd_with_overflow: 3684 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3685 CC = AArch64CC::HS; 3686 break; 3687 case Intrinsic::ssub_with_overflow: 3688 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3689 CC = AArch64CC::VS; 3690 break; 3691 case Intrinsic::usub_with_overflow: 3692 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3693 CC = AArch64CC::LO; 3694 break; 3695 case Intrinsic::smul_with_overflow: { 3696 CC = AArch64CC::NE; 3697 Register LHSReg = getRegForValue(LHS); 3698 if (!LHSReg) 3699 return false; 3700 3701 Register RHSReg = getRegForValue(RHS); 3702 if (!RHSReg) 3703 return false; 3704 3705 if (VT == MVT::i32) { 3706 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg); 3707 Register MulSubReg = 3708 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3709 // cmp xreg, wreg, sxtw 3710 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg, 3711 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true, 3712 /*WantResult=*/false); 3713 MulReg = MulSubReg; 3714 } else { 3715 assert(VT == MVT::i64 && "Unexpected value type."); 3716 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3717 // reused in the next instruction. 
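// For the 64-bit case the overflow check compares the high half of the full
// product against the sign extension of the low half, roughly:
//   mul xreg, xreg, xreg
//   smulh xreg, xreg, xreg
//   cmp xreg, xreg, asr #63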
3718 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3719 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg); 3720 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63, 3721 /*WantResult=*/false); 3722 } 3723 break; 3724 } 3725 case Intrinsic::umul_with_overflow: { 3726 CC = AArch64CC::NE; 3727 Register LHSReg = getRegForValue(LHS); 3728 if (!LHSReg) 3729 return false; 3730 3731 Register RHSReg = getRegForValue(RHS); 3732 if (!RHSReg) 3733 return false; 3734 3735 if (VT == MVT::i32) { 3736 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg); 3737 // tst xreg, #0xffffffff00000000 3738 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3739 TII.get(AArch64::ANDSXri), AArch64::XZR) 3740 .addReg(MulReg) 3741 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64)); 3742 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3743 } else { 3744 assert(VT == MVT::i64 && "Unexpected value type."); 3745 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3746 // reused in the next instruction. 3747 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3748 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg); 3749 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false); 3750 } 3751 break; 3752 } 3753 } 3754 3755 if (MulReg) { 3756 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3757 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3758 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3759 } 3760 3761 if (!ResultReg1) 3762 return false; 3763 3764 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3765 AArch64::WZR, AArch64::WZR, 3766 getInvertedCondCode(CC)); 3767 (void)ResultReg2; 3768 assert((ResultReg1 + 1) == ResultReg2 && 3769 "Nonconsecutive result registers."); 3770 updateValueMap(II, ResultReg1, 2); 3771 return true; 3772 } 3773 case Intrinsic::aarch64_crc32b: 3774 case Intrinsic::aarch64_crc32h: 3775 case Intrinsic::aarch64_crc32w: 3776 case Intrinsic::aarch64_crc32x: 3777 case Intrinsic::aarch64_crc32cb: 3778 case Intrinsic::aarch64_crc32ch: 3779 case Intrinsic::aarch64_crc32cw: 3780 case Intrinsic::aarch64_crc32cx: { 3781 if (!Subtarget->hasCRC()) 3782 return false; 3783 3784 unsigned Opc; 3785 switch (II->getIntrinsicID()) { 3786 default: 3787 llvm_unreachable("Unexpected intrinsic!"); 3788 case Intrinsic::aarch64_crc32b: 3789 Opc = AArch64::CRC32Brr; 3790 break; 3791 case Intrinsic::aarch64_crc32h: 3792 Opc = AArch64::CRC32Hrr; 3793 break; 3794 case Intrinsic::aarch64_crc32w: 3795 Opc = AArch64::CRC32Wrr; 3796 break; 3797 case Intrinsic::aarch64_crc32x: 3798 Opc = AArch64::CRC32Xrr; 3799 break; 3800 case Intrinsic::aarch64_crc32cb: 3801 Opc = AArch64::CRC32CBrr; 3802 break; 3803 case Intrinsic::aarch64_crc32ch: 3804 Opc = AArch64::CRC32CHrr; 3805 break; 3806 case Intrinsic::aarch64_crc32cw: 3807 Opc = AArch64::CRC32CWrr; 3808 break; 3809 case Intrinsic::aarch64_crc32cx: 3810 Opc = AArch64::CRC32CXrr; 3811 break; 3812 } 3813 3814 Register LHSReg = getRegForValue(II->getArgOperand(0)); 3815 Register RHSReg = getRegForValue(II->getArgOperand(1)); 3816 if (!LHSReg || !RHSReg) 3817 return false; 3818 3819 Register ResultReg = 3820 fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, LHSReg, RHSReg); 3821 updateValueMap(II, ResultReg); 3822 return true; 3823 } 3824 } 3825 return false; 3826 } 3827 3828 bool AArch64FastISel::selectRet(const Instruction *I) { 3829 const ReturnInst *Ret = cast<ReturnInst>(I); 3830 const Function &F = *I->getParent()->getParent(); 3831 3832 if (!FuncInfo.CanLowerReturn) 3833 return false; 
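// The fast path below only handles the common case: no varargs, no
// swifterror, no split CSR, and (for non-void returns) a single value that is
// returned in a register. Everything else is left to SelectionDAG.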
3834 3835 if (F.isVarArg()) 3836 return false; 3837 3838 if (TLI.supportSwiftError() && 3839 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 3840 return false; 3841 3842 if (TLI.supportSplitCSR(FuncInfo.MF)) 3843 return false; 3844 3845 // Build a list of return value registers. 3846 SmallVector<unsigned, 4> RetRegs; 3847 3848 if (Ret->getNumOperands() > 0) { 3849 CallingConv::ID CC = F.getCallingConv(); 3850 SmallVector<ISD::OutputArg, 4> Outs; 3851 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3852 3853 // Analyze operands of the call, assigning locations to each operand. 3854 SmallVector<CCValAssign, 16> ValLocs; 3855 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3856 CCInfo.AnalyzeReturn(Outs, RetCC_AArch64_AAPCS); 3857 3858 // Only handle a single return value for now. 3859 if (ValLocs.size() != 1) 3860 return false; 3861 3862 CCValAssign &VA = ValLocs[0]; 3863 const Value *RV = Ret->getOperand(0); 3864 3865 // Don't bother handling odd stuff for now. 3866 if ((VA.getLocInfo() != CCValAssign::Full) && 3867 (VA.getLocInfo() != CCValAssign::BCvt)) 3868 return false; 3869 3870 // Only handle register returns for now. 3871 if (!VA.isRegLoc()) 3872 return false; 3873 3874 Register Reg = getRegForValue(RV); 3875 if (Reg == 0) 3876 return false; 3877 3878 unsigned SrcReg = Reg + VA.getValNo(); 3879 Register DestReg = VA.getLocReg(); 3880 // Avoid a cross-class copy. This is very unlikely. 3881 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 3882 return false; 3883 3884 EVT RVEVT = TLI.getValueType(DL, RV->getType()); 3885 if (!RVEVT.isSimple()) 3886 return false; 3887 3888 // Vectors (of > 1 lane) in big endian need tricky handling. 3889 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() && 3890 !Subtarget->isLittleEndian()) 3891 return false; 3892 3893 MVT RVVT = RVEVT.getSimpleVT(); 3894 if (RVVT == MVT::f128) 3895 return false; 3896 3897 MVT DestVT = VA.getValVT(); 3898 // Special handling for extended integers. 3899 if (RVVT != DestVT) { 3900 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) 3901 return false; 3902 3903 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 3904 return false; 3905 3906 bool IsZExt = Outs[0].Flags.isZExt(); 3907 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); 3908 if (SrcReg == 0) 3909 return false; 3910 } 3911 3912 // "Callee" (i.e. value producer) zero extends pointers at function 3913 // boundary. 3914 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy()) 3915 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff); 3916 3917 // Make the copy. 3918 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3919 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); 3920 3921 // Add register to return instruction. 
3922 RetRegs.push_back(VA.getLocReg()); 3923 } 3924 3925 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3926 TII.get(AArch64::RET_ReallyLR)); 3927 for (unsigned RetReg : RetRegs) 3928 MIB.addReg(RetReg, RegState::Implicit); 3929 return true; 3930 } 3931 3932 bool AArch64FastISel::selectTrunc(const Instruction *I) { 3933 Type *DestTy = I->getType(); 3934 Value *Op = I->getOperand(0); 3935 Type *SrcTy = Op->getType(); 3936 3937 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); 3938 EVT DestEVT = TLI.getValueType(DL, DestTy, true); 3939 if (!SrcEVT.isSimple()) 3940 return false; 3941 if (!DestEVT.isSimple()) 3942 return false; 3943 3944 MVT SrcVT = SrcEVT.getSimpleVT(); 3945 MVT DestVT = DestEVT.getSimpleVT(); 3946 3947 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && 3948 SrcVT != MVT::i8) 3949 return false; 3950 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && 3951 DestVT != MVT::i1) 3952 return false; 3953 3954 Register SrcReg = getRegForValue(Op); 3955 if (!SrcReg) 3956 return false; 3957 3958 // If we're truncating from i64 to a smaller non-legal type then generate an 3959 // AND. Otherwise, we know the high bits are undefined and a truncate only 3960 // generate a COPY. We cannot mark the source register also as result 3961 // register, because this can incorrectly transfer the kill flag onto the 3962 // source register. 3963 unsigned ResultReg; 3964 if (SrcVT == MVT::i64) { 3965 uint64_t Mask = 0; 3966 switch (DestVT.SimpleTy) { 3967 default: 3968 // Trunc i64 to i32 is handled by the target-independent fast-isel. 3969 return false; 3970 case MVT::i1: 3971 Mask = 0x1; 3972 break; 3973 case MVT::i8: 3974 Mask = 0xff; 3975 break; 3976 case MVT::i16: 3977 Mask = 0xffff; 3978 break; 3979 } 3980 // Issue an extract_subreg to get the lower 32-bits. 3981 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, 3982 AArch64::sub_32); 3983 // Create the AND instruction which performs the actual truncation. 3984 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask); 3985 assert(ResultReg && "Unexpected AND instruction emission failure."); 3986 } else { 3987 ResultReg = createResultReg(&AArch64::GPR32RegClass); 3988 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 3989 TII.get(TargetOpcode::COPY), ResultReg) 3990 .addReg(SrcReg); 3991 } 3992 3993 updateValueMap(I, ResultReg); 3994 return true; 3995 } 3996 3997 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { 3998 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || 3999 DestVT == MVT::i64) && 4000 "Unexpected value type."); 4001 // Handle i8 and i16 as i32. 4002 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4003 DestVT = MVT::i32; 4004 4005 if (IsZExt) { 4006 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1); 4007 assert(ResultReg && "Unexpected AND instruction emission failure."); 4008 if (DestVT == MVT::i64) { 4009 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the 4010 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. 4011 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4012 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4013 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4014 .addImm(0) 4015 .addReg(ResultReg) 4016 .addImm(AArch64::sub_32); 4017 ResultReg = Reg64; 4018 } 4019 return ResultReg; 4020 } else { 4021 if (DestVT == MVT::i64) { 4022 // FIXME: We're SExt i1 to i64. 
4023 return 0; 4024 } 4025 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 4026 0, 0); 4027 } 4028 } 4029 4030 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4031 unsigned Opc, ZReg; 4032 switch (RetVT.SimpleTy) { 4033 default: return 0; 4034 case MVT::i8: 4035 case MVT::i16: 4036 case MVT::i32: 4037 RetVT = MVT::i32; 4038 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 4039 case MVT::i64: 4040 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 4041 } 4042 4043 const TargetRegisterClass *RC = 4044 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4045 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg); 4046 } 4047 4048 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4049 if (RetVT != MVT::i64) 4050 return 0; 4051 4052 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, 4053 Op0, Op1, AArch64::XZR); 4054 } 4055 4056 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 4057 if (RetVT != MVT::i64) 4058 return 0; 4059 4060 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, 4061 Op0, Op1, AArch64::XZR); 4062 } 4063 4064 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, 4065 unsigned Op1Reg) { 4066 unsigned Opc = 0; 4067 bool NeedTrunc = false; 4068 uint64_t Mask = 0; 4069 switch (RetVT.SimpleTy) { 4070 default: return 0; 4071 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; 4072 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; 4073 case MVT::i32: Opc = AArch64::LSLVWr; break; 4074 case MVT::i64: Opc = AArch64::LSLVXr; break; 4075 } 4076 4077 const TargetRegisterClass *RC = 4078 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4079 if (NeedTrunc) 4080 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4081 4082 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4083 if (NeedTrunc) 4084 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4085 return ResultReg; 4086 } 4087 4088 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4089 uint64_t Shift, bool IsZExt) { 4090 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4091 "Unexpected source/return type pair."); 4092 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4093 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4094 "Unexpected source value type."); 4095 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4096 RetVT == MVT::i64) && "Unexpected return value type."); 4097 4098 bool Is64Bit = (RetVT == MVT::i64); 4099 unsigned RegSize = Is64Bit ? 64 : 32; 4100 unsigned DstBits = RetVT.getSizeInBits(); 4101 unsigned SrcBits = SrcVT.getSizeInBits(); 4102 const TargetRegisterClass *RC = 4103 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4104 4105 // Just emit a copy for "zero" shifts. 4106 if (Shift == 0) { 4107 if (RetVT == SrcVT) { 4108 Register ResultReg = createResultReg(RC); 4109 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4110 TII.get(TargetOpcode::COPY), ResultReg) 4111 .addReg(Op0); 4112 return ResultReg; 4113 } else 4114 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4115 } 4116 4117 // Don't deal with undefined shifts. 4118 if (Shift >= DstBits) 4119 return 0; 4120 4121 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
4122 // {S|U}BFM Wd, Wn, #r, #s 4123 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 4124 4125 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4126 // %2 = shl i16 %1, 4 4127 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 4128 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 4129 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 4130 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 4131 4132 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4133 // %2 = shl i16 %1, 8 4134 // Wd<32+7-24,32-24> = Wn<7:0> 4135 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 4136 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 4137 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 4138 4139 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4140 // %2 = shl i16 %1, 12 4141 // Wd<32+3-20,32-20> = Wn<3:0> 4142 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 4143 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 4144 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 4145 4146 unsigned ImmR = RegSize - Shift; 4147 // Limit the width to the length of the source type. 4148 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 4149 static const unsigned OpcTable[2][2] = { 4150 {AArch64::SBFMWri, AArch64::SBFMXri}, 4151 {AArch64::UBFMWri, AArch64::UBFMXri} 4152 }; 4153 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4154 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4155 Register TmpReg = MRI.createVirtualRegister(RC); 4156 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4157 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4158 .addImm(0) 4159 .addReg(Op0) 4160 .addImm(AArch64::sub_32); 4161 Op0 = TmpReg; 4162 } 4163 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4164 } 4165 4166 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, 4167 unsigned Op1Reg) { 4168 unsigned Opc = 0; 4169 bool NeedTrunc = false; 4170 uint64_t Mask = 0; 4171 switch (RetVT.SimpleTy) { 4172 default: return 0; 4173 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4174 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4175 case MVT::i32: Opc = AArch64::LSRVWr; break; 4176 case MVT::i64: Opc = AArch64::LSRVXr; break; 4177 } 4178 4179 const TargetRegisterClass *RC = 4180 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4181 if (NeedTrunc) { 4182 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask); 4183 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4184 } 4185 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4186 if (NeedTrunc) 4187 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4188 return ResultReg; 4189 } 4190 4191 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4192 uint64_t Shift, bool IsZExt) { 4193 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4194 "Unexpected source/return type pair."); 4195 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4196 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4197 "Unexpected source value type."); 4198 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4199 RetVT == MVT::i64) && "Unexpected return value type."); 4200 4201 bool Is64Bit = (RetVT == MVT::i64); 4202 unsigned RegSize = Is64Bit ? 64 : 32; 4203 unsigned DstBits = RetVT.getSizeInBits(); 4204 unsigned SrcBits = SrcVT.getSizeInBits(); 4205 const TargetRegisterClass *RC = 4206 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4207 4208 // Just emit a copy for "zero" shifts. 
4209 if (Shift == 0) { 4210 if (RetVT == SrcVT) { 4211 Register ResultReg = createResultReg(RC); 4212 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4213 TII.get(TargetOpcode::COPY), ResultReg) 4214 .addReg(Op0); 4215 return ResultReg; 4216 } else 4217 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4218 } 4219 4220 // Don't deal with undefined shifts. 4221 if (Shift >= DstBits) 4222 return 0; 4223 4224 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4225 // {S|U}BFM Wd, Wn, #r, #s 4226 // Wd<s-r:0> = Wn<s:r> when r <= s 4227 4228 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4229 // %2 = lshr i16 %1, 4 4230 // Wd<7-4:0> = Wn<7:4> 4231 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4232 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4233 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4234 4235 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4236 // %2 = lshr i16 %1, 8 4237 // Wd<7-7,0> = Wn<7:7> 4238 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4239 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4240 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4241 4242 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4243 // %2 = lshr i16 %1, 12 4244 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4245 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4246 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4247 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4248 4249 if (Shift >= SrcBits && IsZExt) 4250 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4251 4252 // It is not possible to fold a sign-extend into the LShr instruction. In this 4253 // case emit a sign-extend. 4254 if (!IsZExt) { 4255 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4256 if (!Op0) 4257 return 0; 4258 SrcVT = RetVT; 4259 SrcBits = SrcVT.getSizeInBits(); 4260 IsZExt = true; 4261 } 4262 4263 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4264 unsigned ImmS = SrcBits - 1; 4265 static const unsigned OpcTable[2][2] = { 4266 {AArch64::SBFMWri, AArch64::SBFMXri}, 4267 {AArch64::UBFMWri, AArch64::UBFMXri} 4268 }; 4269 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4270 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4271 Register TmpReg = MRI.createVirtualRegister(RC); 4272 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4273 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4274 .addImm(0) 4275 .addReg(Op0) 4276 .addImm(AArch64::sub_32); 4277 Op0 = TmpReg; 4278 } 4279 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4280 } 4281 4282 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, 4283 unsigned Op1Reg) { 4284 unsigned Opc = 0; 4285 bool NeedTrunc = false; 4286 uint64_t Mask = 0; 4287 switch (RetVT.SimpleTy) { 4288 default: return 0; 4289 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4290 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4291 case MVT::i32: Opc = AArch64::ASRVWr; break; 4292 case MVT::i64: Opc = AArch64::ASRVXr; break; 4293 } 4294 4295 const TargetRegisterClass *RC = 4296 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4297 if (NeedTrunc) { 4298 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); 4299 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4300 } 4301 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4302 if (NeedTrunc) 4303 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4304 return ResultReg; 4305 } 4306 4307 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4308 uint64_t Shift, bool IsZExt) { 4309 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4310 "Unexpected source/return type pair."); 4311 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4312 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4313 "Unexpected source value type."); 4314 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4315 RetVT == MVT::i64) && "Unexpected return value type."); 4316 4317 bool Is64Bit = (RetVT == MVT::i64); 4318 unsigned RegSize = Is64Bit ? 64 : 32; 4319 unsigned DstBits = RetVT.getSizeInBits(); 4320 unsigned SrcBits = SrcVT.getSizeInBits(); 4321 const TargetRegisterClass *RC = 4322 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4323 4324 // Just emit a copy for "zero" shifts. 4325 if (Shift == 0) { 4326 if (RetVT == SrcVT) { 4327 Register ResultReg = createResultReg(RC); 4328 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4329 TII.get(TargetOpcode::COPY), ResultReg) 4330 .addReg(Op0); 4331 return ResultReg; 4332 } else 4333 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4334 } 4335 4336 // Don't deal with undefined shifts. 4337 if (Shift >= DstBits) 4338 return 0; 4339 4340 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4341 // {S|U}BFM Wd, Wn, #r, #s 4342 // Wd<s-r:0> = Wn<s:r> when r <= s 4343 4344 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4345 // %2 = ashr i16 %1, 4 4346 // Wd<7-4:0> = Wn<7:4> 4347 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4348 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4349 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4350 4351 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4352 // %2 = ashr i16 %1, 8 4353 // Wd<7-7,0> = Wn<7:7> 4354 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4355 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4356 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4357 4358 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4359 // %2 = ashr i16 %1, 12 4360 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4361 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4362 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4363 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4364 4365 if (Shift >= SrcBits && IsZExt) 4366 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4367 4368 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4369 unsigned ImmS = SrcBits - 1; 4370 static const unsigned OpcTable[2][2] = { 4371 {AArch64::SBFMWri, AArch64::SBFMXri}, 4372 {AArch64::UBFMWri, AArch64::UBFMXri} 4373 }; 4374 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4375 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4376 Register TmpReg = MRI.createVirtualRegister(RC); 4377 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4378 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4379 .addImm(0) 4380 .addReg(Op0) 4381 .addImm(AArch64::sub_32); 4382 Op0 = TmpReg; 4383 } 4384 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4385 } 4386 4387 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4388 bool IsZExt) { 4389 
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4390 4391 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4392 // DestVT are odd things, so test to make sure that they are both types we can 4393 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4394 // bail out to SelectionDAG. 4395 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4396 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4397 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4398 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4399 return 0; 4400 4401 unsigned Opc; 4402 unsigned Imm = 0; 4403 4404 switch (SrcVT.SimpleTy) { 4405 default: 4406 return 0; 4407 case MVT::i1: 4408 return emiti1Ext(SrcReg, DestVT, IsZExt); 4409 case MVT::i8: 4410 if (DestVT == MVT::i64) 4411 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4412 else 4413 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4414 Imm = 7; 4415 break; 4416 case MVT::i16: 4417 if (DestVT == MVT::i64) 4418 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4419 else 4420 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4421 Imm = 15; 4422 break; 4423 case MVT::i32: 4424 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4425 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4426 Imm = 31; 4427 break; 4428 } 4429 4430 // Handle i8 and i16 as i32. 4431 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4432 DestVT = MVT::i32; 4433 else if (DestVT == MVT::i64) { 4434 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4435 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4436 TII.get(AArch64::SUBREG_TO_REG), Src64) 4437 .addImm(0) 4438 .addReg(SrcReg) 4439 .addImm(AArch64::sub_32); 4440 SrcReg = Src64; 4441 } 4442 4443 const TargetRegisterClass *RC = 4444 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4445 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm); 4446 } 4447 4448 static bool isZExtLoad(const MachineInstr *LI) { 4449 switch (LI->getOpcode()) { 4450 default: 4451 return false; 4452 case AArch64::LDURBBi: 4453 case AArch64::LDURHHi: 4454 case AArch64::LDURWi: 4455 case AArch64::LDRBBui: 4456 case AArch64::LDRHHui: 4457 case AArch64::LDRWui: 4458 case AArch64::LDRBBroX: 4459 case AArch64::LDRHHroX: 4460 case AArch64::LDRWroX: 4461 case AArch64::LDRBBroW: 4462 case AArch64::LDRHHroW: 4463 case AArch64::LDRWroW: 4464 return true; 4465 } 4466 } 4467 4468 static bool isSExtLoad(const MachineInstr *LI) { 4469 switch (LI->getOpcode()) { 4470 default: 4471 return false; 4472 case AArch64::LDURSBWi: 4473 case AArch64::LDURSHWi: 4474 case AArch64::LDURSBXi: 4475 case AArch64::LDURSHXi: 4476 case AArch64::LDURSWi: 4477 case AArch64::LDRSBWui: 4478 case AArch64::LDRSHWui: 4479 case AArch64::LDRSBXui: 4480 case AArch64::LDRSHXui: 4481 case AArch64::LDRSWui: 4482 case AArch64::LDRSBWroX: 4483 case AArch64::LDRSHWroX: 4484 case AArch64::LDRSBXroX: 4485 case AArch64::LDRSHXroX: 4486 case AArch64::LDRSWroX: 4487 case AArch64::LDRSBWroW: 4488 case AArch64::LDRSHWroW: 4489 case AArch64::LDRSBXroW: 4490 case AArch64::LDRSHXroW: 4491 case AArch64::LDRSWroW: 4492 return true; 4493 } 4494 } 4495 4496 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4497 MVT SrcVT) { 4498 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4499 if (!LI || !LI->hasOneUse()) 4500 return false; 4501 4502 // Check if the load instruction has already been selected. 
4503 Register Reg = lookUpRegForValue(LI); 4504 if (!Reg) 4505 return false; 4506 4507 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4508 if (!MI) 4509 return false; 4510 4511 // Check if the correct load instruction has been emitted - SelectionDAG might 4512 // have emitted a zero-extending load, but we need a sign-extending load. 4513 bool IsZExt = isa<ZExtInst>(I); 4514 const auto *LoadMI = MI; 4515 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4516 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4517 Register LoadReg = MI->getOperand(1).getReg(); 4518 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4519 assert(LoadMI && "Expected valid instruction"); 4520 } 4521 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4522 return false; 4523 4524 // Nothing to be done. 4525 if (RetVT != MVT::i64 || SrcVT > MVT::i32) { 4526 updateValueMap(I, Reg); 4527 return true; 4528 } 4529 4530 if (IsZExt) { 4531 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 4532 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4533 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4534 .addImm(0) 4535 .addReg(Reg, getKillRegState(true)) 4536 .addImm(AArch64::sub_32); 4537 Reg = Reg64; 4538 } else { 4539 assert((MI->getOpcode() == TargetOpcode::COPY && 4540 MI->getOperand(1).getSubReg() == AArch64::sub_32) && 4541 "Expected copy instruction"); 4542 Reg = MI->getOperand(1).getReg(); 4543 MachineBasicBlock::iterator I(MI); 4544 removeDeadCode(I, std::next(I)); 4545 } 4546 updateValueMap(I, Reg); 4547 return true; 4548 } 4549 4550 bool AArch64FastISel::selectIntExt(const Instruction *I) { 4551 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 4552 "Unexpected integer extend instruction."); 4553 MVT RetVT; 4554 MVT SrcVT; 4555 if (!isTypeSupported(I->getType(), RetVT)) 4556 return false; 4557 4558 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) 4559 return false; 4560 4561 // Try to optimize already sign-/zero-extended values from load instructions. 4562 if (optimizeIntExtLoad(I, RetVT, SrcVT)) 4563 return true; 4564 4565 Register SrcReg = getRegForValue(I->getOperand(0)); 4566 if (!SrcReg) 4567 return false; 4568 4569 // Try to optimize already sign-/zero-extended values from function arguments. 4570 bool IsZExt = isa<ZExtInst>(I); 4571 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) { 4572 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { 4573 if (RetVT == MVT::i64 && SrcVT != MVT::i64) { 4574 Register ResultReg = createResultReg(&AArch64::GPR64RegClass); 4575 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, 4576 TII.get(AArch64::SUBREG_TO_REG), ResultReg) 4577 .addImm(0) 4578 .addReg(SrcReg) 4579 .addImm(AArch64::sub_32); 4580 SrcReg = ResultReg; 4581 } 4582 4583 updateValueMap(I, SrcReg); 4584 return true; 4585 } 4586 } 4587 4588 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); 4589 if (!ResultReg) 4590 return false; 4591 4592 updateValueMap(I, ResultReg); 4593 return true; 4594 } 4595 4596 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { 4597 EVT DestEVT = TLI.getValueType(DL, I->getType(), true); 4598 if (!DestEVT.isSimple()) 4599 return false; 4600 4601 MVT DestVT = DestEVT.getSimpleVT(); 4602 if (DestVT != MVT::i64 && DestVT != MVT::i32) 4603 return false; 4604 4605 unsigned DivOpc; 4606 bool Is64bit = (DestVT == MVT::i64); 4607 switch (ISDOpcode) { 4608 default: 4609 return false; 4610 case ISD::SREM: 4611 DivOpc = Is64bit ? 
AArch64::SDIVXr : AArch64::SDIVWr; 4612 break; 4613 case ISD::UREM: 4614 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; 4615 break; 4616 } 4617 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4618 Register Src0Reg = getRegForValue(I->getOperand(0)); 4619 if (!Src0Reg) 4620 return false; 4621 4622 Register Src1Reg = getRegForValue(I->getOperand(1)); 4623 if (!Src1Reg) 4624 return false; 4625 4626 const TargetRegisterClass *RC = 4627 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4628 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg); 4629 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4630 // The remainder is computed as numerator - (quotient * denominator) using the 4631 // MSUB instruction. 4632 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg); 4633 updateValueMap(I, ResultReg); 4634 return true; 4635 } 4636 4637 bool AArch64FastISel::selectMul(const Instruction *I) { 4638 MVT VT; 4639 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4640 return false; 4641 4642 if (VT.isVector()) 4643 return selectBinaryOp(I, ISD::MUL); 4644 4645 const Value *Src0 = I->getOperand(0); 4646 const Value *Src1 = I->getOperand(1); 4647 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4648 if (C->getValue().isPowerOf2()) 4649 std::swap(Src0, Src1); 4650 4651 // Try to simplify to a shift instruction. 4652 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4653 if (C->getValue().isPowerOf2()) { 4654 uint64_t ShiftVal = C->getValue().logBase2(); 4655 MVT SrcVT = VT; 4656 bool IsZExt = true; 4657 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4658 if (!isIntExtFree(ZExt)) { 4659 MVT VT; 4660 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4661 SrcVT = VT; 4662 IsZExt = true; 4663 Src0 = ZExt->getOperand(0); 4664 } 4665 } 4666 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4667 if (!isIntExtFree(SExt)) { 4668 MVT VT; 4669 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4670 SrcVT = VT; 4671 IsZExt = false; 4672 Src0 = SExt->getOperand(0); 4673 } 4674 } 4675 } 4676 4677 Register Src0Reg = getRegForValue(Src0); 4678 if (!Src0Reg) 4679 return false; 4680 4681 unsigned ResultReg = 4682 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt); 4683 4684 if (ResultReg) { 4685 updateValueMap(I, ResultReg); 4686 return true; 4687 } 4688 } 4689 4690 Register Src0Reg = getRegForValue(I->getOperand(0)); 4691 if (!Src0Reg) 4692 return false; 4693 4694 Register Src1Reg = getRegForValue(I->getOperand(1)); 4695 if (!Src1Reg) 4696 return false; 4697 4698 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg); 4699 4700 if (!ResultReg) 4701 return false; 4702 4703 updateValueMap(I, ResultReg); 4704 return true; 4705 } 4706 4707 bool AArch64FastISel::selectShift(const Instruction *I) { 4708 MVT RetVT; 4709 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4710 return false; 4711 4712 if (RetVT.isVector()) 4713 return selectOperator(I, I->getOpcode()); 4714 4715 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4716 unsigned ResultReg = 0; 4717 uint64_t ShiftVal = C->getZExtValue(); 4718 MVT SrcVT = RetVT; 4719 bool IsZExt = I->getOpcode() != Instruction::AShr; 4720 const Value *Op0 = I->getOperand(0); 4721 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4722 if (!isIntExtFree(ZExt)) { 4723 MVT TmpVT; 4724 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4725 SrcVT = TmpVT; 4726 IsZExt = true; 
4727 Op0 = ZExt->getOperand(0); 4728 } 4729 } 4730 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4731 if (!isIntExtFree(SExt)) { 4732 MVT TmpVT; 4733 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4734 SrcVT = TmpVT; 4735 IsZExt = false; 4736 Op0 = SExt->getOperand(0); 4737 } 4738 } 4739 } 4740 4741 Register Op0Reg = getRegForValue(Op0); 4742 if (!Op0Reg) 4743 return false; 4744 4745 switch (I->getOpcode()) { 4746 default: llvm_unreachable("Unexpected instruction."); 4747 case Instruction::Shl: 4748 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4749 break; 4750 case Instruction::AShr: 4751 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4752 break; 4753 case Instruction::LShr: 4754 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4755 break; 4756 } 4757 if (!ResultReg) 4758 return false; 4759 4760 updateValueMap(I, ResultReg); 4761 return true; 4762 } 4763 4764 Register Op0Reg = getRegForValue(I->getOperand(0)); 4765 if (!Op0Reg) 4766 return false; 4767 4768 Register Op1Reg = getRegForValue(I->getOperand(1)); 4769 if (!Op1Reg) 4770 return false; 4771 4772 unsigned ResultReg = 0; 4773 switch (I->getOpcode()) { 4774 default: llvm_unreachable("Unexpected instruction."); 4775 case Instruction::Shl: 4776 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg); 4777 break; 4778 case Instruction::AShr: 4779 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg); 4780 break; 4781 case Instruction::LShr: 4782 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg); 4783 break; 4784 } 4785 4786 if (!ResultReg) 4787 return false; 4788 4789 updateValueMap(I, ResultReg); 4790 return true; 4791 } 4792 4793 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4794 MVT RetVT, SrcVT; 4795 4796 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4797 return false; 4798 if (!isTypeLegal(I->getType(), RetVT)) 4799 return false; 4800 4801 unsigned Opc; 4802 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4803 Opc = AArch64::FMOVWSr; 4804 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4805 Opc = AArch64::FMOVXDr; 4806 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4807 Opc = AArch64::FMOVSWr; 4808 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4809 Opc = AArch64::FMOVDXr; 4810 else 4811 return false; 4812 4813 const TargetRegisterClass *RC = nullptr; 4814 switch (RetVT.SimpleTy) { 4815 default: llvm_unreachable("Unexpected value type."); 4816 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4817 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4818 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4819 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4820 } 4821 Register Op0Reg = getRegForValue(I->getOperand(0)); 4822 if (!Op0Reg) 4823 return false; 4824 4825 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg); 4826 if (!ResultReg) 4827 return false; 4828 4829 updateValueMap(I, ResultReg); 4830 return true; 4831 } 4832 4833 bool AArch64FastISel::selectFRem(const Instruction *I) { 4834 MVT RetVT; 4835 if (!isTypeLegal(I->getType(), RetVT)) 4836 return false; 4837 4838 RTLIB::Libcall LC; 4839 switch (RetVT.SimpleTy) { 4840 default: 4841 return false; 4842 case MVT::f32: 4843 LC = RTLIB::REM_F32; 4844 break; 4845 case MVT::f64: 4846 LC = RTLIB::REM_F64; 4847 break; 4848 } 4849 4850 ArgListTy Args; 4851 Args.reserve(I->getNumOperands()); 4852 4853 // Populate the argument list. 
4854 for (auto &Arg : I->operands()) { 4855 ArgListEntry Entry; 4856 Entry.Val = Arg; 4857 Entry.Ty = Arg->getType(); 4858 Args.push_back(Entry); 4859 } 4860 4861 CallLoweringInfo CLI; 4862 MCContext &Ctx = MF->getContext(); 4863 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4864 TLI.getLibcallName(LC), std::move(Args)); 4865 if (!lowerCallTo(CLI)) 4866 return false; 4867 updateValueMap(I, CLI.ResultReg); 4868 return true; 4869 } 4870 4871 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4872 MVT VT; 4873 if (!isTypeLegal(I->getType(), VT)) 4874 return false; 4875 4876 if (!isa<ConstantInt>(I->getOperand(1))) 4877 return selectBinaryOp(I, ISD::SDIV); 4878 4879 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4880 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4881 !(C.isPowerOf2() || C.isNegatedPowerOf2())) 4882 return selectBinaryOp(I, ISD::SDIV); 4883 4884 unsigned Lg2 = C.countr_zero(); 4885 Register Src0Reg = getRegForValue(I->getOperand(0)); 4886 if (!Src0Reg) 4887 return false; 4888 4889 if (cast<BinaryOperator>(I)->isExact()) { 4890 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2); 4891 if (!ResultReg) 4892 return false; 4893 updateValueMap(I, ResultReg); 4894 return true; 4895 } 4896 4897 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4898 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne); 4899 if (!AddReg) 4900 return false; 4901 4902 // (Src0 < 0) ? Pow2 - 1 : 0; 4903 if (!emitICmp_ri(VT, Src0Reg, 0)) 4904 return false; 4905 4906 unsigned SelectOpc; 4907 const TargetRegisterClass *RC; 4908 if (VT == MVT::i64) { 4909 SelectOpc = AArch64::CSELXr; 4910 RC = &AArch64::GPR64RegClass; 4911 } else { 4912 SelectOpc = AArch64::CSELWr; 4913 RC = &AArch64::GPR32RegClass; 4914 } 4915 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg, 4916 AArch64CC::LT); 4917 if (!SelectReg) 4918 return false; 4919 4920 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4921 // negate the result. 4922 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4923 unsigned ResultReg; 4924 if (C.isNegative()) 4925 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg, 4926 AArch64_AM::ASR, Lg2); 4927 else 4928 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2); 4929 4930 if (!ResultReg) 4931 return false; 4932 4933 updateValueMap(I, ResultReg); 4934 return true; 4935 } 4936 4937 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4938 /// have to duplicate it for AArch64, because otherwise we would fail during the 4939 /// sign-extend emission. 4940 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4941 Register IdxN = getRegForValue(Idx); 4942 if (IdxN == 0) 4943 // Unhandled operand. Halt "fast" selection and bail. 4944 return 0; 4945 4946 // If the index is smaller or larger than intptr_t, truncate or extend it. 4947 MVT PtrVT = TLI.getPointerTy(DL); 4948 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); 4949 if (IdxVT.bitsLT(PtrVT)) { 4950 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false); 4951 } else if (IdxVT.bitsGT(PtrVT)) 4952 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64"); 4953 return IdxN; 4954 } 4955 4956 /// This is mostly a copy of the existing FastISel GEP code, but we have to 4957 /// duplicate it for AArch64, because otherwise we would bail out even for 4958 /// simple cases. 
This is because the standard fastEmit functions don't cover 4959 /// MUL at all and ADD is lowered very inefficientily. 4960 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { 4961 if (Subtarget->isTargetILP32()) 4962 return false; 4963 4964 Register N = getRegForValue(I->getOperand(0)); 4965 if (!N) 4966 return false; 4967 4968 // Keep a running tab of the total offset to coalesce multiple N = N + Offset 4969 // into a single N = N + TotalOffset. 4970 uint64_t TotalOffs = 0; 4971 MVT VT = TLI.getPointerTy(DL); 4972 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I); 4973 GTI != E; ++GTI) { 4974 const Value *Idx = GTI.getOperand(); 4975 if (auto *StTy = GTI.getStructTypeOrNull()) { 4976 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); 4977 // N = N + Offset 4978 if (Field) 4979 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); 4980 } else { 4981 // If this is a constant subscript, handle it quickly. 4982 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { 4983 if (CI->isZero()) 4984 continue; 4985 // N = N + Offset 4986 TotalOffs += GTI.getSequentialElementStride(DL) * 4987 cast<ConstantInt>(CI)->getSExtValue(); 4988 continue; 4989 } 4990 if (TotalOffs) { 4991 N = emitAdd_ri_(VT, N, TotalOffs); 4992 if (!N) 4993 return false; 4994 TotalOffs = 0; 4995 } 4996 4997 // N = N + Idx * ElementSize; 4998 uint64_t ElementSize = GTI.getSequentialElementStride(DL); 4999 unsigned IdxN = getRegForGEPIndex(Idx); 5000 if (!IdxN) 5001 return false; 5002 5003 if (ElementSize != 1) { 5004 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); 5005 if (!C) 5006 return false; 5007 IdxN = emitMul_rr(VT, IdxN, C); 5008 if (!IdxN) 5009 return false; 5010 } 5011 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN); 5012 if (!N) 5013 return false; 5014 } 5015 } 5016 if (TotalOffs) { 5017 N = emitAdd_ri_(VT, N, TotalOffs); 5018 if (!N) 5019 return false; 5020 } 5021 updateValueMap(I, N); 5022 return true; 5023 } 5024 5025 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) { 5026 assert(TM.getOptLevel() == CodeGenOptLevel::None && 5027 "cmpxchg survived AtomicExpand at optlevel > -O0"); 5028 5029 auto *RetPairTy = cast<StructType>(I->getType()); 5030 Type *RetTy = RetPairTy->getTypeAtIndex(0U); 5031 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) && 5032 "cmpxchg has a non-i1 status result"); 5033 5034 MVT VT; 5035 if (!isTypeLegal(RetTy, VT)) 5036 return false; 5037 5038 const TargetRegisterClass *ResRC; 5039 unsigned Opc, CmpOpc; 5040 // This only supports i32/i64, because i8/i16 aren't legal, and the generic 5041 // extractvalue selection doesn't support that. 
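// The emitted sequence is a CMP_SWAP_{32,64} pseudo (later expanded into a
// load-exclusive/store-exclusive loop), followed by a compare of the loaded
// value against the expected one and a CSINC that materializes the i1
// success flag, roughly:
//   cmp wreg, wreg
//   cset wreg, eq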
5042 if (VT == MVT::i32) { 5043 Opc = AArch64::CMP_SWAP_32; 5044 CmpOpc = AArch64::SUBSWrs; 5045 ResRC = &AArch64::GPR32RegClass; 5046 } else if (VT == MVT::i64) { 5047 Opc = AArch64::CMP_SWAP_64; 5048 CmpOpc = AArch64::SUBSXrs; 5049 ResRC = &AArch64::GPR64RegClass; 5050 } else { 5051 return false; 5052 } 5053 5054 const MCInstrDesc &II = TII.get(Opc); 5055 5056 const Register AddrReg = constrainOperandRegClass( 5057 II, getRegForValue(I->getPointerOperand()), II.getNumDefs()); 5058 const Register DesiredReg = constrainOperandRegClass( 5059 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1); 5060 const Register NewReg = constrainOperandRegClass( 5061 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2); 5062 5063 const Register ResultReg1 = createResultReg(ResRC); 5064 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass); 5065 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass); 5066 5067 // FIXME: MachineMemOperand doesn't support cmpxchg yet. 5068 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, II) 5069 .addDef(ResultReg1) 5070 .addDef(ScratchReg) 5071 .addUse(AddrReg) 5072 .addUse(DesiredReg) 5073 .addUse(NewReg); 5074 5075 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(CmpOpc)) 5076 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR) 5077 .addUse(ResultReg1) 5078 .addUse(DesiredReg) 5079 .addImm(0); 5080 5081 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, MIMD, TII.get(AArch64::CSINCWr)) 5082 .addDef(ResultReg2) 5083 .addUse(AArch64::WZR) 5084 .addUse(AArch64::WZR) 5085 .addImm(AArch64CC::NE); 5086 5087 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); 5088 updateValueMap(I, ResultReg1, 2); 5089 return true; 5090 } 5091 5092 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 5093 if (TLI.fallBackToDAGISel(*I)) 5094 return false; 5095 switch (I->getOpcode()) { 5096 default: 5097 break; 5098 case Instruction::Add: 5099 case Instruction::Sub: 5100 return selectAddSub(I); 5101 case Instruction::Mul: 5102 return selectMul(I); 5103 case Instruction::SDiv: 5104 return selectSDiv(I); 5105 case Instruction::SRem: 5106 if (!selectBinaryOp(I, ISD::SREM)) 5107 return selectRem(I, ISD::SREM); 5108 return true; 5109 case Instruction::URem: 5110 if (!selectBinaryOp(I, ISD::UREM)) 5111 return selectRem(I, ISD::UREM); 5112 return true; 5113 case Instruction::Shl: 5114 case Instruction::LShr: 5115 case Instruction::AShr: 5116 return selectShift(I); 5117 case Instruction::And: 5118 case Instruction::Or: 5119 case Instruction::Xor: 5120 return selectLogicalOp(I); 5121 case Instruction::Br: 5122 return selectBranch(I); 5123 case Instruction::IndirectBr: 5124 return selectIndirectBr(I); 5125 case Instruction::BitCast: 5126 if (!FastISel::selectBitCast(I)) 5127 return selectBitCast(I); 5128 return true; 5129 case Instruction::FPToSI: 5130 if (!selectCast(I, ISD::FP_TO_SINT)) 5131 return selectFPToInt(I, /*Signed=*/true); 5132 return true; 5133 case Instruction::FPToUI: 5134 return selectFPToInt(I, /*Signed=*/false); 5135 case Instruction::ZExt: 5136 case Instruction::SExt: 5137 return selectIntExt(I); 5138 case Instruction::Trunc: 5139 if (!selectCast(I, ISD::TRUNCATE)) 5140 return selectTrunc(I); 5141 return true; 5142 case Instruction::FPExt: 5143 return selectFPExt(I); 5144 case Instruction::FPTrunc: 5145 return selectFPTrunc(I); 5146 case Instruction::SIToFP: 5147 if (!selectCast(I, ISD::SINT_TO_FP)) 5148 return selectIntToFP(I, /*Signed=*/true); 5149 return true; 5150 case Instruction::UIToFP: 
5151 return selectIntToFP(I, /*Signed=*/false); 5152 case Instruction::Load: 5153 return selectLoad(I); 5154 case Instruction::Store: 5155 return selectStore(I); 5156 case Instruction::FCmp: 5157 case Instruction::ICmp: 5158 return selectCmp(I); 5159 case Instruction::Select: 5160 return selectSelect(I); 5161 case Instruction::Ret: 5162 return selectRet(I); 5163 case Instruction::FRem: 5164 return selectFRem(I); 5165 case Instruction::GetElementPtr: 5166 return selectGetElementPtr(I); 5167 case Instruction::AtomicCmpXchg: 5168 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I)); 5169 } 5170 5171 // Fall back to target-independent instruction selection. 5172 return selectOperator(I, I->getOpcode()); 5173 } 5174 5175 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, 5176 const TargetLibraryInfo *LibInfo) { 5177 5178 SMEAttrs CallerAttrs(*FuncInfo.Fn); 5179 if (CallerAttrs.hasZAState() || CallerAttrs.hasStreamingInterfaceOrBody() || 5180 CallerAttrs.hasStreamingCompatibleInterface()) 5181 return nullptr; 5182 return new AArch64FastISel(FuncInfo, LibInfo); 5183 } 5184