//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg = 0;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }

    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }

    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }

    unsigned getOffsetReg() const {
      return OffsetReg;
    }

    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }

    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }

    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags,
                            unsigned ScaleFactor, MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          unsigned Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  std::pair<unsigned, bool> getRegForGEPIndex(const Value *Idx);

  // Emit helper routines.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         bool SetFlags = false, bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                         AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
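  // Note: the _rr/_ri/_rs/_rx suffixes used by the emit routines in this
  // class mirror the operand forms of the underlying AArch64 instructions:
  // register/register, register/immediate, register/shifted-register, and
  // register/extended-register, respectively.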
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
                        MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill, bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, bool LHSIsKill,
                       unsigned RHSReg, bool RHSIsKill,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            bool LHSIsKill, unsigned RHSReg, bool RHSIsKill,
                            uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                      unsigned Op1, bool Op1IsKill);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                        unsigned Op1, bool Op1IsKill);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                      unsigned Op1Reg, bool Op1IsKill);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, bool Op0IsKill,
                      uint64_t Imm, bool IsZExt = false);

  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

public:
  // Backend specific FastISel code.
288 unsigned fastMaterializeAlloca(const AllocaInst *AI) override; 289 unsigned fastMaterializeConstant(const Constant *C) override; 290 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override; 291 292 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, 293 const TargetLibraryInfo *LibInfo) 294 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { 295 Subtarget = 296 &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget()); 297 Context = &FuncInfo.Fn->getContext(); 298 } 299 300 bool fastSelectInstruction(const Instruction *I) override; 301 302 #include "AArch64GenFastISel.inc" 303 }; 304 305 } // end anonymous namespace 306 307 /// Check if the sign-/zero-extend will be a noop. 308 static bool isIntExtFree(const Instruction *I) { 309 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 310 "Unexpected integer extend instruction."); 311 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && 312 "Unexpected value type."); 313 bool IsZExt = isa<ZExtInst>(I); 314 315 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0))) 316 if (LI->hasOneUse()) 317 return true; 318 319 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) 320 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) 321 return true; 322 323 return false; 324 } 325 326 /// Determine the implicit scale factor that is applied by a memory 327 /// operation for a given value type. 328 static unsigned getImplicitScaleFactor(MVT VT) { 329 switch (VT.SimpleTy) { 330 default: 331 return 0; // invalid 332 case MVT::i1: // fall-through 333 case MVT::i8: 334 return 1; 335 case MVT::i16: 336 return 2; 337 case MVT::i32: // fall-through 338 case MVT::f32: 339 return 4; 340 case MVT::i64: // fall-through 341 case MVT::f64: 342 return 8; 343 } 344 } 345 346 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { 347 if (CC == CallingConv::WebKit_JS) 348 return CC_AArch64_WebKit_JS; 349 if (CC == CallingConv::GHC) 350 return CC_AArch64_GHC; 351 return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; 352 } 353 354 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { 355 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && 356 "Alloca should always return a pointer."); 357 358 // Don't handle dynamic allocas. 359 if (!FuncInfo.StaticAllocaMap.count(AI)) 360 return 0; 361 362 DenseMap<const AllocaInst *, int>::iterator SI = 363 FuncInfo.StaticAllocaMap.find(AI); 364 365 if (SI != FuncInfo.StaticAllocaMap.end()) { 366 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); 367 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 368 ResultReg) 369 .addFrameIndex(SI->second) 370 .addImm(0) 371 .addImm(0); 372 return ResultReg; 373 } 374 375 return 0; 376 } 377 378 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { 379 if (VT > MVT::i64) 380 return 0; 381 382 if (!CI->isZero()) 383 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); 384 385 // Create a copy from the zero register to materialize a "0" value. 386 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass 387 : &AArch64::GPR32RegClass; 388 unsigned ZeroReg = (VT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR; 389 unsigned ResultReg = createResultReg(RC); 390 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), 391 ResultReg).addReg(ZeroReg, getKillRegState(true)); 392 return ResultReg; 393 } 394 395 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { 396 // Positive zero (+0.0) has to be materialized with a fmov from the zero 397 // register, because the immediate version of fmov cannot encode zero. 398 if (CFP->isNullValue()) 399 return fastMaterializeFloatZero(CFP); 400 401 if (VT != MVT::f32 && VT != MVT::f64) 402 return 0; 403 404 const APFloat Val = CFP->getValueAPF(); 405 bool Is64Bit = (VT == MVT::f64); 406 // This checks to see if we can use FMOV instructions to materialize 407 // a constant, otherwise we have to materialize via the constant pool. 408 int Imm = 409 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val); 410 if (Imm != -1) { 411 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi; 412 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); 413 } 414 415 // For the MachO large code model materialize the FP constant in code. 416 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) { 417 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm; 418 const TargetRegisterClass *RC = Is64Bit ? 419 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 420 421 unsigned TmpReg = createResultReg(RC); 422 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg) 423 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); 424 425 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 426 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 427 TII.get(TargetOpcode::COPY), ResultReg) 428 .addReg(TmpReg, getKillRegState(true)); 429 430 return ResultReg; 431 } 432 433 // Materialize via constant pool. MachineConstantPool wants an explicit 434 // alignment. 435 unsigned Align = DL.getPrefTypeAlignment(CFP->getType()); 436 if (Align == 0) 437 Align = DL.getTypeAllocSize(CFP->getType()); 438 439 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); 440 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 441 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 442 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE); 443 444 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui; 445 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 446 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 447 .addReg(ADRPReg) 448 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 449 return ResultReg; 450 } 451 452 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { 453 // We can't handle thread-local variables quickly yet. 454 if (GV->isThreadLocal()) 455 return 0; 456 457 // MachO still uses GOT for large code-model accesses, but ELF requires 458 // movz/movk sequences, which FastISel doesn't handle yet. 
459 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO()) 460 return 0; 461 462 unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); 463 464 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); 465 if (!DestEVT.isSimple()) 466 return 0; 467 468 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 469 unsigned ResultReg; 470 471 if (OpFlags & AArch64II::MO_GOT) { 472 // ADRP + LDRX 473 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 474 ADRPReg) 475 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); 476 477 ResultReg = createResultReg(&AArch64::GPR64RegClass); 478 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::LDRXui), 479 ResultReg) 480 .addReg(ADRPReg) 481 .addGlobalAddress(GV, 0, 482 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags); 483 } else { 484 // ADRP + ADDX 485 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 486 ADRPReg) 487 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); 488 489 ResultReg = createResultReg(&AArch64::GPR64spRegClass); 490 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 491 ResultReg) 492 .addReg(ADRPReg) 493 .addGlobalAddress(GV, 0, 494 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags) 495 .addImm(0); 496 } 497 return ResultReg; 498 } 499 500 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { 501 EVT CEVT = TLI.getValueType(DL, C->getType(), true); 502 503 // Only handle simple types. 504 if (!CEVT.isSimple()) 505 return 0; 506 MVT VT = CEVT.getSimpleVT(); 507 508 if (const auto *CI = dyn_cast<ConstantInt>(C)) 509 return materializeInt(CI, VT); 510 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 511 return materializeFP(CFP, VT); 512 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 513 return materializeGV(GV); 514 515 return 0; 516 } 517 518 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { 519 assert(CFP->isNullValue() && 520 "Floating-point constant is not a positive zero."); 521 MVT VT; 522 if (!isTypeLegal(CFP->getType(), VT)) 523 return 0; 524 525 if (VT != MVT::f32 && VT != MVT::f64) 526 return 0; 527 528 bool Is64Bit = (VT == MVT::f64); 529 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 530 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr; 531 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg, /*IsKill=*/true); 532 } 533 534 /// Check if the multiply is by a power-of-2 constant. 535 static bool isMulPowOf2(const Value *I) { 536 if (const auto *MI = dyn_cast<MulOperator>(I)) { 537 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0))) 538 if (C->getValue().isPowerOf2()) 539 return true; 540 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1))) 541 if (C->getValue().isPowerOf2()) 542 return true; 543 } 544 return false; 545 } 546 547 // Computes the address to get to an object. 548 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) 549 { 550 const User *U = nullptr; 551 unsigned Opcode = Instruction::UserOp1; 552 if (const Instruction *I = dyn_cast<Instruction>(Obj)) { 553 // Don't walk into other basic blocks unless the object is an alloca from 554 // another block, otherwise it may not have a virtual register assigned. 
555 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || 556 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 557 Opcode = I->getOpcode(); 558 U = I; 559 } 560 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { 561 Opcode = C->getOpcode(); 562 U = C; 563 } 564 565 if (auto *Ty = dyn_cast<PointerType>(Obj->getType())) 566 if (Ty->getAddressSpace() > 255) 567 // Fast instruction selection doesn't support the special 568 // address spaces. 569 return false; 570 571 switch (Opcode) { 572 default: 573 break; 574 case Instruction::BitCast: 575 // Look through bitcasts. 576 return computeAddress(U->getOperand(0), Addr, Ty); 577 578 case Instruction::IntToPtr: 579 // Look past no-op inttoptrs. 580 if (TLI.getValueType(DL, U->getOperand(0)->getType()) == 581 TLI.getPointerTy(DL)) 582 return computeAddress(U->getOperand(0), Addr, Ty); 583 break; 584 585 case Instruction::PtrToInt: 586 // Look past no-op ptrtoints. 587 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) 588 return computeAddress(U->getOperand(0), Addr, Ty); 589 break; 590 591 case Instruction::GetElementPtr: { 592 Address SavedAddr = Addr; 593 uint64_t TmpOffset = Addr.getOffset(); 594 595 // Iterate through the GEP folding the constants into offsets where 596 // we can. 597 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U); 598 GTI != E; ++GTI) { 599 const Value *Op = GTI.getOperand(); 600 if (StructType *STy = GTI.getStructTypeOrNull()) { 601 const StructLayout *SL = DL.getStructLayout(STy); 602 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); 603 TmpOffset += SL->getElementOffset(Idx); 604 } else { 605 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); 606 while (true) { 607 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { 608 // Constant-offset addressing. 609 TmpOffset += CI->getSExtValue() * S; 610 break; 611 } 612 if (canFoldAddIntoGEP(U, Op)) { 613 // A compatible add with a constant operand. Fold the constant. 614 ConstantInt *CI = 615 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); 616 TmpOffset += CI->getSExtValue() * S; 617 // Iterate on the other operand. 618 Op = cast<AddOperator>(Op)->getOperand(0); 619 continue; 620 } 621 // Unsupported 622 goto unsupported_gep; 623 } 624 } 625 } 626 627 // Try to grab the base operand now. 628 Addr.setOffset(TmpOffset); 629 if (computeAddress(U->getOperand(0), Addr, Ty)) 630 return true; 631 632 // We failed, restore everything and try the other options. 633 Addr = SavedAddr; 634 635 unsupported_gep: 636 break; 637 } 638 case Instruction::Alloca: { 639 const AllocaInst *AI = cast<AllocaInst>(Obj); 640 DenseMap<const AllocaInst *, int>::iterator SI = 641 FuncInfo.StaticAllocaMap.find(AI); 642 if (SI != FuncInfo.StaticAllocaMap.end()) { 643 Addr.setKind(Address::FrameIndexBase); 644 Addr.setFI(SI->second); 645 return true; 646 } 647 break; 648 } 649 case Instruction::Add: { 650 // Adds of constants are common and easy enough. 651 const Value *LHS = U->getOperand(0); 652 const Value *RHS = U->getOperand(1); 653 654 if (isa<ConstantInt>(LHS)) 655 std::swap(LHS, RHS); 656 657 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 658 Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); 659 return computeAddress(LHS, Addr, Ty); 660 } 661 662 Address Backup = Addr; 663 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty)) 664 return true; 665 Addr = Backup; 666 667 break; 668 } 669 case Instruction::Sub: { 670 // Subs of constants are common and easy enough. 
671 const Value *LHS = U->getOperand(0); 672 const Value *RHS = U->getOperand(1); 673 674 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 675 Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); 676 return computeAddress(LHS, Addr, Ty); 677 } 678 break; 679 } 680 case Instruction::Shl: { 681 if (Addr.getOffsetReg()) 682 break; 683 684 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1)); 685 if (!CI) 686 break; 687 688 unsigned Val = CI->getZExtValue(); 689 if (Val < 1 || Val > 3) 690 break; 691 692 uint64_t NumBytes = 0; 693 if (Ty && Ty->isSized()) { 694 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 695 NumBytes = NumBits / 8; 696 if (!isPowerOf2_64(NumBits)) 697 NumBytes = 0; 698 } 699 700 if (NumBytes != (1ULL << Val)) 701 break; 702 703 Addr.setShift(Val); 704 Addr.setExtendType(AArch64_AM::LSL); 705 706 const Value *Src = U->getOperand(0); 707 if (const auto *I = dyn_cast<Instruction>(Src)) { 708 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 709 // Fold the zext or sext when it won't become a noop. 710 if (const auto *ZE = dyn_cast<ZExtInst>(I)) { 711 if (!isIntExtFree(ZE) && 712 ZE->getOperand(0)->getType()->isIntegerTy(32)) { 713 Addr.setExtendType(AArch64_AM::UXTW); 714 Src = ZE->getOperand(0); 715 } 716 } else if (const auto *SE = dyn_cast<SExtInst>(I)) { 717 if (!isIntExtFree(SE) && 718 SE->getOperand(0)->getType()->isIntegerTy(32)) { 719 Addr.setExtendType(AArch64_AM::SXTW); 720 Src = SE->getOperand(0); 721 } 722 } 723 } 724 } 725 726 if (const auto *AI = dyn_cast<BinaryOperator>(Src)) 727 if (AI->getOpcode() == Instruction::And) { 728 const Value *LHS = AI->getOperand(0); 729 const Value *RHS = AI->getOperand(1); 730 731 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 732 if (C->getValue() == 0xffffffff) 733 std::swap(LHS, RHS); 734 735 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 736 if (C->getValue() == 0xffffffff) { 737 Addr.setExtendType(AArch64_AM::UXTW); 738 unsigned Reg = getRegForValue(LHS); 739 if (!Reg) 740 return false; 741 bool RegIsKill = hasTrivialKill(LHS); 742 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, 743 AArch64::sub_32); 744 Addr.setOffsetReg(Reg); 745 return true; 746 } 747 } 748 749 unsigned Reg = getRegForValue(Src); 750 if (!Reg) 751 return false; 752 Addr.setOffsetReg(Reg); 753 return true; 754 } 755 case Instruction::Mul: { 756 if (Addr.getOffsetReg()) 757 break; 758 759 if (!isMulPowOf2(U)) 760 break; 761 762 const Value *LHS = U->getOperand(0); 763 const Value *RHS = U->getOperand(1); 764 765 // Canonicalize power-of-2 value to the RHS. 766 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 767 if (C->getValue().isPowerOf2()) 768 std::swap(LHS, RHS); 769 770 assert(isa<ConstantInt>(RHS) && "Expected an ConstantInt."); 771 const auto *C = cast<ConstantInt>(RHS); 772 unsigned Val = C->getValue().logBase2(); 773 if (Val < 1 || Val > 3) 774 break; 775 776 uint64_t NumBytes = 0; 777 if (Ty && Ty->isSized()) { 778 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 779 NumBytes = NumBits / 8; 780 if (!isPowerOf2_64(NumBits)) 781 NumBytes = 0; 782 } 783 784 if (NumBytes != (1ULL << Val)) 785 break; 786 787 Addr.setShift(Val); 788 Addr.setExtendType(AArch64_AM::LSL); 789 790 const Value *Src = LHS; 791 if (const auto *I = dyn_cast<Instruction>(Src)) { 792 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 793 // Fold the zext or sext when it won't become a noop. 
794 if (const auto *ZE = dyn_cast<ZExtInst>(I)) { 795 if (!isIntExtFree(ZE) && 796 ZE->getOperand(0)->getType()->isIntegerTy(32)) { 797 Addr.setExtendType(AArch64_AM::UXTW); 798 Src = ZE->getOperand(0); 799 } 800 } else if (const auto *SE = dyn_cast<SExtInst>(I)) { 801 if (!isIntExtFree(SE) && 802 SE->getOperand(0)->getType()->isIntegerTy(32)) { 803 Addr.setExtendType(AArch64_AM::SXTW); 804 Src = SE->getOperand(0); 805 } 806 } 807 } 808 } 809 810 unsigned Reg = getRegForValue(Src); 811 if (!Reg) 812 return false; 813 Addr.setOffsetReg(Reg); 814 return true; 815 } 816 case Instruction::And: { 817 if (Addr.getOffsetReg()) 818 break; 819 820 if (!Ty || DL.getTypeSizeInBits(Ty) != 8) 821 break; 822 823 const Value *LHS = U->getOperand(0); 824 const Value *RHS = U->getOperand(1); 825 826 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 827 if (C->getValue() == 0xffffffff) 828 std::swap(LHS, RHS); 829 830 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 831 if (C->getValue() == 0xffffffff) { 832 Addr.setShift(0); 833 Addr.setExtendType(AArch64_AM::LSL); 834 Addr.setExtendType(AArch64_AM::UXTW); 835 836 unsigned Reg = getRegForValue(LHS); 837 if (!Reg) 838 return false; 839 bool RegIsKill = hasTrivialKill(LHS); 840 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, RegIsKill, 841 AArch64::sub_32); 842 Addr.setOffsetReg(Reg); 843 return true; 844 } 845 break; 846 } 847 case Instruction::SExt: 848 case Instruction::ZExt: { 849 if (!Addr.getReg() || Addr.getOffsetReg()) 850 break; 851 852 const Value *Src = nullptr; 853 // Fold the zext or sext when it won't become a noop. 854 if (const auto *ZE = dyn_cast<ZExtInst>(U)) { 855 if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) { 856 Addr.setExtendType(AArch64_AM::UXTW); 857 Src = ZE->getOperand(0); 858 } 859 } else if (const auto *SE = dyn_cast<SExtInst>(U)) { 860 if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) { 861 Addr.setExtendType(AArch64_AM::SXTW); 862 Src = SE->getOperand(0); 863 } 864 } 865 866 if (!Src) 867 break; 868 869 Addr.setShift(0); 870 unsigned Reg = getRegForValue(Src); 871 if (!Reg) 872 return false; 873 Addr.setOffsetReg(Reg); 874 return true; 875 } 876 } // end switch 877 878 if (Addr.isRegBase() && !Addr.getReg()) { 879 unsigned Reg = getRegForValue(Obj); 880 if (!Reg) 881 return false; 882 Addr.setReg(Reg); 883 return true; 884 } 885 886 if (!Addr.getOffsetReg()) { 887 unsigned Reg = getRegForValue(Obj); 888 if (!Reg) 889 return false; 890 Addr.setOffsetReg(Reg); 891 return true; 892 } 893 894 return false; 895 } 896 897 bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) { 898 const User *U = nullptr; 899 unsigned Opcode = Instruction::UserOp1; 900 bool InMBB = true; 901 902 if (const auto *I = dyn_cast<Instruction>(V)) { 903 Opcode = I->getOpcode(); 904 U = I; 905 InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock(); 906 } else if (const auto *C = dyn_cast<ConstantExpr>(V)) { 907 Opcode = C->getOpcode(); 908 U = C; 909 } 910 911 switch (Opcode) { 912 default: break; 913 case Instruction::BitCast: 914 // Look past bitcasts if its operand is in the same BB. 915 if (InMBB) 916 return computeCallAddress(U->getOperand(0), Addr); 917 break; 918 case Instruction::IntToPtr: 919 // Look past no-op inttoptrs if its operand is in the same BB. 
920 if (InMBB && 921 TLI.getValueType(DL, U->getOperand(0)->getType()) == 922 TLI.getPointerTy(DL)) 923 return computeCallAddress(U->getOperand(0), Addr); 924 break; 925 case Instruction::PtrToInt: 926 // Look past no-op ptrtoints if its operand is in the same BB. 927 if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) 928 return computeCallAddress(U->getOperand(0), Addr); 929 break; 930 } 931 932 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 933 Addr.setGlobalValue(GV); 934 return true; 935 } 936 937 // If all else fails, try to materialize the value in a register. 938 if (!Addr.getGlobalValue()) { 939 Addr.setReg(getRegForValue(V)); 940 return Addr.getReg() != 0; 941 } 942 943 return false; 944 } 945 946 bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) { 947 EVT evt = TLI.getValueType(DL, Ty, true); 948 949 // Only handle simple types. 950 if (evt == MVT::Other || !evt.isSimple()) 951 return false; 952 VT = evt.getSimpleVT(); 953 954 // This is a legal type, but it's not something we handle in fast-isel. 955 if (VT == MVT::f128) 956 return false; 957 958 // Handle all other legal types, i.e. a register that will directly hold this 959 // value. 960 return TLI.isTypeLegal(VT); 961 } 962 963 /// Determine if the value type is supported by FastISel. 964 /// 965 /// FastISel for AArch64 can handle more value types than are legal. This adds 966 /// simple value type such as i1, i8, and i16. 967 bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) { 968 if (Ty->isVectorTy() && !IsVectorAllowed) 969 return false; 970 971 if (isTypeLegal(Ty, VT)) 972 return true; 973 974 // If this is a type than can be sign or zero-extended to a basic operation 975 // go ahead and accept it now. 976 if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) 977 return true; 978 979 return false; 980 } 981 982 bool AArch64FastISel::isValueAvailable(const Value *V) const { 983 if (!isa<Instruction>(V)) 984 return true; 985 986 const auto *I = cast<Instruction>(V); 987 return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB; 988 } 989 990 bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) { 991 unsigned ScaleFactor = getImplicitScaleFactor(VT); 992 if (!ScaleFactor) 993 return false; 994 995 bool ImmediateOffsetNeedsLowering = false; 996 bool RegisterOffsetNeedsLowering = false; 997 int64_t Offset = Addr.getOffset(); 998 if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset)) 999 ImmediateOffsetNeedsLowering = true; 1000 else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) && 1001 !isUInt<12>(Offset / ScaleFactor)) 1002 ImmediateOffsetNeedsLowering = true; 1003 1004 // Cannot encode an offset register and an immediate offset in the same 1005 // instruction. Fold the immediate offset into the load/store instruction and 1006 // emit an additional add to take care of the offset register. 1007 if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg()) 1008 RegisterOffsetNeedsLowering = true; 1009 1010 // Cannot encode zero register as base. 1011 if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg()) 1012 RegisterOffsetNeedsLowering = true; 1013 1014 // If this is a stack pointer and the offset needs to be simplified then put 1015 // the alloca address into a register, set the base type back to register and 1016 // continue. This should almost never happen. 
1017 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase()) 1018 { 1019 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); 1020 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 1021 ResultReg) 1022 .addFrameIndex(Addr.getFI()) 1023 .addImm(0) 1024 .addImm(0); 1025 Addr.setKind(Address::RegBase); 1026 Addr.setReg(ResultReg); 1027 } 1028 1029 if (RegisterOffsetNeedsLowering) { 1030 unsigned ResultReg = 0; 1031 if (Addr.getReg()) { 1032 if (Addr.getExtendType() == AArch64_AM::SXTW || 1033 Addr.getExtendType() == AArch64_AM::UXTW ) 1034 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 1035 /*TODO:IsKill=*/false, Addr.getOffsetReg(), 1036 /*TODO:IsKill=*/false, Addr.getExtendType(), 1037 Addr.getShift()); 1038 else 1039 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 1040 /*TODO:IsKill=*/false, Addr.getOffsetReg(), 1041 /*TODO:IsKill=*/false, AArch64_AM::LSL, 1042 Addr.getShift()); 1043 } else { 1044 if (Addr.getExtendType() == AArch64_AM::UXTW) 1045 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), 1046 /*Op0IsKill=*/false, Addr.getShift(), 1047 /*IsZExt=*/true); 1048 else if (Addr.getExtendType() == AArch64_AM::SXTW) 1049 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), 1050 /*Op0IsKill=*/false, Addr.getShift(), 1051 /*IsZExt=*/false); 1052 else 1053 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(), 1054 /*Op0IsKill=*/false, Addr.getShift()); 1055 } 1056 if (!ResultReg) 1057 return false; 1058 1059 Addr.setReg(ResultReg); 1060 Addr.setOffsetReg(0); 1061 Addr.setShift(0); 1062 Addr.setExtendType(AArch64_AM::InvalidShiftExtend); 1063 } 1064 1065 // Since the offset is too large for the load/store instruction get the 1066 // reg+offset into a register. 1067 if (ImmediateOffsetNeedsLowering) { 1068 unsigned ResultReg; 1069 if (Addr.getReg()) 1070 // Try to fold the immediate into the add instruction. 1071 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), /*IsKill=*/false, Offset); 1072 else 1073 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); 1074 1075 if (!ResultReg) 1076 return false; 1077 Addr.setReg(ResultReg); 1078 Addr.setOffset(0); 1079 } 1080 return true; 1081 } 1082 1083 void AArch64FastISel::addLoadStoreOperands(Address &Addr, 1084 const MachineInstrBuilder &MIB, 1085 MachineMemOperand::Flags Flags, 1086 unsigned ScaleFactor, 1087 MachineMemOperand *MMO) { 1088 int64_t Offset = Addr.getOffset() / ScaleFactor; 1089 // Frame base works a bit differently. Handle it separately. 1090 if (Addr.isFIBase()) { 1091 int FI = Addr.getFI(); 1092 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size 1093 // and alignment should be based on the VT. 1094 MMO = FuncInfo.MF->getMachineMemOperand( 1095 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags, 1096 MFI.getObjectSize(FI), MFI.getObjectAlignment(FI)); 1097 // Now add the rest of the operands. 1098 MIB.addFrameIndex(FI).addImm(Offset); 1099 } else { 1100 assert(Addr.isRegBase() && "Unexpected address kind."); 1101 const MCInstrDesc &II = MIB->getDesc(); 1102 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 
1 : 0; 1103 Addr.setReg( 1104 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx)); 1105 Addr.setOffsetReg( 1106 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1)); 1107 if (Addr.getOffsetReg()) { 1108 assert(Addr.getOffset() == 0 && "Unexpected offset"); 1109 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW || 1110 Addr.getExtendType() == AArch64_AM::SXTX; 1111 MIB.addReg(Addr.getReg()); 1112 MIB.addReg(Addr.getOffsetReg()); 1113 MIB.addImm(IsSigned); 1114 MIB.addImm(Addr.getShift() != 0); 1115 } else 1116 MIB.addReg(Addr.getReg()).addImm(Offset); 1117 } 1118 1119 if (MMO) 1120 MIB.addMemOperand(MMO); 1121 } 1122 1123 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, 1124 const Value *RHS, bool SetFlags, 1125 bool WantResult, bool IsZExt) { 1126 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend; 1127 bool NeedExtend = false; 1128 switch (RetVT.SimpleTy) { 1129 default: 1130 return 0; 1131 case MVT::i1: 1132 NeedExtend = true; 1133 break; 1134 case MVT::i8: 1135 NeedExtend = true; 1136 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB; 1137 break; 1138 case MVT::i16: 1139 NeedExtend = true; 1140 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH; 1141 break; 1142 case MVT::i32: // fall-through 1143 case MVT::i64: 1144 break; 1145 } 1146 MVT SrcVT = RetVT; 1147 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32); 1148 1149 // Canonicalize immediates to the RHS first. 1150 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS)) 1151 std::swap(LHS, RHS); 1152 1153 // Canonicalize mul by power of 2 to the RHS. 1154 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) 1155 if (isMulPowOf2(LHS)) 1156 std::swap(LHS, RHS); 1157 1158 // Canonicalize shift immediate to the RHS. 1159 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) 1160 if (const auto *SI = dyn_cast<BinaryOperator>(LHS)) 1161 if (isa<ConstantInt>(SI->getOperand(1))) 1162 if (SI->getOpcode() == Instruction::Shl || 1163 SI->getOpcode() == Instruction::LShr || 1164 SI->getOpcode() == Instruction::AShr ) 1165 std::swap(LHS, RHS); 1166 1167 unsigned LHSReg = getRegForValue(LHS); 1168 if (!LHSReg) 1169 return 0; 1170 bool LHSIsKill = hasTrivialKill(LHS); 1171 1172 if (NeedExtend) 1173 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt); 1174 1175 unsigned ResultReg = 0; 1176 if (const auto *C = dyn_cast<ConstantInt>(RHS)) { 1177 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue(); 1178 if (C->isNegative()) 1179 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, LHSIsKill, -Imm, 1180 SetFlags, WantResult); 1181 else 1182 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, Imm, SetFlags, 1183 WantResult); 1184 } else if (const auto *C = dyn_cast<Constant>(RHS)) 1185 if (C->isNullValue()) 1186 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, LHSIsKill, 0, SetFlags, 1187 WantResult); 1188 1189 if (ResultReg) 1190 return ResultReg; 1191 1192 // Only extend the RHS within the instruction if there is a valid extend type. 
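  // The extended-register forms (ADDWrx/SUBWrx etc.) fold the extension of
  // the RHS, and optionally a left shift of 0-3, into the instruction itself,
  // e.g. "add w0, w1, w2, sxth" or "add w0, w1, w2, uxtb #2".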
1193 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && 1194 isValueAvailable(RHS)) { 1195 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) 1196 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) 1197 if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { 1198 unsigned RHSReg = getRegForValue(SI->getOperand(0)); 1199 if (!RHSReg) 1200 return 0; 1201 bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); 1202 return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, 1203 RHSIsKill, ExtendType, C->getZExtValue(), 1204 SetFlags, WantResult); 1205 } 1206 unsigned RHSReg = getRegForValue(RHS); 1207 if (!RHSReg) 1208 return 0; 1209 bool RHSIsKill = hasTrivialKill(RHS); 1210 return emitAddSub_rx(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 1211 ExtendType, 0, SetFlags, WantResult); 1212 } 1213 1214 // Check if the mul can be folded into the instruction. 1215 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1216 if (isMulPowOf2(RHS)) { 1217 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); 1218 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); 1219 1220 if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) 1221 if (C->getValue().isPowerOf2()) 1222 std::swap(MulLHS, MulRHS); 1223 1224 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); 1225 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); 1226 unsigned RHSReg = getRegForValue(MulLHS); 1227 if (!RHSReg) 1228 return 0; 1229 bool RHSIsKill = hasTrivialKill(MulLHS); 1230 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, 1231 RHSIsKill, AArch64_AM::LSL, ShiftVal, SetFlags, 1232 WantResult); 1233 if (ResultReg) 1234 return ResultReg; 1235 } 1236 } 1237 1238 // Check if the shift can be folded into the instruction. 
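  // A shift by a constant on the RHS can be folded into the shifted-register
  // forms (ADDWrs/SUBXrs etc.), e.g. (add x1, (shl x2, 3)) becomes
  // "add x0, x1, x2, lsl #3".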
1239 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1240 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) { 1241 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { 1242 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; 1243 switch (SI->getOpcode()) { 1244 default: break; 1245 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; 1246 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; 1247 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; 1248 } 1249 uint64_t ShiftVal = C->getZExtValue(); 1250 if (ShiftType != AArch64_AM::InvalidShiftExtend) { 1251 unsigned RHSReg = getRegForValue(SI->getOperand(0)); 1252 if (!RHSReg) 1253 return 0; 1254 bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); 1255 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, 1256 RHSIsKill, ShiftType, ShiftVal, SetFlags, 1257 WantResult); 1258 if (ResultReg) 1259 return ResultReg; 1260 } 1261 } 1262 } 1263 } 1264 1265 unsigned RHSReg = getRegForValue(RHS); 1266 if (!RHSReg) 1267 return 0; 1268 bool RHSIsKill = hasTrivialKill(RHS); 1269 1270 if (NeedExtend) 1271 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt); 1272 1273 return emitAddSub_rr(UseAdd, RetVT, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 1274 SetFlags, WantResult); 1275 } 1276 1277 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, 1278 bool LHSIsKill, unsigned RHSReg, 1279 bool RHSIsKill, bool SetFlags, 1280 bool WantResult) { 1281 assert(LHSReg && RHSReg && "Invalid register number."); 1282 1283 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP || 1284 RHSReg == AArch64::SP || RHSReg == AArch64::WSP) 1285 return 0; 1286 1287 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1288 return 0; 1289 1290 static const unsigned OpcTable[2][2][2] = { 1291 { { AArch64::SUBWrr, AArch64::SUBXrr }, 1292 { AArch64::ADDWrr, AArch64::ADDXrr } }, 1293 { { AArch64::SUBSWrr, AArch64::SUBSXrr }, 1294 { AArch64::ADDSWrr, AArch64::ADDSXrr } } 1295 }; 1296 bool Is64Bit = RetVT == MVT::i64; 1297 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1298 const TargetRegisterClass *RC = 1299 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1300 unsigned ResultReg; 1301 if (WantResult) 1302 ResultReg = createResultReg(RC); 1303 else 1304 ResultReg = Is64Bit ? 
AArch64::XZR : AArch64::WZR; 1305 1306 const MCInstrDesc &II = TII.get(Opc); 1307 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1308 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1309 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1310 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1311 .addReg(RHSReg, getKillRegState(RHSIsKill)); 1312 return ResultReg; 1313 } 1314 1315 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, 1316 bool LHSIsKill, uint64_t Imm, 1317 bool SetFlags, bool WantResult) { 1318 assert(LHSReg && "Invalid register number."); 1319 1320 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1321 return 0; 1322 1323 unsigned ShiftImm; 1324 if (isUInt<12>(Imm)) 1325 ShiftImm = 0; 1326 else if ((Imm & 0xfff000) == Imm) { 1327 ShiftImm = 12; 1328 Imm >>= 12; 1329 } else 1330 return 0; 1331 1332 static const unsigned OpcTable[2][2][2] = { 1333 { { AArch64::SUBWri, AArch64::SUBXri }, 1334 { AArch64::ADDWri, AArch64::ADDXri } }, 1335 { { AArch64::SUBSWri, AArch64::SUBSXri }, 1336 { AArch64::ADDSWri, AArch64::ADDSXri } } 1337 }; 1338 bool Is64Bit = RetVT == MVT::i64; 1339 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1340 const TargetRegisterClass *RC; 1341 if (SetFlags) 1342 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1343 else 1344 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; 1345 unsigned ResultReg; 1346 if (WantResult) 1347 ResultReg = createResultReg(RC); 1348 else 1349 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1350 1351 const MCInstrDesc &II = TII.get(Opc); 1352 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1353 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1354 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1355 .addImm(Imm) 1356 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm)); 1357 return ResultReg; 1358 } 1359 1360 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, 1361 bool LHSIsKill, unsigned RHSReg, 1362 bool RHSIsKill, 1363 AArch64_AM::ShiftExtendType ShiftType, 1364 uint64_t ShiftImm, bool SetFlags, 1365 bool WantResult) { 1366 assert(LHSReg && RHSReg && "Invalid register number."); 1367 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && 1368 RHSReg != AArch64::SP && RHSReg != AArch64::WSP); 1369 1370 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1371 return 0; 1372 1373 // Don't deal with undefined shifts. 1374 if (ShiftImm >= RetVT.getSizeInBits()) 1375 return 0; 1376 1377 static const unsigned OpcTable[2][2][2] = { 1378 { { AArch64::SUBWrs, AArch64::SUBXrs }, 1379 { AArch64::ADDWrs, AArch64::ADDXrs } }, 1380 { { AArch64::SUBSWrs, AArch64::SUBSXrs }, 1381 { AArch64::ADDSWrs, AArch64::ADDSXrs } } 1382 }; 1383 bool Is64Bit = RetVT == MVT::i64; 1384 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1385 const TargetRegisterClass *RC = 1386 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1387 unsigned ResultReg; 1388 if (WantResult) 1389 ResultReg = createResultReg(RC); 1390 else 1391 ResultReg = Is64Bit ? 
AArch64::XZR : AArch64::WZR; 1392 1393 const MCInstrDesc &II = TII.get(Opc); 1394 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1395 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1396 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1397 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1398 .addReg(RHSReg, getKillRegState(RHSIsKill)) 1399 .addImm(getShifterImm(ShiftType, ShiftImm)); 1400 return ResultReg; 1401 } 1402 1403 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, 1404 bool LHSIsKill, unsigned RHSReg, 1405 bool RHSIsKill, 1406 AArch64_AM::ShiftExtendType ExtType, 1407 uint64_t ShiftImm, bool SetFlags, 1408 bool WantResult) { 1409 assert(LHSReg && RHSReg && "Invalid register number."); 1410 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && 1411 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); 1412 1413 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1414 return 0; 1415 1416 if (ShiftImm >= 4) 1417 return 0; 1418 1419 static const unsigned OpcTable[2][2][2] = { 1420 { { AArch64::SUBWrx, AArch64::SUBXrx }, 1421 { AArch64::ADDWrx, AArch64::ADDXrx } }, 1422 { { AArch64::SUBSWrx, AArch64::SUBSXrx }, 1423 { AArch64::ADDSWrx, AArch64::ADDSXrx } } 1424 }; 1425 bool Is64Bit = RetVT == MVT::i64; 1426 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1427 const TargetRegisterClass *RC = nullptr; 1428 if (SetFlags) 1429 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1430 else 1431 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; 1432 unsigned ResultReg; 1433 if (WantResult) 1434 ResultReg = createResultReg(RC); 1435 else 1436 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1437 1438 const MCInstrDesc &II = TII.get(Opc); 1439 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1440 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1441 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1442 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1443 .addReg(RHSReg, getKillRegState(RHSIsKill)) 1444 .addImm(getArithExtendImm(ExtType, ShiftImm)); 1445 return ResultReg; 1446 } 1447 1448 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { 1449 Type *Ty = LHS->getType(); 1450 EVT EVT = TLI.getValueType(DL, Ty, true); 1451 if (!EVT.isSimple()) 1452 return false; 1453 MVT VT = EVT.getSimpleVT(); 1454 1455 switch (VT.SimpleTy) { 1456 default: 1457 return false; 1458 case MVT::i1: 1459 case MVT::i8: 1460 case MVT::i16: 1461 case MVT::i32: 1462 case MVT::i64: 1463 return emitICmp(VT, LHS, RHS, IsZExt); 1464 case MVT::f32: 1465 case MVT::f64: 1466 return emitFCmp(VT, LHS, RHS); 1467 } 1468 } 1469 1470 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, 1471 bool IsZExt) { 1472 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false, 1473 IsZExt) != 0; 1474 } 1475 1476 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 1477 uint64_t Imm) { 1478 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, Imm, 1479 /*SetFlags=*/true, /*WantResult=*/false) != 0; 1480 } 1481 1482 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { 1483 if (RetVT != MVT::f32 && RetVT != MVT::f64) 1484 return false; 1485 1486 // Check to see if the 2nd operand is a constant that we can encode directly 1487 // in the compare. 
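  // FCMP has a compare-against-+0.0 form (FCMPSri/FCMPDri), so a positive
  // floating-point zero on the RHS does not need to be materialized into a
  // register.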
1488 bool UseImm = false; 1489 if (const auto *CFP = dyn_cast<ConstantFP>(RHS)) 1490 if (CFP->isZero() && !CFP->isNegative()) 1491 UseImm = true; 1492 1493 unsigned LHSReg = getRegForValue(LHS); 1494 if (!LHSReg) 1495 return false; 1496 bool LHSIsKill = hasTrivialKill(LHS); 1497 1498 if (UseImm) { 1499 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri; 1500 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 1501 .addReg(LHSReg, getKillRegState(LHSIsKill)); 1502 return true; 1503 } 1504 1505 unsigned RHSReg = getRegForValue(RHS); 1506 if (!RHSReg) 1507 return false; 1508 bool RHSIsKill = hasTrivialKill(RHS); 1509 1510 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr; 1511 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 1512 .addReg(LHSReg, getKillRegState(LHSIsKill)) 1513 .addReg(RHSReg, getKillRegState(RHSIsKill)); 1514 return true; 1515 } 1516 1517 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, 1518 bool SetFlags, bool WantResult, bool IsZExt) { 1519 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult, 1520 IsZExt); 1521 } 1522 1523 /// This method is a wrapper to simplify add emission. 1524 /// 1525 /// First try to emit an add with an immediate operand using emitAddSub_ri. If 1526 /// that fails, then try to materialize the immediate into a register and use 1527 /// emitAddSub_rr instead. 1528 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, bool Op0IsKill, 1529 int64_t Imm) { 1530 unsigned ResultReg; 1531 if (Imm < 0) 1532 ResultReg = emitAddSub_ri(false, VT, Op0, Op0IsKill, -Imm); 1533 else 1534 ResultReg = emitAddSub_ri(true, VT, Op0, Op0IsKill, Imm); 1535 1536 if (ResultReg) 1537 return ResultReg; 1538 1539 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm); 1540 if (!CReg) 1541 return 0; 1542 1543 ResultReg = emitAddSub_rr(true, VT, Op0, Op0IsKill, CReg, true); 1544 return ResultReg; 1545 } 1546 1547 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, 1548 bool SetFlags, bool WantResult, bool IsZExt) { 1549 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, 1550 IsZExt); 1551 } 1552 1553 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg, 1554 bool LHSIsKill, unsigned RHSReg, 1555 bool RHSIsKill, bool WantResult) { 1556 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg, 1557 RHSIsKill, /*SetFlags=*/true, WantResult); 1558 } 1559 1560 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg, 1561 bool LHSIsKill, unsigned RHSReg, 1562 bool RHSIsKill, 1563 AArch64_AM::ShiftExtendType ShiftType, 1564 uint64_t ShiftImm, bool WantResult) { 1565 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, LHSIsKill, RHSReg, 1566 RHSIsKill, ShiftType, ShiftImm, /*SetFlags=*/true, 1567 WantResult); 1568 } 1569 1570 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, 1571 const Value *LHS, const Value *RHS) { 1572 // Canonicalize immediates to the RHS first. 1573 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS)) 1574 std::swap(LHS, RHS); 1575 1576 // Canonicalize mul by power-of-2 to the RHS. 1577 if (LHS->hasOneUse() && isValueAvailable(LHS)) 1578 if (isMulPowOf2(LHS)) 1579 std::swap(LHS, RHS); 1580 1581 // Canonicalize shift immediate to the RHS. 
1582 if (LHS->hasOneUse() && isValueAvailable(LHS)) 1583 if (const auto *SI = dyn_cast<ShlOperator>(LHS)) 1584 if (isa<ConstantInt>(SI->getOperand(1))) 1585 std::swap(LHS, RHS); 1586 1587 unsigned LHSReg = getRegForValue(LHS); 1588 if (!LHSReg) 1589 return 0; 1590 bool LHSIsKill = hasTrivialKill(LHS); 1591 1592 unsigned ResultReg = 0; 1593 if (const auto *C = dyn_cast<ConstantInt>(RHS)) { 1594 uint64_t Imm = C->getZExtValue(); 1595 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, LHSIsKill, Imm); 1596 } 1597 if (ResultReg) 1598 return ResultReg; 1599 1600 // Check if the mul can be folded into the instruction. 1601 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1602 if (isMulPowOf2(RHS)) { 1603 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); 1604 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); 1605 1606 if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) 1607 if (C->getValue().isPowerOf2()) 1608 std::swap(MulLHS, MulRHS); 1609 1610 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); 1611 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); 1612 1613 unsigned RHSReg = getRegForValue(MulLHS); 1614 if (!RHSReg) 1615 return 0; 1616 bool RHSIsKill = hasTrivialKill(MulLHS); 1617 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, 1618 RHSIsKill, ShiftVal); 1619 if (ResultReg) 1620 return ResultReg; 1621 } 1622 } 1623 1624 // Check if the shift can be folded into the instruction. 1625 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1626 if (const auto *SI = dyn_cast<ShlOperator>(RHS)) 1627 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { 1628 uint64_t ShiftVal = C->getZExtValue(); 1629 unsigned RHSReg = getRegForValue(SI->getOperand(0)); 1630 if (!RHSReg) 1631 return 0; 1632 bool RHSIsKill = hasTrivialKill(SI->getOperand(0)); 1633 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, LHSIsKill, RHSReg, 1634 RHSIsKill, ShiftVal); 1635 if (ResultReg) 1636 return ResultReg; 1637 } 1638 } 1639 1640 unsigned RHSReg = getRegForValue(RHS); 1641 if (!RHSReg) 1642 return 0; 1643 bool RHSIsKill = hasTrivialKill(RHS); 1644 1645 MVT VT = std::max(MVT::i32, RetVT.SimpleTy); 1646 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 1647 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1648 uint64_t Mask = (RetVT == MVT::i8) ? 
0xff : 0xffff; 1649 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1650 } 1651 return ResultReg; 1652 } 1653 1654 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, 1655 unsigned LHSReg, bool LHSIsKill, 1656 uint64_t Imm) { 1657 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), 1658 "ISD nodes are not consecutive!"); 1659 static const unsigned OpcTable[3][2] = { 1660 { AArch64::ANDWri, AArch64::ANDXri }, 1661 { AArch64::ORRWri, AArch64::ORRXri }, 1662 { AArch64::EORWri, AArch64::EORXri } 1663 }; 1664 const TargetRegisterClass *RC; 1665 unsigned Opc; 1666 unsigned RegSize; 1667 switch (RetVT.SimpleTy) { 1668 default: 1669 return 0; 1670 case MVT::i1: 1671 case MVT::i8: 1672 case MVT::i16: 1673 case MVT::i32: { 1674 unsigned Idx = ISDOpc - ISD::AND; 1675 Opc = OpcTable[Idx][0]; 1676 RC = &AArch64::GPR32spRegClass; 1677 RegSize = 32; 1678 break; 1679 } 1680 case MVT::i64: 1681 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1682 RC = &AArch64::GPR64spRegClass; 1683 RegSize = 64; 1684 break; 1685 } 1686 1687 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize)) 1688 return 0; 1689 1690 unsigned ResultReg = 1691 fastEmitInst_ri(Opc, RC, LHSReg, LHSIsKill, 1692 AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); 1693 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) { 1694 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1695 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1696 } 1697 return ResultReg; 1698 } 1699 1700 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, 1701 unsigned LHSReg, bool LHSIsKill, 1702 unsigned RHSReg, bool RHSIsKill, 1703 uint64_t ShiftImm) { 1704 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), 1705 "ISD nodes are not consecutive!"); 1706 static const unsigned OpcTable[3][2] = { 1707 { AArch64::ANDWrs, AArch64::ANDXrs }, 1708 { AArch64::ORRWrs, AArch64::ORRXrs }, 1709 { AArch64::EORWrs, AArch64::EORXrs } 1710 }; 1711 1712 // Don't deal with undefined shifts. 1713 if (ShiftImm >= RetVT.getSizeInBits()) 1714 return 0; 1715 1716 const TargetRegisterClass *RC; 1717 unsigned Opc; 1718 switch (RetVT.SimpleTy) { 1719 default: 1720 return 0; 1721 case MVT::i1: 1722 case MVT::i8: 1723 case MVT::i16: 1724 case MVT::i32: 1725 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1726 RC = &AArch64::GPR32RegClass; 1727 break; 1728 case MVT::i64: 1729 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1730 RC = &AArch64::GPR64RegClass; 1731 break; 1732 } 1733 unsigned ResultReg = 1734 fastEmitInst_rri(Opc, RC, LHSReg, LHSIsKill, RHSReg, RHSIsKill, 1735 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1736 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1737 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1738 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 1739 } 1740 return ResultReg; 1741 } 1742 1743 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, bool LHSIsKill, 1744 uint64_t Imm) { 1745 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, LHSIsKill, Imm); 1746 } 1747 1748 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1749 bool WantZExt, MachineMemOperand *MMO) { 1750 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1751 return 0; 1752 1753 // Simplify this down to something we can handle. 
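// For example (illustrative): an i32 load at offset 8 can use the scaled form
// "ldr w0, [x1, #8]", while a negative offset such as -4 has to fall back to
// the unscaled form "ldur w0, [x1, #-4]" handled below.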
1754 if (!simplifyAddress(Addr, VT)) 1755 return 0; 1756 1757 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1758 if (!ScaleFactor) 1759 llvm_unreachable("Unexpected value type."); 1760 1761 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1762 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1763 bool UseScaled = true; 1764 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1765 UseScaled = false; 1766 ScaleFactor = 1; 1767 } 1768 1769 static const unsigned GPOpcTable[2][8][4] = { 1770 // Sign-extend. 1771 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1772 AArch64::LDURXi }, 1773 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1774 AArch64::LDURXi }, 1775 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1776 AArch64::LDRXui }, 1777 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1778 AArch64::LDRXui }, 1779 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1780 AArch64::LDRXroX }, 1781 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1782 AArch64::LDRXroX }, 1783 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1784 AArch64::LDRXroW }, 1785 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1786 AArch64::LDRXroW } 1787 }, 1788 // Zero-extend. 1789 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1790 AArch64::LDURXi }, 1791 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1792 AArch64::LDURXi }, 1793 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1794 AArch64::LDRXui }, 1795 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1796 AArch64::LDRXui }, 1797 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1798 AArch64::LDRXroX }, 1799 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1800 AArch64::LDRXroX }, 1801 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1802 AArch64::LDRXroW }, 1803 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1804 AArch64::LDRXroW } 1805 } 1806 }; 1807 1808 static const unsigned FPOpcTable[4][2] = { 1809 { AArch64::LDURSi, AArch64::LDURDi }, 1810 { AArch64::LDRSui, AArch64::LDRDui }, 1811 { AArch64::LDRSroX, AArch64::LDRDroX }, 1812 { AArch64::LDRSroW, AArch64::LDRDroW } 1813 }; 1814 1815 unsigned Opc; 1816 const TargetRegisterClass *RC; 1817 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1818 Addr.getOffsetReg(); 1819 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1820 if (Addr.getExtendType() == AArch64_AM::UXTW || 1821 Addr.getExtendType() == AArch64_AM::SXTW) 1822 Idx++; 1823 1824 bool IsRet64Bit = RetVT == MVT::i64; 1825 switch (VT.SimpleTy) { 1826 default: 1827 llvm_unreachable("Unexpected value type."); 1828 case MVT::i1: // Intentional fall-through. 1829 case MVT::i8: 1830 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1831 RC = (IsRet64Bit && !WantZExt) ? 1832 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1833 break; 1834 case MVT::i16: 1835 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1836 RC = (IsRet64Bit && !WantZExt) ? 1837 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1838 break; 1839 case MVT::i32: 1840 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1841 RC = (IsRet64Bit && !WantZExt) ? 
1842 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1843 break; 1844 case MVT::i64: 1845 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1846 RC = &AArch64::GPR64RegClass; 1847 break; 1848 case MVT::f32: 1849 Opc = FPOpcTable[Idx][0]; 1850 RC = &AArch64::FPR32RegClass; 1851 break; 1852 case MVT::f64: 1853 Opc = FPOpcTable[Idx][1]; 1854 RC = &AArch64::FPR64RegClass; 1855 break; 1856 } 1857 1858 // Create the base instruction, then add the operands. 1859 unsigned ResultReg = createResultReg(RC); 1860 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1861 TII.get(Opc), ResultReg); 1862 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1863 1864 // Loading an i1 requires special handling. 1865 if (VT == MVT::i1) { 1866 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, 1); 1867 assert(ANDReg && "Unexpected AND instruction emission failure."); 1868 ResultReg = ANDReg; 1869 } 1870 1871 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1872 // the 32bit reg to a 64bit reg. 1873 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1874 unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass); 1875 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1876 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1877 .addImm(0) 1878 .addReg(ResultReg, getKillRegState(true)) 1879 .addImm(AArch64::sub_32); 1880 ResultReg = Reg64; 1881 } 1882 return ResultReg; 1883 } 1884 1885 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1886 MVT VT; 1887 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1888 return false; 1889 1890 if (VT.isVector()) 1891 return selectOperator(I, I->getOpcode()); 1892 1893 unsigned ResultReg; 1894 switch (I->getOpcode()) { 1895 default: 1896 llvm_unreachable("Unexpected instruction."); 1897 case Instruction::Add: 1898 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1899 break; 1900 case Instruction::Sub: 1901 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1902 break; 1903 } 1904 if (!ResultReg) 1905 return false; 1906 1907 updateValueMap(I, ResultReg); 1908 return true; 1909 } 1910 1911 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1912 MVT VT; 1913 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1914 return false; 1915 1916 if (VT.isVector()) 1917 return selectOperator(I, I->getOpcode()); 1918 1919 unsigned ResultReg; 1920 switch (I->getOpcode()) { 1921 default: 1922 llvm_unreachable("Unexpected instruction."); 1923 case Instruction::And: 1924 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1925 break; 1926 case Instruction::Or: 1927 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1928 break; 1929 case Instruction::Xor: 1930 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1931 break; 1932 } 1933 if (!ResultReg) 1934 return false; 1935 1936 updateValueMap(I, ResultReg); 1937 return true; 1938 } 1939 1940 bool AArch64FastISel::selectLoad(const Instruction *I) { 1941 MVT VT; 1942 // Verify we have a legal type before going any further. Currently, we handle 1943 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1944 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 
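// For example (illustrative): a "load i8" is emitted as an ldrb, whose result
// is already zero-extended to 32 bits in a W register.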
1945 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1946 cast<LoadInst>(I)->isAtomic()) 1947 return false; 1948 1949 const Value *SV = I->getOperand(0); 1950 if (TLI.supportSwiftError()) { 1951 // Swifterror values can come from either a function parameter with 1952 // swifterror attribute or an alloca with swifterror attribute. 1953 if (const Argument *Arg = dyn_cast<Argument>(SV)) { 1954 if (Arg->hasSwiftErrorAttr()) 1955 return false; 1956 } 1957 1958 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { 1959 if (Alloca->isSwiftError()) 1960 return false; 1961 } 1962 } 1963 1964 // See if we can handle this address. 1965 Address Addr; 1966 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 1967 return false; 1968 1969 // Fold the following sign-/zero-extend into the load instruction. 1970 bool WantZExt = true; 1971 MVT RetVT = VT; 1972 const Value *IntExtVal = nullptr; 1973 if (I->hasOneUse()) { 1974 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { 1975 if (isTypeSupported(ZE->getType(), RetVT)) 1976 IntExtVal = ZE; 1977 else 1978 RetVT = VT; 1979 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { 1980 if (isTypeSupported(SE->getType(), RetVT)) 1981 IntExtVal = SE; 1982 else 1983 RetVT = VT; 1984 WantZExt = false; 1985 } 1986 } 1987 1988 unsigned ResultReg = 1989 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); 1990 if (!ResultReg) 1991 return false; 1992 1993 // There are a few different cases we have to handle, because the load or the 1994 // sign-/zero-extend might not be selected by FastISel if we fall-back to 1995 // SelectionDAG. There is also an ordering issue when both instructions are in 1996 // different basic blocks. 1997 // 1.) The load instruction is selected by FastISel, but the integer extend 1998 // not. This usually happens when the integer extend is in a different 1999 // basic block and SelectionDAG took over for that basic block. 2000 // 2.) The load instruction is selected before the integer extend. This only 2001 // happens when the integer extend is in a different basic block. 2002 // 3.) The load instruction is selected by SelectionDAG and the integer extend 2003 // by FastISel. This happens if there are instructions between the load 2004 // and the integer extend that couldn't be selected by FastISel. 2005 if (IntExtVal) { 2006 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG 2007 // could select it. Emit a copy to subreg if necessary. FastISel will remove 2008 // it when it selects the integer extend. 2009 unsigned Reg = lookUpRegForValue(IntExtVal); 2010 auto *MI = MRI.getUniqueVRegDef(Reg); 2011 if (!MI) { 2012 if (RetVT == MVT::i64 && VT <= MVT::i32) { 2013 if (WantZExt) { 2014 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). 2015 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt)); 2016 ResultReg = std::prev(I)->getOperand(0).getReg(); 2017 removeDeadCode(I, std::next(I)); 2018 } else 2019 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, 2020 /*IsKill=*/true, 2021 AArch64::sub_32); 2022 } 2023 updateValueMap(I, ResultReg); 2024 return true; 2025 } 2026 2027 // The integer extend has already been emitted - delete all the instructions 2028 // that have been emitted by the integer extend lowering code and use the 2029 // result from the load instruction directly. 
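// For example (illustrative), the common folded pattern is:
//   %v = load i16, i16* %p
//   %e = sext i16 %v to i64
// which emitLoad above has already turned into a single "ldrsh x0, [x1]", so
// the extend only needs to be cleaned up or remapped here.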
2030 while (MI) { 2031 Reg = 0; 2032 for (auto &Opnd : MI->uses()) { 2033 if (Opnd.isReg()) { 2034 Reg = Opnd.getReg(); 2035 break; 2036 } 2037 } 2038 MachineBasicBlock::iterator I(MI); 2039 removeDeadCode(I, std::next(I)); 2040 MI = nullptr; 2041 if (Reg) 2042 MI = MRI.getUniqueVRegDef(Reg); 2043 } 2044 updateValueMap(IntExtVal, ResultReg); 2045 return true; 2046 } 2047 2048 updateValueMap(I, ResultReg); 2049 return true; 2050 } 2051 2052 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, 2053 unsigned AddrReg, 2054 MachineMemOperand *MMO) { 2055 unsigned Opc; 2056 switch (VT.SimpleTy) { 2057 default: return false; 2058 case MVT::i8: Opc = AArch64::STLRB; break; 2059 case MVT::i16: Opc = AArch64::STLRH; break; 2060 case MVT::i32: Opc = AArch64::STLRW; break; 2061 case MVT::i64: Opc = AArch64::STLRX; break; 2062 } 2063 2064 const MCInstrDesc &II = TII.get(Opc); 2065 SrcReg = constrainOperandRegClass(II, SrcReg, 0); 2066 AddrReg = constrainOperandRegClass(II, AddrReg, 1); 2067 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2068 .addReg(SrcReg) 2069 .addReg(AddrReg) 2070 .addMemOperand(MMO); 2071 return true; 2072 } 2073 2074 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 2075 MachineMemOperand *MMO) { 2076 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 2077 return false; 2078 2079 // Simplify this down to something we can handle. 2080 if (!simplifyAddress(Addr, VT)) 2081 return false; 2082 2083 unsigned ScaleFactor = getImplicitScaleFactor(VT); 2084 if (!ScaleFactor) 2085 llvm_unreachable("Unexpected value type."); 2086 2087 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 2088 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 2089 bool UseScaled = true; 2090 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 2091 UseScaled = false; 2092 ScaleFactor = 1; 2093 } 2094 2095 static const unsigned OpcTable[4][6] = { 2096 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 2097 AArch64::STURSi, AArch64::STURDi }, 2098 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 2099 AArch64::STRSui, AArch64::STRDui }, 2100 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 2101 AArch64::STRSroX, AArch64::STRDroX }, 2102 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 2103 AArch64::STRSroW, AArch64::STRDroW } 2104 }; 2105 2106 unsigned Opc; 2107 bool VTIsi1 = false; 2108 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 2109 Addr.getOffsetReg(); 2110 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 2111 if (Addr.getExtendType() == AArch64_AM::UXTW || 2112 Addr.getExtendType() == AArch64_AM::SXTW) 2113 Idx++; 2114 2115 switch (VT.SimpleTy) { 2116 default: llvm_unreachable("Unexpected value type."); 2117 case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH; 2118 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2119 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2120 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2121 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2122 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2123 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2124 } 2125 2126 // Storing an i1 requires special handling. 
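// Only bit 0 of an i1 is defined, so the value is masked with
// "and wN, wM, #0x1" below before the byte store; the zero register is exempt
// since it already holds 0.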
2127 if (VTIsi1 && SrcReg != AArch64::WZR) { 2128 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); 2129 assert(ANDReg && "Unexpected AND instruction emission failure."); 2130 SrcReg = ANDReg; 2131 } 2132 // Create the base instruction, then add the operands. 2133 const MCInstrDesc &II = TII.get(Opc); 2134 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2135 MachineInstrBuilder MIB = 2136 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg); 2137 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2138 2139 return true; 2140 } 2141 2142 bool AArch64FastISel::selectStore(const Instruction *I) { 2143 MVT VT; 2144 const Value *Op0 = I->getOperand(0); 2145 // Verify we have a legal type before going any further. Currently, we handle 2146 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2147 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2148 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) 2149 return false; 2150 2151 const Value *PtrV = I->getOperand(1); 2152 if (TLI.supportSwiftError()) { 2153 // Swifterror values can come from either a function parameter with 2154 // swifterror attribute or an alloca with swifterror attribute. 2155 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { 2156 if (Arg->hasSwiftErrorAttr()) 2157 return false; 2158 } 2159 2160 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { 2161 if (Alloca->isSwiftError()) 2162 return false; 2163 } 2164 } 2165 2166 // Get the value to be stored into a register. Use the zero register directly 2167 // when possible to avoid an unnecessary copy and a wasted register. 2168 unsigned SrcReg = 0; 2169 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2170 if (CI->isZero()) 2171 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2172 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2173 if (CF->isZero() && !CF->isNegative()) { 2174 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2175 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2176 } 2177 } 2178 2179 if (!SrcReg) 2180 SrcReg = getRegForValue(Op0); 2181 2182 if (!SrcReg) 2183 return false; 2184 2185 auto *SI = cast<StoreInst>(I); 2186 2187 // Try to emit a STLR for seq_cst/release. 2188 if (SI->isAtomic()) { 2189 AtomicOrdering Ord = SI->getOrdering(); 2190 // The non-atomic instructions are sufficient for relaxed stores. 2191 if (isReleaseOrStronger(Ord)) { 2192 // The STLR addressing mode only supports a base reg; pass that directly. 2193 unsigned AddrReg = getRegForValue(PtrV); 2194 return emitStoreRelease(VT, SrcReg, AddrReg, 2195 createMachineMemOperandFor(I)); 2196 } 2197 } 2198 2199 // See if we can handle this address. 2200 Address Addr; 2201 if (!computeAddress(PtrV, Addr, Op0->getType())) 2202 return false; 2203 2204 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2205 return false; 2206 return true; 2207 } 2208 2209 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2210 switch (Pred) { 2211 case CmpInst::FCMP_ONE: 2212 case CmpInst::FCMP_UEQ: 2213 default: 2214 // AL is our "false" for now. The other two need more compares. 
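// (Illustrative: FCMP_ONE and FCMP_UEQ each decompose into two checks, e.g.
// "one" is handled later as MI-or-GT; see selectBranch and selectCmp.)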
2215 return AArch64CC::AL; 2216 case CmpInst::ICMP_EQ: 2217 case CmpInst::FCMP_OEQ: 2218 return AArch64CC::EQ; 2219 case CmpInst::ICMP_SGT: 2220 case CmpInst::FCMP_OGT: 2221 return AArch64CC::GT; 2222 case CmpInst::ICMP_SGE: 2223 case CmpInst::FCMP_OGE: 2224 return AArch64CC::GE; 2225 case CmpInst::ICMP_UGT: 2226 case CmpInst::FCMP_UGT: 2227 return AArch64CC::HI; 2228 case CmpInst::FCMP_OLT: 2229 return AArch64CC::MI; 2230 case CmpInst::ICMP_ULE: 2231 case CmpInst::FCMP_OLE: 2232 return AArch64CC::LS; 2233 case CmpInst::FCMP_ORD: 2234 return AArch64CC::VC; 2235 case CmpInst::FCMP_UNO: 2236 return AArch64CC::VS; 2237 case CmpInst::FCMP_UGE: 2238 return AArch64CC::PL; 2239 case CmpInst::ICMP_SLT: 2240 case CmpInst::FCMP_ULT: 2241 return AArch64CC::LT; 2242 case CmpInst::ICMP_SLE: 2243 case CmpInst::FCMP_ULE: 2244 return AArch64CC::LE; 2245 case CmpInst::FCMP_UNE: 2246 case CmpInst::ICMP_NE: 2247 return AArch64CC::NE; 2248 case CmpInst::ICMP_UGE: 2249 return AArch64CC::HS; 2250 case CmpInst::ICMP_ULT: 2251 return AArch64CC::LO; 2252 } 2253 } 2254 2255 /// Try to emit a combined compare-and-branch instruction. 2256 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2257 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions 2258 // will not be produced, as they are conditional branch instructions that do 2259 // not set flags. 2260 if (FuncInfo.MF->getFunction().hasFnAttribute( 2261 Attribute::SpeculativeLoadHardening)) 2262 return false; 2263 2264 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2265 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2266 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2267 2268 const Value *LHS = CI->getOperand(0); 2269 const Value *RHS = CI->getOperand(1); 2270 2271 MVT VT; 2272 if (!isTypeSupported(LHS->getType(), VT)) 2273 return false; 2274 2275 unsigned BW = VT.getSizeInBits(); 2276 if (BW > 64) 2277 return false; 2278 2279 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2280 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2281 2282 // Try to take advantage of fallthrough opportunities. 
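// For example (illustrative): if the 'true' block is the layout successor, it
// is cheaper to invert the predicate and emit a single cb(n)z/tb(n)z to the
// 'false' block, letting execution fall through into the 'true' block.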
2283 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2284 std::swap(TBB, FBB); 2285 Predicate = CmpInst::getInversePredicate(Predicate); 2286 } 2287 2288 int TestBit = -1; 2289 bool IsCmpNE; 2290 switch (Predicate) { 2291 default: 2292 return false; 2293 case CmpInst::ICMP_EQ: 2294 case CmpInst::ICMP_NE: 2295 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2296 std::swap(LHS, RHS); 2297 2298 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2299 return false; 2300 2301 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2302 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2303 const Value *AndLHS = AI->getOperand(0); 2304 const Value *AndRHS = AI->getOperand(1); 2305 2306 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2307 if (C->getValue().isPowerOf2()) 2308 std::swap(AndLHS, AndRHS); 2309 2310 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2311 if (C->getValue().isPowerOf2()) { 2312 TestBit = C->getValue().logBase2(); 2313 LHS = AndLHS; 2314 } 2315 } 2316 2317 if (VT == MVT::i1) 2318 TestBit = 0; 2319 2320 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2321 break; 2322 case CmpInst::ICMP_SLT: 2323 case CmpInst::ICMP_SGE: 2324 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2325 return false; 2326 2327 TestBit = BW - 1; 2328 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2329 break; 2330 case CmpInst::ICMP_SGT: 2331 case CmpInst::ICMP_SLE: 2332 if (!isa<ConstantInt>(RHS)) 2333 return false; 2334 2335 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2336 return false; 2337 2338 TestBit = BW - 1; 2339 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2340 break; 2341 } // end switch 2342 2343 static const unsigned OpcTable[2][2][2] = { 2344 { {AArch64::CBZW, AArch64::CBZX }, 2345 {AArch64::CBNZW, AArch64::CBNZX} }, 2346 { {AArch64::TBZW, AArch64::TBZX }, 2347 {AArch64::TBNZW, AArch64::TBNZX} } 2348 }; 2349 2350 bool IsBitTest = TestBit != -1; 2351 bool Is64Bit = BW == 64; 2352 if (TestBit < 32 && TestBit >= 0) 2353 Is64Bit = false; 2354 2355 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2356 const MCInstrDesc &II = TII.get(Opc); 2357 2358 unsigned SrcReg = getRegForValue(LHS); 2359 if (!SrcReg) 2360 return false; 2361 bool SrcIsKill = hasTrivialKill(LHS); 2362 2363 if (BW == 64 && !Is64Bit) 2364 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, 2365 AArch64::sub_32); 2366 2367 if ((BW < 32) && !IsBitTest) 2368 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); 2369 2370 // Emit the combined compare and branch instruction. 2371 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2372 MachineInstrBuilder MIB = 2373 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 2374 .addReg(SrcReg, getKillRegState(SrcIsKill)); 2375 if (IsBitTest) 2376 MIB.addImm(TestBit); 2377 MIB.addMBB(TBB); 2378 2379 finishCondBranch(BI->getParent(), TBB, FBB); 2380 return true; 2381 } 2382 2383 bool AArch64FastISel::selectBranch(const Instruction *I) { 2384 const BranchInst *BI = cast<BranchInst>(I); 2385 if (BI->isUnconditional()) { 2386 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2387 fastEmitBranch(MSucc, BI->getDebugLoc()); 2388 return true; 2389 } 2390 2391 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2392 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2393 2394 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2395 if (CI->hasOneUse() && isValueAvailable(CI)) { 2396 // Try to optimize or fold the cmp. 
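// For example (illustrative): a compare of a value against itself is folded by
// optimizeCmpPredicate to FCMP_TRUE/FCMP_FALSE, which is then emitted below as
// a plain unconditional branch.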
2397 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2398 switch (Predicate) { 2399 default: 2400 break; 2401 case CmpInst::FCMP_FALSE: 2402 fastEmitBranch(FBB, DbgLoc); 2403 return true; 2404 case CmpInst::FCMP_TRUE: 2405 fastEmitBranch(TBB, DbgLoc); 2406 return true; 2407 } 2408 2409 // Try to emit a combined compare-and-branch first. 2410 if (emitCompareAndBranch(BI)) 2411 return true; 2412 2413 // Try to take advantage of fallthrough opportunities. 2414 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2415 std::swap(TBB, FBB); 2416 Predicate = CmpInst::getInversePredicate(Predicate); 2417 } 2418 2419 // Emit the cmp. 2420 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2421 return false; 2422 2423 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2424 // instruction. 2425 AArch64CC::CondCode CC = getCompareCC(Predicate); 2426 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2427 switch (Predicate) { 2428 default: 2429 break; 2430 case CmpInst::FCMP_UEQ: 2431 ExtraCC = AArch64CC::EQ; 2432 CC = AArch64CC::VS; 2433 break; 2434 case CmpInst::FCMP_ONE: 2435 ExtraCC = AArch64CC::MI; 2436 CC = AArch64CC::GT; 2437 break; 2438 } 2439 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2440 2441 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2442 if (ExtraCC != AArch64CC::AL) { 2443 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2444 .addImm(ExtraCC) 2445 .addMBB(TBB); 2446 } 2447 2448 // Emit the branch. 2449 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2450 .addImm(CC) 2451 .addMBB(TBB); 2452 2453 finishCondBranch(BI->getParent(), TBB, FBB); 2454 return true; 2455 } 2456 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2457 uint64_t Imm = CI->getZExtValue(); 2458 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2459 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) 2460 .addMBB(Target); 2461 2462 // Obtain the branch probability and add the target to the successor list. 2463 if (FuncInfo.BPI) { 2464 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2465 BI->getParent(), Target->getBasicBlock()); 2466 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2467 } else 2468 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2469 return true; 2470 } else { 2471 AArch64CC::CondCode CC = AArch64CC::NE; 2472 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2473 // Fake request the condition, otherwise the intrinsic might be completely 2474 // optimized away. 2475 unsigned CondReg = getRegForValue(BI->getCondition()); 2476 if (!CondReg) 2477 return false; 2478 2479 // Emit the branch. 2480 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2481 .addImm(CC) 2482 .addMBB(TBB); 2483 2484 finishCondBranch(BI->getParent(), TBB, FBB); 2485 return true; 2486 } 2487 } 2488 2489 unsigned CondReg = getRegForValue(BI->getCondition()); 2490 if (CondReg == 0) 2491 return false; 2492 bool CondRegIsKill = hasTrivialKill(BI->getCondition()); 2493 2494 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
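// For example (illustrative): "br i1 %c, label %t, label %f" becomes
// "tbnz w0, #0, .LBB_t" (or a tbz after the fallthrough swap below).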
2495 unsigned Opcode = AArch64::TBNZW; 2496 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2497 std::swap(TBB, FBB); 2498 Opcode = AArch64::TBZW; 2499 } 2500 2501 const MCInstrDesc &II = TII.get(Opcode); 2502 unsigned ConstrainedCondReg 2503 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2504 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2505 .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) 2506 .addImm(0) 2507 .addMBB(TBB); 2508 2509 finishCondBranch(BI->getParent(), TBB, FBB); 2510 return true; 2511 } 2512 2513 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2514 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2515 unsigned AddrReg = getRegForValue(BI->getOperand(0)); 2516 if (AddrReg == 0) 2517 return false; 2518 2519 // Emit the indirect branch. 2520 const MCInstrDesc &II = TII.get(AArch64::BR); 2521 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2522 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); 2523 2524 // Make sure the CFG is up-to-date. 2525 for (auto *Succ : BI->successors()) 2526 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); 2527 2528 return true; 2529 } 2530 2531 bool AArch64FastISel::selectCmp(const Instruction *I) { 2532 const CmpInst *CI = cast<CmpInst>(I); 2533 2534 // Vectors of i1 are weird: bail out. 2535 if (CI->getType()->isVectorTy()) 2536 return false; 2537 2538 // Try to optimize or fold the cmp. 2539 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2540 unsigned ResultReg = 0; 2541 switch (Predicate) { 2542 default: 2543 break; 2544 case CmpInst::FCMP_FALSE: 2545 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2546 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2547 TII.get(TargetOpcode::COPY), ResultReg) 2548 .addReg(AArch64::WZR, getKillRegState(true)); 2549 break; 2550 case CmpInst::FCMP_TRUE: 2551 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2552 break; 2553 } 2554 2555 if (ResultReg) { 2556 updateValueMap(I, ResultReg); 2557 return true; 2558 } 2559 2560 // Emit the cmp. 2561 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2562 return false; 2563 2564 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2565 2566 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2567 // condition codes are inverted, because they are used by CSINC. 2568 static unsigned CondCodeTable[2][2] = { 2569 { AArch64CC::NE, AArch64CC::VC }, 2570 { AArch64CC::PL, AArch64CC::LE } 2571 }; 2572 unsigned *CondCodes = nullptr; 2573 switch (Predicate) { 2574 default: 2575 break; 2576 case CmpInst::FCMP_UEQ: 2577 CondCodes = &CondCodeTable[0][0]; 2578 break; 2579 case CmpInst::FCMP_ONE: 2580 CondCodes = &CondCodeTable[1][0]; 2581 break; 2582 } 2583 2584 if (CondCodes) { 2585 unsigned TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2586 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2587 TmpReg1) 2588 .addReg(AArch64::WZR, getKillRegState(true)) 2589 .addReg(AArch64::WZR, getKillRegState(true)) 2590 .addImm(CondCodes[0]); 2591 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2592 ResultReg) 2593 .addReg(TmpReg1, getKillRegState(true)) 2594 .addReg(AArch64::WZR, getKillRegState(true)) 2595 .addImm(CondCodes[1]); 2596 2597 updateValueMap(I, ResultReg); 2598 return true; 2599 } 2600 2601 // Now set a register based on the comparison. 
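// For example (illustrative): for "icmp eq" this materializes the boolean as
// "cset w0, eq", i.e. "csinc w0, wzr, wzr, ne"; this is why the inverted
// condition code is passed to CSINC below.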
2602 AArch64CC::CondCode CC = getCompareCC(Predicate); 2603 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2604 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2605 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2606 ResultReg) 2607 .addReg(AArch64::WZR, getKillRegState(true)) 2608 .addReg(AArch64::WZR, getKillRegState(true)) 2609 .addImm(invertedCC); 2610 2611 updateValueMap(I, ResultReg); 2612 return true; 2613 } 2614 2615 /// Optimize selects of i1 if one of the operands has a 'true' or 'false' 2616 /// value. 2617 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2618 if (!SI->getType()->isIntegerTy(1)) 2619 return false; 2620 2621 const Value *Src1Val, *Src2Val; 2622 unsigned Opc = 0; 2623 bool NeedExtraOp = false; 2624 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2625 if (CI->isOne()) { 2626 Src1Val = SI->getCondition(); 2627 Src2Val = SI->getFalseValue(); 2628 Opc = AArch64::ORRWrr; 2629 } else { 2630 assert(CI->isZero()); 2631 Src1Val = SI->getFalseValue(); 2632 Src2Val = SI->getCondition(); 2633 Opc = AArch64::BICWrr; 2634 } 2635 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2636 if (CI->isOne()) { 2637 Src1Val = SI->getCondition(); 2638 Src2Val = SI->getTrueValue(); 2639 Opc = AArch64::ORRWrr; 2640 NeedExtraOp = true; 2641 } else { 2642 assert(CI->isZero()); 2643 Src1Val = SI->getCondition(); 2644 Src2Val = SI->getTrueValue(); 2645 Opc = AArch64::ANDWrr; 2646 } 2647 } 2648 2649 if (!Opc) 2650 return false; 2651 2652 unsigned Src1Reg = getRegForValue(Src1Val); 2653 if (!Src1Reg) 2654 return false; 2655 bool Src1IsKill = hasTrivialKill(Src1Val); 2656 2657 unsigned Src2Reg = getRegForValue(Src2Val); 2658 if (!Src2Reg) 2659 return false; 2660 bool Src2IsKill = hasTrivialKill(Src2Val); 2661 2662 if (NeedExtraOp) { 2663 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, Src1IsKill, 1); 2664 Src1IsKill = true; 2665 } 2666 unsigned ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2667 Src1IsKill, Src2Reg, Src2IsKill); 2668 updateValueMap(SI, ResultReg); 2669 return true; 2670 } 2671 2672 bool AArch64FastISel::selectSelect(const Instruction *I) { 2673 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2674 MVT VT; 2675 if (!isTypeSupported(I->getType(), VT)) 2676 return false; 2677 2678 unsigned Opc; 2679 const TargetRegisterClass *RC; 2680 switch (VT.SimpleTy) { 2681 default: 2682 return false; 2683 case MVT::i1: 2684 case MVT::i8: 2685 case MVT::i16: 2686 case MVT::i32: 2687 Opc = AArch64::CSELWr; 2688 RC = &AArch64::GPR32RegClass; 2689 break; 2690 case MVT::i64: 2691 Opc = AArch64::CSELXr; 2692 RC = &AArch64::GPR64RegClass; 2693 break; 2694 case MVT::f32: 2695 Opc = AArch64::FCSELSrrr; 2696 RC = &AArch64::FPR32RegClass; 2697 break; 2698 case MVT::f64: 2699 Opc = AArch64::FCSELDrrr; 2700 RC = &AArch64::FPR64RegClass; 2701 break; 2702 } 2703 2704 const SelectInst *SI = cast<SelectInst>(I); 2705 const Value *Cond = SI->getCondition(); 2706 AArch64CC::CondCode CC = AArch64CC::NE; 2707 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2708 2709 if (optimizeSelect(SI)) 2710 return true; 2711 2712 // Try to pickup the flags, so we don't have to emit another compare. 2713 if (foldXALUIntrinsic(CC, I, Cond)) { 2714 // Fake request the condition to force emission of the XALU intrinsic. 
2715 unsigned CondReg = getRegForValue(Cond); 2716 if (!CondReg) 2717 return false; 2718 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2719 isValueAvailable(Cond)) { 2720 const auto *Cmp = cast<CmpInst>(Cond); 2721 // Try to optimize or fold the cmp. 2722 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2723 const Value *FoldSelect = nullptr; 2724 switch (Predicate) { 2725 default: 2726 break; 2727 case CmpInst::FCMP_FALSE: 2728 FoldSelect = SI->getFalseValue(); 2729 break; 2730 case CmpInst::FCMP_TRUE: 2731 FoldSelect = SI->getTrueValue(); 2732 break; 2733 } 2734 2735 if (FoldSelect) { 2736 unsigned SrcReg = getRegForValue(FoldSelect); 2737 if (!SrcReg) 2738 return false; 2739 unsigned UseReg = lookUpRegForValue(SI); 2740 if (UseReg) 2741 MRI.clearKillFlags(UseReg); 2742 2743 updateValueMap(I, SrcReg); 2744 return true; 2745 } 2746 2747 // Emit the cmp. 2748 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2749 return false; 2750 2751 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2752 CC = getCompareCC(Predicate); 2753 switch (Predicate) { 2754 default: 2755 break; 2756 case CmpInst::FCMP_UEQ: 2757 ExtraCC = AArch64CC::EQ; 2758 CC = AArch64CC::VS; 2759 break; 2760 case CmpInst::FCMP_ONE: 2761 ExtraCC = AArch64CC::MI; 2762 CC = AArch64CC::GT; 2763 break; 2764 } 2765 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2766 } else { 2767 unsigned CondReg = getRegForValue(Cond); 2768 if (!CondReg) 2769 return false; 2770 bool CondIsKill = hasTrivialKill(Cond); 2771 2772 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2773 CondReg = constrainOperandRegClass(II, CondReg, 1); 2774 2775 // Emit a TST instruction (ANDS wzr, reg, #imm). 2776 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, 2777 AArch64::WZR) 2778 .addReg(CondReg, getKillRegState(CondIsKill)) 2779 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2780 } 2781 2782 unsigned Src1Reg = getRegForValue(SI->getTrueValue()); 2783 bool Src1IsKill = hasTrivialKill(SI->getTrueValue()); 2784 2785 unsigned Src2Reg = getRegForValue(SI->getFalseValue()); 2786 bool Src2IsKill = hasTrivialKill(SI->getFalseValue()); 2787 2788 if (!Src1Reg || !Src2Reg) 2789 return false; 2790 2791 if (ExtraCC != AArch64CC::AL) { 2792 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, 2793 Src2IsKill, ExtraCC); 2794 Src2IsKill = true; 2795 } 2796 unsigned ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src1IsKill, Src2Reg, 2797 Src2IsKill, CC); 2798 updateValueMap(I, ResultReg); 2799 return true; 2800 } 2801 2802 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2803 Value *V = I->getOperand(0); 2804 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2805 return false; 2806 2807 unsigned Op = getRegForValue(V); 2808 if (Op == 0) 2809 return false; 2810 2811 unsigned ResultReg = createResultReg(&AArch64::FPR64RegClass); 2812 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), 2813 ResultReg).addReg(Op); 2814 updateValueMap(I, ResultReg); 2815 return true; 2816 } 2817 2818 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2819 Value *V = I->getOperand(0); 2820 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2821 return false; 2822 2823 unsigned Op = getRegForValue(V); 2824 if (Op == 0) 2825 return false; 2826 2827 unsigned ResultReg = createResultReg(&AArch64::FPR32RegClass); 2828 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr), 2829 
ResultReg).addReg(Op); 2830 updateValueMap(I, ResultReg); 2831 return true; 2832 } 2833 2834 // FPToUI and FPToSI 2835 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2836 MVT DestVT; 2837 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2838 return false; 2839 2840 unsigned SrcReg = getRegForValue(I->getOperand(0)); 2841 if (SrcReg == 0) 2842 return false; 2843 2844 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2845 if (SrcVT == MVT::f128 || SrcVT == MVT::f16) 2846 return false; 2847 2848 unsigned Opc; 2849 if (SrcVT == MVT::f64) { 2850 if (Signed) 2851 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2852 else 2853 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2854 } else { 2855 if (Signed) 2856 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2857 else 2858 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2859 } 2860 unsigned ResultReg = createResultReg( 2861 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2862 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 2863 .addReg(SrcReg); 2864 updateValueMap(I, ResultReg); 2865 return true; 2866 } 2867 2868 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2869 MVT DestVT; 2870 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2871 return false; 2872 // Let regular ISEL handle FP16 2873 if (DestVT == MVT::f16) 2874 return false; 2875 2876 assert((DestVT == MVT::f32 || DestVT == MVT::f64) && 2877 "Unexpected value type."); 2878 2879 unsigned SrcReg = getRegForValue(I->getOperand(0)); 2880 if (!SrcReg) 2881 return false; 2882 bool SrcIsKill = hasTrivialKill(I->getOperand(0)); 2883 2884 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2885 2886 // Handle sign-extension. 2887 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2888 SrcReg = 2889 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2890 if (!SrcReg) 2891 return false; 2892 SrcIsKill = true; 2893 } 2894 2895 unsigned Opc; 2896 if (SrcVT == MVT::i64) { 2897 if (Signed) 2898 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2899 else 2900 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2901 } else { 2902 if (Signed) 2903 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2904 else 2905 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2906 } 2907 2908 unsigned ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg, 2909 SrcIsKill); 2910 updateValueMap(I, ResultReg); 2911 return true; 2912 } 2913 2914 bool AArch64FastISel::fastLowerArguments() { 2915 if (!FuncInfo.CanLowerReturn) 2916 return false; 2917 2918 const Function *F = FuncInfo.Fn; 2919 if (F->isVarArg()) 2920 return false; 2921 2922 CallingConv::ID CC = F->getCallingConv(); 2923 if (CC != CallingConv::C && CC != CallingConv::Swift) 2924 return false; 2925 2926 if (Subtarget->hasCustomCallingConv()) 2927 return false; 2928 2929 // Only handle simple cases of up to 8 GPR and FPR each. 
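// Illustrative background: under AAPCS64 the first eight integer arguments are
// passed in w0-w7/x0-x7 and the first eight FP/SIMD arguments in v0-v7 (viewed
// as h/s/d/q registers), which is what the Registers table below mirrors.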
2930 unsigned GPRCnt = 0; 2931 unsigned FPRCnt = 0; 2932 for (auto const &Arg : F->args()) { 2933 if (Arg.hasAttribute(Attribute::ByVal) || 2934 Arg.hasAttribute(Attribute::InReg) || 2935 Arg.hasAttribute(Attribute::StructRet) || 2936 Arg.hasAttribute(Attribute::SwiftSelf) || 2937 Arg.hasAttribute(Attribute::SwiftError) || 2938 Arg.hasAttribute(Attribute::Nest)) 2939 return false; 2940 2941 Type *ArgTy = Arg.getType(); 2942 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2943 return false; 2944 2945 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2946 if (!ArgVT.isSimple()) 2947 return false; 2948 2949 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2950 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2951 return false; 2952 2953 if (VT.isVector() && 2954 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2955 return false; 2956 2957 if (VT >= MVT::i1 && VT <= MVT::i64) 2958 ++GPRCnt; 2959 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2960 VT.is128BitVector()) 2961 ++FPRCnt; 2962 else 2963 return false; 2964 2965 if (GPRCnt > 8 || FPRCnt > 8) 2966 return false; 2967 } 2968 2969 static const MCPhysReg Registers[6][8] = { 2970 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 2971 AArch64::W5, AArch64::W6, AArch64::W7 }, 2972 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 2973 AArch64::X5, AArch64::X6, AArch64::X7 }, 2974 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 2975 AArch64::H5, AArch64::H6, AArch64::H7 }, 2976 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 2977 AArch64::S5, AArch64::S6, AArch64::S7 }, 2978 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 2979 AArch64::D5, AArch64::D6, AArch64::D7 }, 2980 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 2981 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 2982 }; 2983 2984 unsigned GPRIdx = 0; 2985 unsigned FPRIdx = 0; 2986 for (auto const &Arg : F->args()) { 2987 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 2988 unsigned SrcReg; 2989 const TargetRegisterClass *RC; 2990 if (VT >= MVT::i1 && VT <= MVT::i32) { 2991 SrcReg = Registers[0][GPRIdx++]; 2992 RC = &AArch64::GPR32RegClass; 2993 VT = MVT::i32; 2994 } else if (VT == MVT::i64) { 2995 SrcReg = Registers[1][GPRIdx++]; 2996 RC = &AArch64::GPR64RegClass; 2997 } else if (VT == MVT::f16) { 2998 SrcReg = Registers[2][FPRIdx++]; 2999 RC = &AArch64::FPR16RegClass; 3000 } else if (VT == MVT::f32) { 3001 SrcReg = Registers[3][FPRIdx++]; 3002 RC = &AArch64::FPR32RegClass; 3003 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 3004 SrcReg = Registers[4][FPRIdx++]; 3005 RC = &AArch64::FPR64RegClass; 3006 } else if (VT.is128BitVector()) { 3007 SrcReg = Registers[5][FPRIdx++]; 3008 RC = &AArch64::FPR128RegClass; 3009 } else 3010 llvm_unreachable("Unexpected value type."); 3011 3012 unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 3013 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 3014 // Without this, EmitLiveInCopies may eliminate the livein if its only 3015 // use is a bitcast (which isn't turned into an instruction). 
3016 unsigned ResultReg = createResultReg(RC); 3017 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3018 TII.get(TargetOpcode::COPY), ResultReg) 3019 .addReg(DstReg, getKillRegState(true)); 3020 updateValueMap(&Arg, ResultReg); 3021 } 3022 return true; 3023 } 3024 3025 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 3026 SmallVectorImpl<MVT> &OutVTs, 3027 unsigned &NumBytes) { 3028 CallingConv::ID CC = CLI.CallConv; 3029 SmallVector<CCValAssign, 16> ArgLocs; 3030 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 3031 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 3032 3033 // Get a count of how many bytes are to be pushed on the stack. 3034 NumBytes = CCInfo.getNextStackOffset(); 3035 3036 // Issue CALLSEQ_START 3037 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 3038 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) 3039 .addImm(NumBytes).addImm(0); 3040 3041 // Process the args. 3042 for (CCValAssign &VA : ArgLocs) { 3043 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 3044 MVT ArgVT = OutVTs[VA.getValNo()]; 3045 3046 unsigned ArgReg = getRegForValue(ArgVal); 3047 if (!ArgReg) 3048 return false; 3049 3050 // Handle arg promotion: SExt, ZExt, AExt. 3051 switch (VA.getLocInfo()) { 3052 case CCValAssign::Full: 3053 break; 3054 case CCValAssign::SExt: { 3055 MVT DestVT = VA.getLocVT(); 3056 MVT SrcVT = ArgVT; 3057 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 3058 if (!ArgReg) 3059 return false; 3060 break; 3061 } 3062 case CCValAssign::AExt: 3063 // Intentional fall-through. 3064 case CCValAssign::ZExt: { 3065 MVT DestVT = VA.getLocVT(); 3066 MVT SrcVT = ArgVT; 3067 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 3068 if (!ArgReg) 3069 return false; 3070 break; 3071 } 3072 default: 3073 llvm_unreachable("Unknown arg promotion!"); 3074 } 3075 3076 // Now copy/store arg to correct locations. 3077 if (VA.isRegLoc() && !VA.needsCustom()) { 3078 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3079 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 3080 CLI.OutRegs.push_back(VA.getLocReg()); 3081 } else if (VA.needsCustom()) { 3082 // FIXME: Handle custom args. 3083 return false; 3084 } else { 3085 assert(VA.isMemLoc() && "Assuming store on stack."); 3086 3087 // Don't emit stores for undef values. 3088 if (isa<UndefValue>(ArgVal)) 3089 continue; 3090 3091 // Need to store on the stack. 3092 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 3093 3094 unsigned BEAlign = 0; 3095 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3096 BEAlign = 8 - ArgSize; 3097 3098 Address Addr; 3099 Addr.setKind(Address::RegBase); 3100 Addr.setReg(AArch64::SP); 3101 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3102 3103 unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType()); 3104 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3105 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 3106 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3107 3108 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3109 return false; 3110 } 3111 } 3112 return true; 3113 } 3114 3115 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, 3116 unsigned NumBytes) { 3117 CallingConv::ID CC = CLI.CallConv; 3118 3119 // Issue CALLSEQ_END 3120 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3121 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) 3122 .addImm(NumBytes).addImm(0); 3123 3124 // Now the return value. 
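// For example (illustrative): an i32 result comes back in w0 and a double in
// d0; the physical register is copied into a fresh virtual register below.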
3125 if (RetVT != MVT::isVoid) { 3126 SmallVector<CCValAssign, 16> RVLocs; 3127 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3128 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); 3129 3130 // Only handle a single return value. 3131 if (RVLocs.size() != 1) 3132 return false; 3133 3134 // Copy all of the result registers out of their specified physreg. 3135 MVT CopyVT = RVLocs[0].getValVT(); 3136 3137 // TODO: Handle big-endian results 3138 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3139 return false; 3140 3141 unsigned ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); 3142 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3143 TII.get(TargetOpcode::COPY), ResultReg) 3144 .addReg(RVLocs[0].getLocReg()); 3145 CLI.InRegs.push_back(RVLocs[0].getLocReg()); 3146 3147 CLI.ResultReg = ResultReg; 3148 CLI.NumResultRegs = 1; 3149 } 3150 3151 return true; 3152 } 3153 3154 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3155 CallingConv::ID CC = CLI.CallConv; 3156 bool IsTailCall = CLI.IsTailCall; 3157 bool IsVarArg = CLI.IsVarArg; 3158 const Value *Callee = CLI.Callee; 3159 MCSymbol *Symbol = CLI.Symbol; 3160 3161 if (!Callee && !Symbol) 3162 return false; 3163 3164 // Allow SelectionDAG isel to handle tail calls. 3165 if (IsTailCall) 3166 return false; 3167 3168 CodeModel::Model CM = TM.getCodeModel(); 3169 // Only support the small-addressing and large code models. 3170 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) 3171 return false; 3172 3173 // FIXME: Add large code model support for ELF. 3174 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3175 return false; 3176 3177 // Let SDISel handle vararg functions. 3178 if (IsVarArg) 3179 return false; 3180 3181 // FIXME: Only handle *simple* calls for now. 3182 MVT RetVT; 3183 if (CLI.RetTy->isVoidTy()) 3184 RetVT = MVT::isVoid; 3185 else if (!isTypeLegal(CLI.RetTy, RetVT)) 3186 return false; 3187 3188 for (auto Flag : CLI.OutFlags) 3189 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || 3190 Flag.isSwiftSelf() || Flag.isSwiftError()) 3191 return false; 3192 3193 // Set up the argument vectors. 3194 SmallVector<MVT, 16> OutVTs; 3195 OutVTs.reserve(CLI.OutVals.size()); 3196 3197 for (auto *Val : CLI.OutVals) { 3198 MVT VT; 3199 if (!isTypeLegal(Val->getType(), VT) && 3200 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3201 return false; 3202 3203 // We don't handle vector parameters yet. 3204 if (VT.isVector() || VT.getSizeInBits() > 64) 3205 return false; 3206 3207 OutVTs.push_back(VT); 3208 } 3209 3210 Address Addr; 3211 if (Callee && !computeCallAddress(Callee, Addr)) 3212 return false; 3213 3214 // Handle the arguments now that we've gotten them. 3215 unsigned NumBytes; 3216 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3217 return false; 3218 3219 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3220 if (RegInfo->isAnyArgRegReserved(*MF)) 3221 RegInfo->emitReservedArgRegCallError(*MF); 3222 3223 // Issue the call. 3224 MachineInstrBuilder MIB; 3225 if (Subtarget->useSmallAddressing()) { 3226 const MCInstrDesc &II = TII.get(Addr.getReg() ? 
AArch64::BLR : AArch64::BL); 3227 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); 3228 if (Symbol) 3229 MIB.addSym(Symbol, 0); 3230 else if (Addr.getGlobalValue()) 3231 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3232 else if (Addr.getReg()) { 3233 unsigned Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3234 MIB.addReg(Reg); 3235 } else 3236 return false; 3237 } else { 3238 unsigned CallReg = 0; 3239 if (Symbol) { 3240 unsigned ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3241 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 3242 ADRPReg) 3243 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3244 3245 CallReg = createResultReg(&AArch64::GPR64RegClass); 3246 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3247 TII.get(AArch64::LDRXui), CallReg) 3248 .addReg(ADRPReg) 3249 .addSym(Symbol, 3250 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3251 } else if (Addr.getGlobalValue()) 3252 CallReg = materializeGV(Addr.getGlobalValue()); 3253 else if (Addr.getReg()) 3254 CallReg = Addr.getReg(); 3255 3256 if (!CallReg) 3257 return false; 3258 3259 const MCInstrDesc &II = TII.get(AArch64::BLR); 3260 CallReg = constrainOperandRegClass(II, CallReg, 0); 3261 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg); 3262 } 3263 3264 // Add implicit physical register uses to the call. 3265 for (auto Reg : CLI.OutRegs) 3266 MIB.addReg(Reg, RegState::Implicit); 3267 3268 // Add a register mask with the call-preserved registers. 3269 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3270 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3271 3272 CLI.Call = MIB; 3273 3274 // Finish off the call including any return values. 3275 return finishCall(CLI, RetVT, NumBytes); 3276 } 3277 3278 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) { 3279 if (Alignment) 3280 return Len / Alignment <= 4; 3281 else 3282 return Len < 32; 3283 } 3284 3285 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3286 uint64_t Len, unsigned Alignment) { 3287 // Make sure we don't bloat code by inlining very large memcpy's. 3288 if (!isMemCpySmall(Len, Alignment)) 3289 return false; 3290 3291 int64_t UnscaledOffset = 0; 3292 Address OrigDest = Dest; 3293 Address OrigSrc = Src; 3294 3295 while (Len) { 3296 MVT VT; 3297 if (!Alignment || Alignment >= 8) { 3298 if (Len >= 8) 3299 VT = MVT::i64; 3300 else if (Len >= 4) 3301 VT = MVT::i32; 3302 else if (Len >= 2) 3303 VT = MVT::i16; 3304 else { 3305 VT = MVT::i8; 3306 } 3307 } else { 3308 // Bound based on alignment. 3309 if (Len >= 4 && Alignment == 4) 3310 VT = MVT::i32; 3311 else if (Len >= 2 && Alignment == 2) 3312 VT = MVT::i16; 3313 else { 3314 VT = MVT::i8; 3315 } 3316 } 3317 3318 unsigned ResultReg = emitLoad(VT, VT, Src); 3319 if (!ResultReg) 3320 return false; 3321 3322 if (!emitStore(VT, ResultReg, Dest)) 3323 return false; 3324 3325 int64_t Size = VT.getSizeInBits() / 8; 3326 Len -= Size; 3327 UnscaledOffset += Size; 3328 3329 // We need to recompute the unscaled offset for each iteration. 3330 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3331 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3332 } 3333 3334 return true; 3335 } 3336 3337 /// Check if it is possible to fold the condition from the XALU intrinsic 3338 /// into the user. The condition code will only be updated on success. 
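/// For example (illustrative):
///   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
///   %ovf = extractvalue { i32, i1 } %res, 1
///   br i1 %ovf, label %overflow, label %cont
/// Here the branch can reuse the flags set by the ADDS and branch on 'vs'
/// instead of re-testing the extracted i1.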
3339 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3340 const Instruction *I, 3341 const Value *Cond) { 3342 if (!isa<ExtractValueInst>(Cond)) 3343 return false; 3344 3345 const auto *EV = cast<ExtractValueInst>(Cond); 3346 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3347 return false; 3348 3349 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3350 MVT RetVT; 3351 const Function *Callee = II->getCalledFunction(); 3352 Type *RetTy = 3353 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3354 if (!isTypeLegal(RetTy, RetVT)) 3355 return false; 3356 3357 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3358 return false; 3359 3360 const Value *LHS = II->getArgOperand(0); 3361 const Value *RHS = II->getArgOperand(1); 3362 3363 // Canonicalize immediate to the RHS. 3364 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 3365 isCommutativeIntrinsic(II)) 3366 std::swap(LHS, RHS); 3367 3368 // Simplify multiplies. 3369 Intrinsic::ID IID = II->getIntrinsicID(); 3370 switch (IID) { 3371 default: 3372 break; 3373 case Intrinsic::smul_with_overflow: 3374 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3375 if (C->getValue() == 2) 3376 IID = Intrinsic::sadd_with_overflow; 3377 break; 3378 case Intrinsic::umul_with_overflow: 3379 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3380 if (C->getValue() == 2) 3381 IID = Intrinsic::uadd_with_overflow; 3382 break; 3383 } 3384 3385 AArch64CC::CondCode TmpCC; 3386 switch (IID) { 3387 default: 3388 return false; 3389 case Intrinsic::sadd_with_overflow: 3390 case Intrinsic::ssub_with_overflow: 3391 TmpCC = AArch64CC::VS; 3392 break; 3393 case Intrinsic::uadd_with_overflow: 3394 TmpCC = AArch64CC::HS; 3395 break; 3396 case Intrinsic::usub_with_overflow: 3397 TmpCC = AArch64CC::LO; 3398 break; 3399 case Intrinsic::smul_with_overflow: 3400 case Intrinsic::umul_with_overflow: 3401 TmpCC = AArch64CC::NE; 3402 break; 3403 } 3404 3405 // Check if both instructions are in the same basic block. 3406 if (!isValueAvailable(II)) 3407 return false; 3408 3409 // Make sure nothing is in the way 3410 BasicBlock::const_iterator Start(I); 3411 BasicBlock::const_iterator End(II); 3412 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3413 // We only expect extractvalue instructions between the intrinsic and the 3414 // instruction to be selected. 3415 if (!isa<ExtractValueInst>(Itr)) 3416 return false; 3417 3418 // Check that the extractvalue operand comes from the intrinsic. 3419 const auto *EVI = cast<ExtractValueInst>(Itr); 3420 if (EVI->getAggregateOperand() != II) 3421 return false; 3422 } 3423 3424 CC = TmpCC; 3425 return true; 3426 } 3427 3428 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3429 // FIXME: Handle more intrinsics. 3430 switch (II->getIntrinsicID()) { 3431 default: return false; 3432 case Intrinsic::frameaddress: { 3433 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3434 MFI.setFrameAddressIsTaken(true); 3435 3436 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3437 unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3438 unsigned SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3439 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3440 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3441 // Recursively load frame address 3442 // ldr x0, [fp] 3443 // ldr x0, [x0] 3444 // ldr x0, [x0] 3445 // ... 
3446 unsigned DestReg; 3447 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3448 while (Depth--) { 3449 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3450 SrcReg, /*IsKill=*/true, 0); 3451 assert(DestReg && "Unexpected LDR instruction emission failure."); 3452 SrcReg = DestReg; 3453 } 3454 3455 updateValueMap(II, SrcReg); 3456 return true; 3457 } 3458 case Intrinsic::sponentry: { 3459 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3460 3461 // SP = FP + Fixed Object + 16 3462 int FI = MFI.CreateFixedObject(4, 0, false); 3463 unsigned ResultReg = createResultReg(&AArch64::GPR64spRegClass); 3464 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3465 TII.get(AArch64::ADDXri), ResultReg) 3466 .addFrameIndex(FI) 3467 .addImm(0) 3468 .addImm(0); 3469 3470 updateValueMap(II, ResultReg); 3471 return true; 3472 } 3473 case Intrinsic::memcpy: 3474 case Intrinsic::memmove: { 3475 const auto *MTI = cast<MemTransferInst>(II); 3476 // Don't handle volatile. 3477 if (MTI->isVolatile()) 3478 return false; 3479 3480 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3481 // we would emit dead code because we don't currently handle memmoves. 3482 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3483 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3484 // Small memcpy's are common enough that we want to do them without a call 3485 // if possible. 3486 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3487 unsigned Alignment = MinAlign(MTI->getDestAlignment(), 3488 MTI->getSourceAlignment()); 3489 if (isMemCpySmall(Len, Alignment)) { 3490 Address Dest, Src; 3491 if (!computeAddress(MTI->getRawDest(), Dest) || 3492 !computeAddress(MTI->getRawSource(), Src)) 3493 return false; 3494 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3495 return true; 3496 } 3497 } 3498 3499 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3500 return false; 3501 3502 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3503 // Fast instruction selection doesn't support the special 3504 // address spaces. 3505 return false; 3506 3507 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3508 return lowerCallTo(II, IntrMemName, II->getNumArgOperands() - 1); 3509 } 3510 case Intrinsic::memset: { 3511 const MemSetInst *MSI = cast<MemSetInst>(II); 3512 // Don't handle volatile. 3513 if (MSI->isVolatile()) 3514 return false; 3515 3516 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3517 return false; 3518 3519 if (MSI->getDestAddressSpace() > 255) 3520 // Fast instruction selection doesn't support the special 3521 // address spaces. 
3522 return false; 3523 3524 return lowerCallTo(II, "memset", II->getNumArgOperands() - 1); 3525 } 3526 case Intrinsic::sin: 3527 case Intrinsic::cos: 3528 case Intrinsic::pow: { 3529 MVT RetVT; 3530 if (!isTypeLegal(II->getType(), RetVT)) 3531 return false; 3532 3533 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3534 return false; 3535 3536 static const RTLIB::Libcall LibCallTable[3][2] = { 3537 { RTLIB::SIN_F32, RTLIB::SIN_F64 }, 3538 { RTLIB::COS_F32, RTLIB::COS_F64 }, 3539 { RTLIB::POW_F32, RTLIB::POW_F64 } 3540 }; 3541 RTLIB::Libcall LC; 3542 bool Is64Bit = RetVT == MVT::f64; 3543 switch (II->getIntrinsicID()) { 3544 default: 3545 llvm_unreachable("Unexpected intrinsic."); 3546 case Intrinsic::sin: 3547 LC = LibCallTable[0][Is64Bit]; 3548 break; 3549 case Intrinsic::cos: 3550 LC = LibCallTable[1][Is64Bit]; 3551 break; 3552 case Intrinsic::pow: 3553 LC = LibCallTable[2][Is64Bit]; 3554 break; 3555 } 3556 3557 ArgListTy Args; 3558 Args.reserve(II->getNumArgOperands()); 3559 3560 // Populate the argument list. 3561 for (auto &Arg : II->arg_operands()) { 3562 ArgListEntry Entry; 3563 Entry.Val = Arg; 3564 Entry.Ty = Arg->getType(); 3565 Args.push_back(Entry); 3566 } 3567 3568 CallLoweringInfo CLI; 3569 MCContext &Ctx = MF->getContext(); 3570 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3571 TLI.getLibcallName(LC), std::move(Args)); 3572 if (!lowerCallTo(CLI)) 3573 return false; 3574 updateValueMap(II, CLI.ResultReg); 3575 return true; 3576 } 3577 case Intrinsic::fabs: { 3578 MVT VT; 3579 if (!isTypeLegal(II->getType(), VT)) 3580 return false; 3581 3582 unsigned Opc; 3583 switch (VT.SimpleTy) { 3584 default: 3585 return false; 3586 case MVT::f32: 3587 Opc = AArch64::FABSSr; 3588 break; 3589 case MVT::f64: 3590 Opc = AArch64::FABSDr; 3591 break; 3592 } 3593 unsigned SrcReg = getRegForValue(II->getOperand(0)); 3594 if (!SrcReg) 3595 return false; 3596 bool SrcRegIsKill = hasTrivialKill(II->getOperand(0)); 3597 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3598 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 3599 .addReg(SrcReg, getKillRegState(SrcRegIsKill)); 3600 updateValueMap(II, ResultReg); 3601 return true; 3602 } 3603 case Intrinsic::trap: 3604 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 3605 .addImm(1); 3606 return true; 3607 case Intrinsic::debugtrap: { 3608 if (Subtarget->isTargetWindows()) { 3609 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 3610 .addImm(0xF000); 3611 return true; 3612 } 3613 break; 3614 } 3615 3616 case Intrinsic::sqrt: { 3617 Type *RetTy = II->getCalledFunction()->getReturnType(); 3618 3619 MVT VT; 3620 if (!isTypeLegal(RetTy, VT)) 3621 return false; 3622 3623 unsigned Op0Reg = getRegForValue(II->getOperand(0)); 3624 if (!Op0Reg) 3625 return false; 3626 bool Op0IsKill = hasTrivialKill(II->getOperand(0)); 3627 3628 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg, Op0IsKill); 3629 if (!ResultReg) 3630 return false; 3631 3632 updateValueMap(II, ResultReg); 3633 return true; 3634 } 3635 case Intrinsic::sadd_with_overflow: 3636 case Intrinsic::uadd_with_overflow: 3637 case Intrinsic::ssub_with_overflow: 3638 case Intrinsic::usub_with_overflow: 3639 case Intrinsic::smul_with_overflow: 3640 case Intrinsic::umul_with_overflow: { 3641 // This implements the basic lowering of the xalu with overflow intrinsics. 
3642 const Function *Callee = II->getCalledFunction(); 3643 auto *Ty = cast<StructType>(Callee->getReturnType()); 3644 Type *RetTy = Ty->getTypeAtIndex(0U); 3645 3646 MVT VT; 3647 if (!isTypeLegal(RetTy, VT)) 3648 return false; 3649 3650 if (VT != MVT::i32 && VT != MVT::i64) 3651 return false; 3652 3653 const Value *LHS = II->getArgOperand(0); 3654 const Value *RHS = II->getArgOperand(1); 3655 // Canonicalize immediate to the RHS. 3656 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && 3657 isCommutativeIntrinsic(II)) 3658 std::swap(LHS, RHS); 3659 3660 // Simplify multiplies. 3661 Intrinsic::ID IID = II->getIntrinsicID(); 3662 switch (IID) { 3663 default: 3664 break; 3665 case Intrinsic::smul_with_overflow: 3666 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3667 if (C->getValue() == 2) { 3668 IID = Intrinsic::sadd_with_overflow; 3669 RHS = LHS; 3670 } 3671 break; 3672 case Intrinsic::umul_with_overflow: 3673 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3674 if (C->getValue() == 2) { 3675 IID = Intrinsic::uadd_with_overflow; 3676 RHS = LHS; 3677 } 3678 break; 3679 } 3680 3681 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3682 AArch64CC::CondCode CC = AArch64CC::Invalid; 3683 switch (IID) { 3684 default: llvm_unreachable("Unexpected intrinsic!"); 3685 case Intrinsic::sadd_with_overflow: 3686 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3687 CC = AArch64CC::VS; 3688 break; 3689 case Intrinsic::uadd_with_overflow: 3690 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3691 CC = AArch64CC::HS; 3692 break; 3693 case Intrinsic::ssub_with_overflow: 3694 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3695 CC = AArch64CC::VS; 3696 break; 3697 case Intrinsic::usub_with_overflow: 3698 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3699 CC = AArch64CC::LO; 3700 break; 3701 case Intrinsic::smul_with_overflow: { 3702 CC = AArch64CC::NE; 3703 unsigned LHSReg = getRegForValue(LHS); 3704 if (!LHSReg) 3705 return false; 3706 bool LHSIsKill = hasTrivialKill(LHS); 3707 3708 unsigned RHSReg = getRegForValue(RHS); 3709 if (!RHSReg) 3710 return false; 3711 bool RHSIsKill = hasTrivialKill(RHS); 3712 3713 if (VT == MVT::i32) { 3714 MulReg = emitSMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 3715 unsigned ShiftReg = emitLSR_ri(MVT::i64, MVT::i64, MulReg, 3716 /*IsKill=*/false, 32); 3717 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 3718 AArch64::sub_32); 3719 ShiftReg = fastEmitInst_extractsubreg(VT, ShiftReg, /*IsKill=*/true, 3720 AArch64::sub_32); 3721 emitSubs_rs(VT, ShiftReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 3722 AArch64_AM::ASR, 31, /*WantResult=*/false); 3723 } else { 3724 assert(VT == MVT::i64 && "Unexpected value type."); 3725 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3726 // reused in the next instruction. 
3727 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, 3728 /*IsKill=*/false); 3729 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, LHSIsKill, 3730 RHSReg, RHSIsKill); 3731 emitSubs_rs(VT, SMULHReg, /*IsKill=*/true, MulReg, /*IsKill=*/false, 3732 AArch64_AM::ASR, 63, /*WantResult=*/false); 3733 } 3734 break; 3735 } 3736 case Intrinsic::umul_with_overflow: { 3737 CC = AArch64CC::NE; 3738 unsigned LHSReg = getRegForValue(LHS); 3739 if (!LHSReg) 3740 return false; 3741 bool LHSIsKill = hasTrivialKill(LHS); 3742 3743 unsigned RHSReg = getRegForValue(RHS); 3744 if (!RHSReg) 3745 return false; 3746 bool RHSIsKill = hasTrivialKill(RHS); 3747 3748 if (VT == MVT::i32) { 3749 MulReg = emitUMULL_rr(MVT::i64, LHSReg, LHSIsKill, RHSReg, RHSIsKill); 3750 emitSubs_rs(MVT::i64, AArch64::XZR, /*IsKill=*/true, MulReg, 3751 /*IsKill=*/false, AArch64_AM::LSR, 32, 3752 /*WantResult=*/false); 3753 MulReg = fastEmitInst_extractsubreg(VT, MulReg, /*IsKill=*/true, 3754 AArch64::sub_32); 3755 } else { 3756 assert(VT == MVT::i64 && "Unexpected value type."); 3757 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3758 // reused in the next instruction. 3759 MulReg = emitMul_rr(VT, LHSReg, /*IsKill=*/false, RHSReg, 3760 /*IsKill=*/false); 3761 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, LHSIsKill, 3762 RHSReg, RHSIsKill); 3763 emitSubs_rr(VT, AArch64::XZR, /*IsKill=*/true, UMULHReg, 3764 /*IsKill=*/false, /*WantResult=*/false); 3765 } 3766 break; 3767 } 3768 } 3769 3770 if (MulReg) { 3771 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3772 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3773 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3774 } 3775 3776 if (!ResultReg1) 3777 return false; 3778 3779 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3780 AArch64::WZR, /*IsKill=*/true, AArch64::WZR, 3781 /*IsKill=*/true, getInvertedCondCode(CC)); 3782 (void)ResultReg2; 3783 assert((ResultReg1 + 1) == ResultReg2 && 3784 "Nonconsecutive result registers."); 3785 updateValueMap(II, ResultReg1, 2); 3786 return true; 3787 } 3788 } 3789 return false; 3790 } 3791 3792 bool AArch64FastISel::selectRet(const Instruction *I) { 3793 const ReturnInst *Ret = cast<ReturnInst>(I); 3794 const Function &F = *I->getParent()->getParent(); 3795 3796 if (!FuncInfo.CanLowerReturn) 3797 return false; 3798 3799 if (F.isVarArg()) 3800 return false; 3801 3802 if (TLI.supportSwiftError() && 3803 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 3804 return false; 3805 3806 if (TLI.supportSplitCSR(FuncInfo.MF)) 3807 return false; 3808 3809 // Build a list of return value registers. 3810 SmallVector<unsigned, 4> RetRegs; 3811 3812 if (Ret->getNumOperands() > 0) { 3813 CallingConv::ID CC = F.getCallingConv(); 3814 SmallVector<ISD::OutputArg, 4> Outs; 3815 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3816 3817 // Analyze operands of the call, assigning locations to each operand. 3818 SmallVector<CCValAssign, 16> ValLocs; 3819 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3820 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS 3821 : RetCC_AArch64_AAPCS; 3822 CCInfo.AnalyzeReturn(Outs, RetCC); 3823 3824 // Only handle a single return value for now. 3825 if (ValLocs.size() != 1) 3826 return false; 3827 3828 CCValAssign &VA = ValLocs[0]; 3829 const Value *RV = Ret->getOperand(0); 3830 3831 // Don't bother handling odd stuff for now. 
    if ((VA.getLocInfo() != CCValAssign::Full) &&
        (VA.getLocInfo() != CCValAssign::BCvt))
      return false;

    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    unsigned DestReg = VA.getLocReg();
    // Avoid a cross-class copy. This is very unlikely.
    if (!MRI.getRegClass(SrcReg)->contains(DestReg))
      return false;

    EVT RVEVT = TLI.getValueType(DL, RV->getType());
    if (!RVEVT.isSimple())
      return false;

    // Vectors (of > 1 lane) in big endian need tricky handling.
    if (RVEVT.isVector() && RVEVT.getVectorNumElements() > 1 &&
        !Subtarget->isLittleEndian())
      return false;

    MVT RVVT = RVEVT.getSimpleVT();
    if (RVVT == MVT::f128)
      return false;

    MVT DestVT = VA.getValVT();
    // Special handling for extended integers.
    if (RVVT != DestVT) {
      if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      bool IsZExt = Outs[0].Flags.isZExt();
      SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt);
      if (SrcReg == 0)
        return false;
    }

    // Make the copy.
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                                    TII.get(AArch64::RET_ReallyLR));
  for (unsigned RetReg : RetRegs)
    MIB.addReg(RetReg, RegState::Implicit);
  return true;
}

bool AArch64FastISel::selectTrunc(const Instruction *I) {
  Type *DestTy = I->getType();
  Value *Op = I->getOperand(0);
  Type *SrcTy = Op->getType();

  EVT SrcEVT = TLI.getValueType(DL, SrcTy, true);
  EVT DestEVT = TLI.getValueType(DL, DestTy, true);
  if (!SrcEVT.isSimple())
    return false;
  if (!DestEVT.isSimple())
    return false;

  MVT SrcVT = SrcEVT.getSimpleVT();
  MVT DestVT = DestEVT.getSimpleVT();

  if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 &&
      SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 &&
      DestVT != MVT::i1)
    return false;

  unsigned SrcReg = getRegForValue(Op);
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(Op);

  // If we're truncating from i64 to a smaller non-legal type, generate an AND.
  // Otherwise, we know the high bits are undefined and a truncate only
  // generates a COPY. We cannot mark the source register also as the result
  // register, because this can incorrectly transfer the kill flag onto the
  // source register.
  unsigned ResultReg;
  if (SrcVT == MVT::i64) {
    uint64_t Mask = 0;
    switch (DestVT.SimpleTy) {
    default:
      // Trunc i64 to i32 is handled by the target-independent fast-isel.
      return false;
    case MVT::i1:
      Mask = 0x1;
      break;
    case MVT::i8:
      Mask = 0xff;
      break;
    case MVT::i16:
      Mask = 0xffff;
      break;
    }
    // Issue an extract_subreg to get the lower 32 bits.
3943 unsigned Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, SrcIsKill, 3944 AArch64::sub_32); 3945 // Create the AND instruction which performs the actual truncation. 3946 ResultReg = emitAnd_ri(MVT::i32, Reg32, /*IsKill=*/true, Mask); 3947 assert(ResultReg && "Unexpected AND instruction emission failure."); 3948 } else { 3949 ResultReg = createResultReg(&AArch64::GPR32RegClass); 3950 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3951 TII.get(TargetOpcode::COPY), ResultReg) 3952 .addReg(SrcReg, getKillRegState(SrcIsKill)); 3953 } 3954 3955 updateValueMap(I, ResultReg); 3956 return true; 3957 } 3958 3959 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { 3960 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || 3961 DestVT == MVT::i64) && 3962 "Unexpected value type."); 3963 // Handle i8 and i16 as i32. 3964 if (DestVT == MVT::i8 || DestVT == MVT::i16) 3965 DestVT = MVT::i32; 3966 3967 if (IsZExt) { 3968 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, /*TODO:IsKill=*/false, 1); 3969 assert(ResultReg && "Unexpected AND instruction emission failure."); 3970 if (DestVT == MVT::i64) { 3971 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the 3972 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. 3973 unsigned Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3974 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3975 TII.get(AArch64::SUBREG_TO_REG), Reg64) 3976 .addImm(0) 3977 .addReg(ResultReg) 3978 .addImm(AArch64::sub_32); 3979 ResultReg = Reg64; 3980 } 3981 return ResultReg; 3982 } else { 3983 if (DestVT == MVT::i64) { 3984 // FIXME: We're SExt i1 to i64. 3985 return 0; 3986 } 3987 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 3988 /*TODO:IsKill=*/false, 0, 0); 3989 } 3990 } 3991 3992 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, bool Op0IsKill, 3993 unsigned Op1, bool Op1IsKill) { 3994 unsigned Opc, ZReg; 3995 switch (RetVT.SimpleTy) { 3996 default: return 0; 3997 case MVT::i8: 3998 case MVT::i16: 3999 case MVT::i32: 4000 RetVT = MVT::i32; 4001 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 4002 case MVT::i64: 4003 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 4004 } 4005 4006 const TargetRegisterClass *RC = 4007 (RetVT == MVT::i64) ? 
      &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  return fastEmitInst_rrr(Opc, RC, Op0, Op0IsKill, Op1, Op1IsKill,
                          ZReg, /*IsKill=*/true);
}

unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill) {
  if (RetVT != MVT::i64)
    return 0;

  return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass,
                          Op0, Op0IsKill, Op1, Op1IsKill,
                          AArch64::XZR, /*IsKill=*/true);
}

unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill) {
  if (RetVT != MVT::i64)
    return 0;

  return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass,
                          Op0, Op0IsKill, Op1, Op1IsKill,
                          AArch64::XZR, /*IsKill=*/true);
}

unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill,
                                     unsigned Op1Reg, bool Op1IsKill) {
  unsigned Opc = 0;
  bool NeedTrunc = false;
  uint64_t Mask = 0;
  switch (RetVT.SimpleTy) {
  default: return 0;
  case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break;
  case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break;
  case MVT::i32: Opc = AArch64::LSLVWr; break;
  case MVT::i64: Opc = AArch64::LSLVXr; break;
  }

  const TargetRegisterClass *RC =
      (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  if (NeedTrunc) {
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    Op1IsKill = true;
  }
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
                                       Op1IsKill);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  return ResultReg;
}

unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0, getKillRegState(Op0IsKill));
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4093 // {S|U}BFM Wd, Wn, #r, #s 4094 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 4095 4096 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4097 // %2 = shl i16 %1, 4 4098 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 4099 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 4100 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 4101 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 4102 4103 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4104 // %2 = shl i16 %1, 8 4105 // Wd<32+7-24,32-24> = Wn<7:0> 4106 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 4107 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 4108 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 4109 4110 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4111 // %2 = shl i16 %1, 12 4112 // Wd<32+3-20,32-20> = Wn<3:0> 4113 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 4114 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 4115 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 4116 4117 unsigned ImmR = RegSize - Shift; 4118 // Limit the width to the length of the source type. 4119 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 4120 static const unsigned OpcTable[2][2] = { 4121 {AArch64::SBFMWri, AArch64::SBFMXri}, 4122 {AArch64::UBFMWri, AArch64::UBFMXri} 4123 }; 4124 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4125 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4126 unsigned TmpReg = MRI.createVirtualRegister(RC); 4127 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4128 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4129 .addImm(0) 4130 .addReg(Op0, getKillRegState(Op0IsKill)) 4131 .addImm(AArch64::sub_32); 4132 Op0 = TmpReg; 4133 Op0IsKill = true; 4134 } 4135 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4136 } 4137 4138 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 4139 unsigned Op1Reg, bool Op1IsKill) { 4140 unsigned Opc = 0; 4141 bool NeedTrunc = false; 4142 uint64_t Mask = 0; 4143 switch (RetVT.SimpleTy) { 4144 default: return 0; 4145 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4146 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4147 case MVT::i32: Opc = AArch64::LSRVWr; break; 4148 case MVT::i64: Opc = AArch64::LSRVXr; break; 4149 } 4150 4151 const TargetRegisterClass *RC = 4152 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4153 if (NeedTrunc) { 4154 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Op0IsKill, Mask); 4155 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask); 4156 Op0IsKill = Op1IsKill = true; 4157 } 4158 unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg, 4159 Op1IsKill); 4160 if (NeedTrunc) 4161 ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask); 4162 return ResultReg; 4163 } 4164 4165 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4166 bool Op0IsKill, uint64_t Shift, 4167 bool IsZExt) { 4168 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4169 "Unexpected source/return type pair."); 4170 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4171 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4172 "Unexpected source value type."); 4173 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4174 RetVT == MVT::i64) && "Unexpected return value type."); 4175 4176 bool Is64Bit = (RetVT == MVT::i64); 4177 unsigned RegSize = Is64Bit ? 
64 : 32; 4178 unsigned DstBits = RetVT.getSizeInBits(); 4179 unsigned SrcBits = SrcVT.getSizeInBits(); 4180 const TargetRegisterClass *RC = 4181 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4182 4183 // Just emit a copy for "zero" shifts. 4184 if (Shift == 0) { 4185 if (RetVT == SrcVT) { 4186 unsigned ResultReg = createResultReg(RC); 4187 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4188 TII.get(TargetOpcode::COPY), ResultReg) 4189 .addReg(Op0, getKillRegState(Op0IsKill)); 4190 return ResultReg; 4191 } else 4192 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4193 } 4194 4195 // Don't deal with undefined shifts. 4196 if (Shift >= DstBits) 4197 return 0; 4198 4199 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4200 // {S|U}BFM Wd, Wn, #r, #s 4201 // Wd<s-r:0> = Wn<s:r> when r <= s 4202 4203 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4204 // %2 = lshr i16 %1, 4 4205 // Wd<7-4:0> = Wn<7:4> 4206 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4207 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4208 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4209 4210 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4211 // %2 = lshr i16 %1, 8 4212 // Wd<7-7,0> = Wn<7:7> 4213 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4214 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4215 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4216 4217 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4218 // %2 = lshr i16 %1, 12 4219 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4220 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4221 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4222 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4223 4224 if (Shift >= SrcBits && IsZExt) 4225 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4226 4227 // It is not possible to fold a sign-extend into the LShr instruction. In this 4228 // case emit a sign-extend. 4229 if (!IsZExt) { 4230 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4231 if (!Op0) 4232 return 0; 4233 Op0IsKill = true; 4234 SrcVT = RetVT; 4235 SrcBits = SrcVT.getSizeInBits(); 4236 IsZExt = true; 4237 } 4238 4239 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4240 unsigned ImmS = SrcBits - 1; 4241 static const unsigned OpcTable[2][2] = { 4242 {AArch64::SBFMWri, AArch64::SBFMXri}, 4243 {AArch64::UBFMWri, AArch64::UBFMXri} 4244 }; 4245 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4246 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4247 unsigned TmpReg = MRI.createVirtualRegister(RC); 4248 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4249 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4250 .addImm(0) 4251 .addReg(Op0, getKillRegState(Op0IsKill)) 4252 .addImm(AArch64::sub_32); 4253 Op0 = TmpReg; 4254 Op0IsKill = true; 4255 } 4256 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4257 } 4258 4259 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, bool Op0IsKill, 4260 unsigned Op1Reg, bool Op1IsKill) { 4261 unsigned Opc = 0; 4262 bool NeedTrunc = false; 4263 uint64_t Mask = 0; 4264 switch (RetVT.SimpleTy) { 4265 default: return 0; 4266 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4267 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4268 case MVT::i32: Opc = AArch64::ASRVWr; break; 4269 case MVT::i64: Opc = AArch64::ASRVXr; break; 4270 } 4271 4272 const TargetRegisterClass *RC = 4273 (RetVT == MVT::i64) ? 
      &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  if (NeedTrunc) {
    Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false);
    Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Op1IsKill, Mask);
    Op0IsKill = Op1IsKill = true;
  }
  unsigned ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op0IsKill, Op1Reg,
                                       Op1IsKill);
  if (NeedTrunc)
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, /*IsKill=*/true, Mask);
  return ResultReg;
}

unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0,
                                     bool Op0IsKill, uint64_t Shift,
                                     bool IsZExt) {
  assert(RetVT.SimpleTy >= SrcVT.SimpleTy &&
         "Unexpected source/return type pair.");
  assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 ||
          SrcVT == MVT::i32 || SrcVT == MVT::i64) &&
         "Unexpected source value type.");
  assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 ||
          RetVT == MVT::i64) && "Unexpected return value type.");

  bool Is64Bit = (RetVT == MVT::i64);
  unsigned RegSize = Is64Bit ? 64 : 32;
  unsigned DstBits = RetVT.getSizeInBits();
  unsigned SrcBits = SrcVT.getSizeInBits();
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

  // Just emit a copy for "zero" shifts.
  if (Shift == 0) {
    if (RetVT == SrcVT) {
      unsigned ResultReg = createResultReg(RC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
              TII.get(TargetOpcode::COPY), ResultReg)
          .addReg(Op0, getKillRegState(Op0IsKill));
      return ResultReg;
    } else
      return emitIntExt(SrcVT, Op0, RetVT, IsZExt);
  }

  // Don't deal with undefined shifts.
  if (Shift >= DstBits)
    return 0;

  // For immediate shifts we can fold the zero-/sign-extension into the shift.
4321 // {S|U}BFM Wd, Wn, #r, #s 4322 // Wd<s-r:0> = Wn<s:r> when r <= s 4323 4324 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4325 // %2 = ashr i16 %1, 4 4326 // Wd<7-4:0> = Wn<7:4> 4327 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4328 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4329 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4330 4331 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4332 // %2 = ashr i16 %1, 8 4333 // Wd<7-7,0> = Wn<7:7> 4334 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4335 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4336 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4337 4338 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4339 // %2 = ashr i16 %1, 12 4340 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4341 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4342 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4343 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4344 4345 if (Shift >= SrcBits && IsZExt) 4346 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4347 4348 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4349 unsigned ImmS = SrcBits - 1; 4350 static const unsigned OpcTable[2][2] = { 4351 {AArch64::SBFMWri, AArch64::SBFMXri}, 4352 {AArch64::UBFMWri, AArch64::UBFMXri} 4353 }; 4354 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4355 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4356 unsigned TmpReg = MRI.createVirtualRegister(RC); 4357 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4358 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4359 .addImm(0) 4360 .addReg(Op0, getKillRegState(Op0IsKill)) 4361 .addImm(AArch64::sub_32); 4362 Op0 = TmpReg; 4363 Op0IsKill = true; 4364 } 4365 return fastEmitInst_rii(Opc, RC, Op0, Op0IsKill, ImmR, ImmS); 4366 } 4367 4368 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4369 bool IsZExt) { 4370 assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4371 4372 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4373 // DestVT are odd things, so test to make sure that they are both types we can 4374 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4375 // bail out to SelectionDAG. 4376 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4377 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4378 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4379 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4380 return 0; 4381 4382 unsigned Opc; 4383 unsigned Imm = 0; 4384 4385 switch (SrcVT.SimpleTy) { 4386 default: 4387 return 0; 4388 case MVT::i1: 4389 return emiti1Ext(SrcReg, DestVT, IsZExt); 4390 case MVT::i8: 4391 if (DestVT == MVT::i64) 4392 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4393 else 4394 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4395 Imm = 7; 4396 break; 4397 case MVT::i16: 4398 if (DestVT == MVT::i64) 4399 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4400 else 4401 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4402 Imm = 15; 4403 break; 4404 case MVT::i32: 4405 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4406 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4407 Imm = 31; 4408 break; 4409 } 4410 4411 // Handle i8 and i16 as i32. 
4412 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4413 DestVT = MVT::i32; 4414 else if (DestVT == MVT::i64) { 4415 unsigned Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4416 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4417 TII.get(AArch64::SUBREG_TO_REG), Src64) 4418 .addImm(0) 4419 .addReg(SrcReg) 4420 .addImm(AArch64::sub_32); 4421 SrcReg = Src64; 4422 } 4423 4424 const TargetRegisterClass *RC = 4425 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4426 return fastEmitInst_rii(Opc, RC, SrcReg, /*TODO:IsKill=*/false, 0, Imm); 4427 } 4428 4429 static bool isZExtLoad(const MachineInstr *LI) { 4430 switch (LI->getOpcode()) { 4431 default: 4432 return false; 4433 case AArch64::LDURBBi: 4434 case AArch64::LDURHHi: 4435 case AArch64::LDURWi: 4436 case AArch64::LDRBBui: 4437 case AArch64::LDRHHui: 4438 case AArch64::LDRWui: 4439 case AArch64::LDRBBroX: 4440 case AArch64::LDRHHroX: 4441 case AArch64::LDRWroX: 4442 case AArch64::LDRBBroW: 4443 case AArch64::LDRHHroW: 4444 case AArch64::LDRWroW: 4445 return true; 4446 } 4447 } 4448 4449 static bool isSExtLoad(const MachineInstr *LI) { 4450 switch (LI->getOpcode()) { 4451 default: 4452 return false; 4453 case AArch64::LDURSBWi: 4454 case AArch64::LDURSHWi: 4455 case AArch64::LDURSBXi: 4456 case AArch64::LDURSHXi: 4457 case AArch64::LDURSWi: 4458 case AArch64::LDRSBWui: 4459 case AArch64::LDRSHWui: 4460 case AArch64::LDRSBXui: 4461 case AArch64::LDRSHXui: 4462 case AArch64::LDRSWui: 4463 case AArch64::LDRSBWroX: 4464 case AArch64::LDRSHWroX: 4465 case AArch64::LDRSBXroX: 4466 case AArch64::LDRSHXroX: 4467 case AArch64::LDRSWroX: 4468 case AArch64::LDRSBWroW: 4469 case AArch64::LDRSHWroW: 4470 case AArch64::LDRSBXroW: 4471 case AArch64::LDRSHXroW: 4472 case AArch64::LDRSWroW: 4473 return true; 4474 } 4475 } 4476 4477 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4478 MVT SrcVT) { 4479 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4480 if (!LI || !LI->hasOneUse()) 4481 return false; 4482 4483 // Check if the load instruction has already been selected. 4484 unsigned Reg = lookUpRegForValue(LI); 4485 if (!Reg) 4486 return false; 4487 4488 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4489 if (!MI) 4490 return false; 4491 4492 // Check if the correct load instruction has been emitted - SelectionDAG might 4493 // have emitted a zero-extending load, but we need a sign-extending load. 4494 bool IsZExt = isa<ZExtInst>(I); 4495 const auto *LoadMI = MI; 4496 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4497 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4498 unsigned LoadReg = MI->getOperand(1).getReg(); 4499 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4500 assert(LoadMI && "Expected valid instruction"); 4501 } 4502 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4503 return false; 4504 4505 // Nothing to be done. 
  if (RetVT != MVT::i64 || SrcVT > MVT::i32) {
    updateValueMap(I, Reg);
    return true;
  }

  if (IsZExt) {
    unsigned Reg64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(AArch64::SUBREG_TO_REG), Reg64)
        .addImm(0)
        .addReg(Reg, getKillRegState(true))
        .addImm(AArch64::sub_32);
    Reg = Reg64;
  } else {
    assert((MI->getOpcode() == TargetOpcode::COPY &&
            MI->getOperand(1).getSubReg() == AArch64::sub_32) &&
           "Expected copy instruction");
    Reg = MI->getOperand(1).getReg();
    MachineBasicBlock::iterator I(MI);
    removeDeadCode(I, std::next(I));
  }
  updateValueMap(I, Reg);
  return true;
}

bool AArch64FastISel::selectIntExt(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  MVT RetVT;
  MVT SrcVT;
  if (!isTypeSupported(I->getType(), RetVT))
    return false;

  if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT))
    return false;

  // Try to optimize already sign-/zero-extended values from load instructions.
  if (optimizeIntExtLoad(I, RetVT, SrcVT))
    return true;

  unsigned SrcReg = getRegForValue(I->getOperand(0));
  if (!SrcReg)
    return false;
  bool SrcIsKill = hasTrivialKill(I->getOperand(0));

  // Try to optimize already sign-/zero-extended values from function arguments.
  bool IsZExt = isa<ZExtInst>(I);
  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) {
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) {
      if (RetVT == MVT::i64 && SrcVT != MVT::i64) {
        unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
                TII.get(AArch64::SUBREG_TO_REG), ResultReg)
            .addImm(0)
            .addReg(SrcReg, getKillRegState(SrcIsKill))
            .addImm(AArch64::sub_32);
        SrcReg = ResultReg;
      }
      // Conservatively clear all kill flags from all uses, because we are
      // replacing a sign-/zero-extend instruction at IR level with a nop at MI
      // level. The result of the instruction at IR level might have been
      // trivially dead, which is now no longer true.
      unsigned UseReg = lookUpRegForValue(I);
      if (UseReg)
        MRI.clearKillFlags(UseReg);

      updateValueMap(I, SrcReg);
      return true;
    }
  }

  unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt);
  if (!ResultReg)
    return false;

  updateValueMap(I, ResultReg);
  return true;
}

bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) {
  EVT DestEVT = TLI.getValueType(DL, I->getType(), true);
  if (!DestEVT.isSimple())
    return false;

  MVT DestVT = DestEVT.getSimpleVT();
  if (DestVT != MVT::i64 && DestVT != MVT::i32)
    return false;

  unsigned DivOpc;
  bool Is64bit = (DestVT == MVT::i64);
  switch (ISDOpcode) {
  default:
    return false;
  case ISD::SREM:
    DivOpc = Is64bit ? AArch64::SDIVXr : AArch64::SDIVWr;
    break;
  case ISD::UREM:
    DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr;
    break;
  }
  unsigned MSubOpc = Is64bit ?
AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4607 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4608 if (!Src0Reg) 4609 return false; 4610 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4611 4612 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 4613 if (!Src1Reg) 4614 return false; 4615 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 4616 4617 const TargetRegisterClass *RC = 4618 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4619 unsigned QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, /*IsKill=*/false, 4620 Src1Reg, /*IsKill=*/false); 4621 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4622 // The remainder is computed as numerator - (quotient * denominator) using the 4623 // MSUB instruction. 4624 unsigned ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, /*IsKill=*/true, 4625 Src1Reg, Src1IsKill, Src0Reg, 4626 Src0IsKill); 4627 updateValueMap(I, ResultReg); 4628 return true; 4629 } 4630 4631 bool AArch64FastISel::selectMul(const Instruction *I) { 4632 MVT VT; 4633 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4634 return false; 4635 4636 if (VT.isVector()) 4637 return selectBinaryOp(I, ISD::MUL); 4638 4639 const Value *Src0 = I->getOperand(0); 4640 const Value *Src1 = I->getOperand(1); 4641 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4642 if (C->getValue().isPowerOf2()) 4643 std::swap(Src0, Src1); 4644 4645 // Try to simplify to a shift instruction. 4646 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4647 if (C->getValue().isPowerOf2()) { 4648 uint64_t ShiftVal = C->getValue().logBase2(); 4649 MVT SrcVT = VT; 4650 bool IsZExt = true; 4651 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4652 if (!isIntExtFree(ZExt)) { 4653 MVT VT; 4654 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4655 SrcVT = VT; 4656 IsZExt = true; 4657 Src0 = ZExt->getOperand(0); 4658 } 4659 } 4660 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4661 if (!isIntExtFree(SExt)) { 4662 MVT VT; 4663 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4664 SrcVT = VT; 4665 IsZExt = false; 4666 Src0 = SExt->getOperand(0); 4667 } 4668 } 4669 } 4670 4671 unsigned Src0Reg = getRegForValue(Src0); 4672 if (!Src0Reg) 4673 return false; 4674 bool Src0IsKill = hasTrivialKill(Src0); 4675 4676 unsigned ResultReg = 4677 emitLSL_ri(VT, SrcVT, Src0Reg, Src0IsKill, ShiftVal, IsZExt); 4678 4679 if (ResultReg) { 4680 updateValueMap(I, ResultReg); 4681 return true; 4682 } 4683 } 4684 4685 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4686 if (!Src0Reg) 4687 return false; 4688 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4689 4690 unsigned Src1Reg = getRegForValue(I->getOperand(1)); 4691 if (!Src1Reg) 4692 return false; 4693 bool Src1IsKill = hasTrivialKill(I->getOperand(1)); 4694 4695 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src0IsKill, Src1Reg, Src1IsKill); 4696 4697 if (!ResultReg) 4698 return false; 4699 4700 updateValueMap(I, ResultReg); 4701 return true; 4702 } 4703 4704 bool AArch64FastISel::selectShift(const Instruction *I) { 4705 MVT RetVT; 4706 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4707 return false; 4708 4709 if (RetVT.isVector()) 4710 return selectOperator(I, I->getOpcode()); 4711 4712 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4713 unsigned ResultReg = 0; 4714 uint64_t ShiftVal = C->getZExtValue(); 4715 MVT SrcVT = RetVT; 4716 bool IsZExt = I->getOpcode() != Instruction::AShr; 4717 const Value *Op0 = I->getOperand(0); 
4718 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4719 if (!isIntExtFree(ZExt)) { 4720 MVT TmpVT; 4721 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4722 SrcVT = TmpVT; 4723 IsZExt = true; 4724 Op0 = ZExt->getOperand(0); 4725 } 4726 } 4727 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4728 if (!isIntExtFree(SExt)) { 4729 MVT TmpVT; 4730 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4731 SrcVT = TmpVT; 4732 IsZExt = false; 4733 Op0 = SExt->getOperand(0); 4734 } 4735 } 4736 } 4737 4738 unsigned Op0Reg = getRegForValue(Op0); 4739 if (!Op0Reg) 4740 return false; 4741 bool Op0IsKill = hasTrivialKill(Op0); 4742 4743 switch (I->getOpcode()) { 4744 default: llvm_unreachable("Unexpected instruction."); 4745 case Instruction::Shl: 4746 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4747 break; 4748 case Instruction::AShr: 4749 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4750 break; 4751 case Instruction::LShr: 4752 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, Op0IsKill, ShiftVal, IsZExt); 4753 break; 4754 } 4755 if (!ResultReg) 4756 return false; 4757 4758 updateValueMap(I, ResultReg); 4759 return true; 4760 } 4761 4762 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 4763 if (!Op0Reg) 4764 return false; 4765 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 4766 4767 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 4768 if (!Op1Reg) 4769 return false; 4770 bool Op1IsKill = hasTrivialKill(I->getOperand(1)); 4771 4772 unsigned ResultReg = 0; 4773 switch (I->getOpcode()) { 4774 default: llvm_unreachable("Unexpected instruction."); 4775 case Instruction::Shl: 4776 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4777 break; 4778 case Instruction::AShr: 4779 ResultReg = emitASR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4780 break; 4781 case Instruction::LShr: 4782 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op0IsKill, Op1Reg, Op1IsKill); 4783 break; 4784 } 4785 4786 if (!ResultReg) 4787 return false; 4788 4789 updateValueMap(I, ResultReg); 4790 return true; 4791 } 4792 4793 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4794 MVT RetVT, SrcVT; 4795 4796 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4797 return false; 4798 if (!isTypeLegal(I->getType(), RetVT)) 4799 return false; 4800 4801 unsigned Opc; 4802 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4803 Opc = AArch64::FMOVWSr; 4804 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4805 Opc = AArch64::FMOVXDr; 4806 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4807 Opc = AArch64::FMOVSWr; 4808 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4809 Opc = AArch64::FMOVDXr; 4810 else 4811 return false; 4812 4813 const TargetRegisterClass *RC = nullptr; 4814 switch (RetVT.SimpleTy) { 4815 default: llvm_unreachable("Unexpected value type."); 4816 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4817 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4818 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4819 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4820 } 4821 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 4822 if (!Op0Reg) 4823 return false; 4824 bool Op0IsKill = hasTrivialKill(I->getOperand(0)); 4825 unsigned ResultReg = fastEmitInst_r(Opc, RC, Op0Reg, Op0IsKill); 4826 4827 if (!ResultReg) 4828 return false; 4829 4830 updateValueMap(I, ResultReg); 4831 return true; 4832 } 4833 4834 bool AArch64FastISel::selectFRem(const Instruction *I) { 4835 MVT RetVT; 
4836 if (!isTypeLegal(I->getType(), RetVT)) 4837 return false; 4838 4839 RTLIB::Libcall LC; 4840 switch (RetVT.SimpleTy) { 4841 default: 4842 return false; 4843 case MVT::f32: 4844 LC = RTLIB::REM_F32; 4845 break; 4846 case MVT::f64: 4847 LC = RTLIB::REM_F64; 4848 break; 4849 } 4850 4851 ArgListTy Args; 4852 Args.reserve(I->getNumOperands()); 4853 4854 // Populate the argument list. 4855 for (auto &Arg : I->operands()) { 4856 ArgListEntry Entry; 4857 Entry.Val = Arg; 4858 Entry.Ty = Arg->getType(); 4859 Args.push_back(Entry); 4860 } 4861 4862 CallLoweringInfo CLI; 4863 MCContext &Ctx = MF->getContext(); 4864 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4865 TLI.getLibcallName(LC), std::move(Args)); 4866 if (!lowerCallTo(CLI)) 4867 return false; 4868 updateValueMap(I, CLI.ResultReg); 4869 return true; 4870 } 4871 4872 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4873 MVT VT; 4874 if (!isTypeLegal(I->getType(), VT)) 4875 return false; 4876 4877 if (!isa<ConstantInt>(I->getOperand(1))) 4878 return selectBinaryOp(I, ISD::SDIV); 4879 4880 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4881 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4882 !(C.isPowerOf2() || (-C).isPowerOf2())) 4883 return selectBinaryOp(I, ISD::SDIV); 4884 4885 unsigned Lg2 = C.countTrailingZeros(); 4886 unsigned Src0Reg = getRegForValue(I->getOperand(0)); 4887 if (!Src0Reg) 4888 return false; 4889 bool Src0IsKill = hasTrivialKill(I->getOperand(0)); 4890 4891 if (cast<BinaryOperator>(I)->isExact()) { 4892 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Src0IsKill, Lg2); 4893 if (!ResultReg) 4894 return false; 4895 updateValueMap(I, ResultReg); 4896 return true; 4897 } 4898 4899 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4900 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, /*IsKill=*/false, Pow2MinusOne); 4901 if (!AddReg) 4902 return false; 4903 4904 // (Src0 < 0) ? Pow2 - 1 : 0; 4905 if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0)) 4906 return false; 4907 4908 unsigned SelectOpc; 4909 const TargetRegisterClass *RC; 4910 if (VT == MVT::i64) { 4911 SelectOpc = AArch64::CSELXr; 4912 RC = &AArch64::GPR64RegClass; 4913 } else { 4914 SelectOpc = AArch64::CSELWr; 4915 RC = &AArch64::GPR32RegClass; 4916 } 4917 unsigned SelectReg = 4918 fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg, 4919 Src0IsKill, AArch64CC::LT); 4920 if (!SelectReg) 4921 return false; 4922 4923 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4924 // negate the result. 4925 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4926 unsigned ResultReg; 4927 if (C.isNegative()) 4928 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true, 4929 SelectReg, /*IsKill=*/true, AArch64_AM::ASR, Lg2); 4930 else 4931 ResultReg = emitASR_ri(VT, VT, SelectReg, /*IsKill=*/true, Lg2); 4932 4933 if (!ResultReg) 4934 return false; 4935 4936 updateValueMap(I, ResultReg); 4937 return true; 4938 } 4939 4940 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4941 /// have to duplicate it for AArch64, because otherwise we would fail during the 4942 /// sign-extend emission. 4943 std::pair<unsigned, bool> AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4944 unsigned IdxN = getRegForValue(Idx); 4945 if (IdxN == 0) 4946 // Unhandled operand. Halt "fast" selection and bail. 
    return std::pair<unsigned, bool>(0, false);

  bool IdxNIsKill = hasTrivialKill(Idx);

  // If the index is smaller or larger than intptr_t, truncate or extend it.
  MVT PtrVT = TLI.getPointerTy(DL);
  EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false);
  if (IdxVT.bitsLT(PtrVT)) {
    IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false);
    IdxNIsKill = true;
  } else if (IdxVT.bitsGT(PtrVT))
    llvm_unreachable("AArch64 FastISel doesn't support types larger than i64");
  return std::pair<unsigned, bool>(IdxN, IdxNIsKill);
}

/// This is mostly a copy of the existing FastISel GEP code, but we have to
/// duplicate it for AArch64, because otherwise we would bail out even for
/// simple cases. This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  unsigned N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;
  bool NIsKill = hasTrivialKill(I->getOperand(0));

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  MVT VT = TLI.getPointerTy(DL);
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (auto *StTy = GTI.getStructTypeOrNull()) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    } else {
      Type *Ty = GTI.getIndexedType();

      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        TotalOffs +=
            DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
        if (!N)
          return false;
        NIsKill = true;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
      std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
      unsigned IdxN = Pair.first;
      bool IdxNIsKill = Pair.second;
      if (!IdxN)
        return false;

      if (ElementSize != 1) {
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, IdxNIsKill, C, true);
        if (!IdxN)
          return false;
        IdxNIsKill = true;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, NIsKill, IdxN, IdxNIsKill);
      if (!N)
        return false;
    }
  }
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, NIsKill, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}

bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
  assert(TM.getOptLevel() == CodeGenOpt::None &&
         "cmpxchg survived AtomicExpand at optlevel > -O0");

  auto *RetPairTy = cast<StructType>(I->getType());
  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
         "cmpxchg has a non-i1 status result");

  MVT VT;
  if (!isTypeLegal(RetTy, VT))
    return false;

  const TargetRegisterClass *ResRC;
  unsigned Opc, CmpOpc;
  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
  // extractvalue selection doesn't support that.
  if (VT == MVT::i32) {
    Opc = AArch64::CMP_SWAP_32;
    CmpOpc = AArch64::SUBSWrs;
    ResRC = &AArch64::GPR32RegClass;
  } else if (VT == MVT::i64) {
    Opc = AArch64::CMP_SWAP_64;
    CmpOpc = AArch64::SUBSXrs;
    ResRC = &AArch64::GPR64RegClass;
  } else {
    return false;
  }

  const MCInstrDesc &II = TII.get(Opc);

  const unsigned AddrReg = constrainOperandRegClass(
      II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
  const unsigned DesiredReg = constrainOperandRegClass(
      II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
  const unsigned NewReg = constrainOperandRegClass(
      II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);

  const unsigned ResultReg1 = createResultReg(ResRC);
  const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
  const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);

  // FIXME: MachineMemOperand doesn't support cmpxchg yet.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
      .addDef(ResultReg1)
      .addDef(ScratchReg)
      .addUse(AddrReg)
      .addUse(DesiredReg)
      .addUse(NewReg);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
      .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
      .addUse(ResultReg1)
      .addUse(DesiredReg)
      .addImm(0);

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
      .addDef(ResultReg2)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::NE);

  assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
  updateValueMap(I, ResultReg1, 2);
  return true;
}

bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default:
    break;
  case Instruction::Add:
  case Instruction::Sub:
    return selectAddSub(I);
  case Instruction::Mul:
    return selectMul(I);
  case Instruction::SDiv:
    return selectSDiv(I);
  case Instruction::SRem:
    if (!selectBinaryOp(I, ISD::SREM))
      return selectRem(I, ISD::SREM);
    return true;
  case Instruction::URem:
    if (!selectBinaryOp(I, ISD::UREM))
      return selectRem(I, ISD::UREM);
    return true;
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return selectShift(I);
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
    return selectLogicalOp(I);
  case Instruction::Br:
    return selectBranch(I);
  case Instruction::IndirectBr:
    return selectIndirectBr(I);
  case Instruction::BitCast:
    if (!FastISel::selectBitCast(I))
      return selectBitCast(I);
    return true;
  case Instruction::FPToSI:
    if (!selectCast(I, ISD::FP_TO_SINT))
      return selectFPToInt(I, /*Signed=*/true);
    return true;
  case Instruction::FPToUI:
    return selectFPToInt(I, /*Signed=*/false);
  case Instruction::ZExt:
  case Instruction::SExt:
    return selectIntExt(I);
  case Instruction::Trunc:
    if (!selectCast(I, ISD::TRUNCATE))
      return selectTrunc(I);
    return true;
  case Instruction::FPExt:
    return selectFPExt(I);
  case Instruction::FPTrunc:
    return selectFPTrunc(I);
  case Instruction::SIToFP:
    if (!selectCast(I, ISD::SINT_TO_FP))
      return selectIntToFP(I, /*Signed=*/true);
    return true;
  case Instruction::UIToFP:
    return selectIntToFP(I, /*Signed=*/false);
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  case Instruction::AtomicCmpXchg:
    return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
  }

  // Fall back to target-independent instruction selection.
  return selectOperator(I, I->getOpcode());
}

namespace llvm {

FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) {
  return new AArch64FastISel(FuncInfo, LibInfo);
}

} // end namespace llvm