//===- AArch64FastISel.cpp - AArch64 FastISel implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg = 0;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void
setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; } 106 AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; } 107 bool isRegBase() const { return Kind == RegBase; } 108 bool isFIBase() const { return Kind == FrameIndexBase; } 109 110 void setReg(unsigned Reg) { 111 assert(isRegBase() && "Invalid base register access!"); 112 Base.Reg = Reg; 113 } 114 115 unsigned getReg() const { 116 assert(isRegBase() && "Invalid base register access!"); 117 return Base.Reg; 118 } 119 120 void setOffsetReg(unsigned Reg) { 121 OffsetReg = Reg; 122 } 123 124 unsigned getOffsetReg() const { 125 return OffsetReg; 126 } 127 128 void setFI(unsigned FI) { 129 assert(isFIBase() && "Invalid base frame index access!"); 130 Base.FI = FI; 131 } 132 133 unsigned getFI() const { 134 assert(isFIBase() && "Invalid base frame index access!"); 135 return Base.FI; 136 } 137 138 void setOffset(int64_t O) { Offset = O; } 139 int64_t getOffset() { return Offset; } 140 void setShift(unsigned S) { Shift = S; } 141 unsigned getShift() { return Shift; } 142 143 void setGlobalValue(const GlobalValue *G) { GV = G; } 144 const GlobalValue *getGlobalValue() { return GV; } 145 }; 146 147 /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can 148 /// make the right decision when generating code for different targets. 149 const AArch64Subtarget *Subtarget; 150 LLVMContext *Context; 151 152 bool fastLowerArguments() override; 153 bool fastLowerCall(CallLoweringInfo &CLI) override; 154 bool fastLowerIntrinsicCall(const IntrinsicInst *II) override; 155 156 private: 157 // Selection routines. 158 bool selectAddSub(const Instruction *I); 159 bool selectLogicalOp(const Instruction *I); 160 bool selectLoad(const Instruction *I); 161 bool selectStore(const Instruction *I); 162 bool selectBranch(const Instruction *I); 163 bool selectIndirectBr(const Instruction *I); 164 bool selectCmp(const Instruction *I); 165 bool selectSelect(const Instruction *I); 166 bool selectFPExt(const Instruction *I); 167 bool selectFPTrunc(const Instruction *I); 168 bool selectFPToInt(const Instruction *I, bool Signed); 169 bool selectIntToFP(const Instruction *I, bool Signed); 170 bool selectRem(const Instruction *I, unsigned ISDOpcode); 171 bool selectRet(const Instruction *I); 172 bool selectTrunc(const Instruction *I); 173 bool selectIntExt(const Instruction *I); 174 bool selectMul(const Instruction *I); 175 bool selectShift(const Instruction *I); 176 bool selectBitCast(const Instruction *I); 177 bool selectFRem(const Instruction *I); 178 bool selectSDiv(const Instruction *I); 179 bool selectGetElementPtr(const Instruction *I); 180 bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I); 181 182 // Utility helper routines. 
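  // These helpers are shared by the selection routines above for address
  // formation, type legality checks, and small memcpy expansion.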
183 bool isTypeLegal(Type *Ty, MVT &VT); 184 bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false); 185 bool isValueAvailable(const Value *V) const; 186 bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr); 187 bool computeCallAddress(const Value *V, Address &Addr); 188 bool simplifyAddress(Address &Addr, MVT VT); 189 void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB, 190 MachineMemOperand::Flags Flags, 191 unsigned ScaleFactor, MachineMemOperand *MMO); 192 bool isMemCpySmall(uint64_t Len, unsigned Alignment); 193 bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len, 194 unsigned Alignment); 195 bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I, 196 const Value *Cond); 197 bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT); 198 bool optimizeSelect(const SelectInst *SI); 199 unsigned getRegForGEPIndex(const Value *Idx); 200 201 // Emit helper routines. 202 unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, 203 const Value *RHS, bool SetFlags = false, 204 bool WantResult = true, bool IsZExt = false); 205 unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, 206 unsigned RHSReg, bool SetFlags = false, 207 bool WantResult = true); 208 unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, 209 uint64_t Imm, bool SetFlags = false, 210 bool WantResult = true); 211 unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, 212 unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType, 213 uint64_t ShiftImm, bool SetFlags = false, 214 bool WantResult = true); 215 unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, 216 unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType, 217 uint64_t ShiftImm, bool SetFlags = false, 218 bool WantResult = true); 219 220 // Emit functions. 
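  // Most of these return the new result register, or 0 (false for the bool
  // variants) when the pattern cannot be emitted and FastISel must fall back.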
221 bool emitCompareAndBranch(const BranchInst *BI); 222 bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt); 223 bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt); 224 bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm); 225 bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS); 226 unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true, 227 MachineMemOperand *MMO = nullptr); 228 bool emitStore(MVT VT, unsigned SrcReg, Address Addr, 229 MachineMemOperand *MMO = nullptr); 230 bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg, 231 MachineMemOperand *MMO = nullptr); 232 unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt); 233 unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt); 234 unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, 235 bool SetFlags = false, bool WantResult = true, 236 bool IsZExt = false); 237 unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm); 238 unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS, 239 bool SetFlags = false, bool WantResult = true, 240 bool IsZExt = false); 241 unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg, 242 bool WantResult = true); 243 unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg, 244 AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm, 245 bool WantResult = true); 246 unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS, 247 const Value *RHS); 248 unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, 249 uint64_t Imm); 250 unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg, 251 unsigned RHSReg, uint64_t ShiftImm); 252 unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm); 253 unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1); 254 unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1); 255 unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1); 256 unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); 257 unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, 258 bool IsZExt = true); 259 unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); 260 unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, 261 bool IsZExt = true); 262 unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg); 263 unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm, 264 bool IsZExt = false); 265 266 unsigned materializeInt(const ConstantInt *CI, MVT VT); 267 unsigned materializeFP(const ConstantFP *CFP, MVT VT); 268 unsigned materializeGV(const GlobalValue *GV); 269 270 // Call handling routines. 271 private: 272 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const; 273 bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs, 274 unsigned &NumBytes); 275 bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes); 276 277 public: 278 // Backend specific FastISel code. 
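  // These override the FastISel materialization hooks that the
  // target-independent selector invokes when it needs a constant or a static
  // alloca address in a register.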
279 unsigned fastMaterializeAlloca(const AllocaInst *AI) override; 280 unsigned fastMaterializeConstant(const Constant *C) override; 281 unsigned fastMaterializeFloatZero(const ConstantFP* CF) override; 282 283 explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo, 284 const TargetLibraryInfo *LibInfo) 285 : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) { 286 Subtarget = 287 &static_cast<const AArch64Subtarget &>(FuncInfo.MF->getSubtarget()); 288 Context = &FuncInfo.Fn->getContext(); 289 } 290 291 bool fastSelectInstruction(const Instruction *I) override; 292 293 #include "AArch64GenFastISel.inc" 294 }; 295 296 } // end anonymous namespace 297 298 /// Check if the sign-/zero-extend will be a noop. 299 static bool isIntExtFree(const Instruction *I) { 300 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 301 "Unexpected integer extend instruction."); 302 assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() && 303 "Unexpected value type."); 304 bool IsZExt = isa<ZExtInst>(I); 305 306 if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0))) 307 if (LI->hasOneUse()) 308 return true; 309 310 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) 311 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) 312 return true; 313 314 return false; 315 } 316 317 /// Determine the implicit scale factor that is applied by a memory 318 /// operation for a given value type. 319 static unsigned getImplicitScaleFactor(MVT VT) { 320 switch (VT.SimpleTy) { 321 default: 322 return 0; // invalid 323 case MVT::i1: // fall-through 324 case MVT::i8: 325 return 1; 326 case MVT::i16: 327 return 2; 328 case MVT::i32: // fall-through 329 case MVT::f32: 330 return 4; 331 case MVT::i64: // fall-through 332 case MVT::f64: 333 return 8; 334 } 335 } 336 337 CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const { 338 if (CC == CallingConv::WebKit_JS) 339 return CC_AArch64_WebKit_JS; 340 if (CC == CallingConv::GHC) 341 return CC_AArch64_GHC; 342 if (CC == CallingConv::CFGuard_Check) 343 return CC_AArch64_Win64_CFGuard_Check; 344 return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS; 345 } 346 347 unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) { 348 assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 && 349 "Alloca should always return a pointer."); 350 351 // Don't handle dynamic allocas. 352 if (!FuncInfo.StaticAllocaMap.count(AI)) 353 return 0; 354 355 DenseMap<const AllocaInst *, int>::iterator SI = 356 FuncInfo.StaticAllocaMap.find(AI); 357 358 if (SI != FuncInfo.StaticAllocaMap.end()) { 359 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 360 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 361 ResultReg) 362 .addFrameIndex(SI->second) 363 .addImm(0) 364 .addImm(0); 365 return ResultReg; 366 } 367 368 return 0; 369 } 370 371 unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) { 372 if (VT > MVT::i64) 373 return 0; 374 375 if (!CI->isZero()) 376 return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue()); 377 378 // Create a copy from the zero register to materialize a "0" value. 379 const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass 380 : &AArch64::GPR32RegClass; 381 unsigned ZeroReg = (VT == MVT::i64) ? 
AArch64::XZR : AArch64::WZR; 382 Register ResultReg = createResultReg(RC); 383 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY), 384 ResultReg).addReg(ZeroReg, getKillRegState(true)); 385 return ResultReg; 386 } 387 388 unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) { 389 // Positive zero (+0.0) has to be materialized with a fmov from the zero 390 // register, because the immediate version of fmov cannot encode zero. 391 if (CFP->isNullValue()) 392 return fastMaterializeFloatZero(CFP); 393 394 if (VT != MVT::f32 && VT != MVT::f64) 395 return 0; 396 397 const APFloat Val = CFP->getValueAPF(); 398 bool Is64Bit = (VT == MVT::f64); 399 // This checks to see if we can use FMOV instructions to materialize 400 // a constant, otherwise we have to materialize via the constant pool. 401 int Imm = 402 Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val); 403 if (Imm != -1) { 404 unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi; 405 return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); 406 } 407 408 // For the large code model materialize the FP constant in code. 409 if (TM.getCodeModel() == CodeModel::Large) { 410 unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm; 411 const TargetRegisterClass *RC = Is64Bit ? 412 &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 413 414 Register TmpReg = createResultReg(RC); 415 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg) 416 .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); 417 418 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 419 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 420 TII.get(TargetOpcode::COPY), ResultReg) 421 .addReg(TmpReg, getKillRegState(true)); 422 423 return ResultReg; 424 } 425 426 // Materialize via constant pool. MachineConstantPool wants an explicit 427 // alignment. 428 Align Alignment = DL.getPrefTypeAlign(CFP->getType()); 429 430 unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment); 431 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 432 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 433 ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE); 434 435 unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui; 436 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 437 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 438 .addReg(ADRPReg) 439 .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 440 return ResultReg; 441 } 442 443 unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) { 444 // We can't handle thread-local variables quickly yet. 445 if (GV->isThreadLocal()) 446 return 0; 447 448 // MachO still uses GOT for large code-model accesses, but ELF requires 449 // movz/movk sequences, which FastISel doesn't handle yet. 
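  // Returning 0 here rejects the global so that the instruction using it is
  // selected by SelectionDAG instead.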
450 if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO()) 451 return 0; 452 453 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); 454 455 EVT DestEVT = TLI.getValueType(DL, GV->getType(), true); 456 if (!DestEVT.isSimple()) 457 return 0; 458 459 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 460 unsigned ResultReg; 461 462 if (OpFlags & AArch64II::MO_GOT) { 463 // ADRP + LDRX 464 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 465 ADRPReg) 466 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); 467 468 unsigned LdrOpc; 469 if (Subtarget->isTargetILP32()) { 470 ResultReg = createResultReg(&AArch64::GPR32RegClass); 471 LdrOpc = AArch64::LDRWui; 472 } else { 473 ResultReg = createResultReg(&AArch64::GPR64RegClass); 474 LdrOpc = AArch64::LDRXui; 475 } 476 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc), 477 ResultReg) 478 .addReg(ADRPReg) 479 .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | 480 AArch64II::MO_NC | OpFlags); 481 if (!Subtarget->isTargetILP32()) 482 return ResultReg; 483 484 // LDRWui produces a 32-bit register, but pointers in-register are 64-bits 485 // so we must extend the result on ILP32. 486 Register Result64 = createResultReg(&AArch64::GPR64RegClass); 487 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 488 TII.get(TargetOpcode::SUBREG_TO_REG)) 489 .addDef(Result64) 490 .addImm(0) 491 .addReg(ResultReg, RegState::Kill) 492 .addImm(AArch64::sub_32); 493 return Result64; 494 } else { 495 // ADRP + ADDX 496 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 497 ADRPReg) 498 .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags); 499 500 ResultReg = createResultReg(&AArch64::GPR64spRegClass); 501 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 502 ResultReg) 503 .addReg(ADRPReg) 504 .addGlobalAddress(GV, 0, 505 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags) 506 .addImm(0); 507 } 508 return ResultReg; 509 } 510 511 unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) { 512 EVT CEVT = TLI.getValueType(DL, C->getType(), true); 513 514 // Only handle simple types. 515 if (!CEVT.isSimple()) 516 return 0; 517 MVT VT = CEVT.getSimpleVT(); 518 // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that, 519 // 'null' pointers need to have a somewhat special treatment. 520 if (isa<ConstantPointerNull>(C)) { 521 assert(VT == MVT::i64 && "Expected 64-bit pointers"); 522 return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT); 523 } 524 525 if (const auto *CI = dyn_cast<ConstantInt>(C)) 526 return materializeInt(CI, VT); 527 else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 528 return materializeFP(CFP, VT); 529 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 530 return materializeGV(GV); 531 532 return 0; 533 } 534 535 unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP* CFP) { 536 assert(CFP->isNullValue() && 537 "Floating-point constant is not a positive zero."); 538 MVT VT; 539 if (!isTypeLegal(CFP->getType(), VT)) 540 return 0; 541 542 if (VT != MVT::f32 && VT != MVT::f64) 543 return 0; 544 545 bool Is64Bit = (VT == MVT::f64); 546 unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 547 unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr; 548 return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg); 549 } 550 551 /// Check if the multiply is by a power-of-2 constant. 
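/// Such multiplies can be folded into an addressing mode or a shifted-register
/// operand below, e.g. an index multiplied by 8 becomes "reg, LSL #3".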
552 static bool isMulPowOf2(const Value *I) { 553 if (const auto *MI = dyn_cast<MulOperator>(I)) { 554 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0))) 555 if (C->getValue().isPowerOf2()) 556 return true; 557 if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1))) 558 if (C->getValue().isPowerOf2()) 559 return true; 560 } 561 return false; 562 } 563 564 // Computes the address to get to an object. 565 bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty) 566 { 567 const User *U = nullptr; 568 unsigned Opcode = Instruction::UserOp1; 569 if (const Instruction *I = dyn_cast<Instruction>(Obj)) { 570 // Don't walk into other basic blocks unless the object is an alloca from 571 // another block, otherwise it may not have a virtual register assigned. 572 if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) || 573 FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 574 Opcode = I->getOpcode(); 575 U = I; 576 } 577 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { 578 Opcode = C->getOpcode(); 579 U = C; 580 } 581 582 if (auto *Ty = dyn_cast<PointerType>(Obj->getType())) 583 if (Ty->getAddressSpace() > 255) 584 // Fast instruction selection doesn't support the special 585 // address spaces. 586 return false; 587 588 switch (Opcode) { 589 default: 590 break; 591 case Instruction::BitCast: 592 // Look through bitcasts. 593 return computeAddress(U->getOperand(0), Addr, Ty); 594 595 case Instruction::IntToPtr: 596 // Look past no-op inttoptrs. 597 if (TLI.getValueType(DL, U->getOperand(0)->getType()) == 598 TLI.getPointerTy(DL)) 599 return computeAddress(U->getOperand(0), Addr, Ty); 600 break; 601 602 case Instruction::PtrToInt: 603 // Look past no-op ptrtoints. 604 if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL)) 605 return computeAddress(U->getOperand(0), Addr, Ty); 606 break; 607 608 case Instruction::GetElementPtr: { 609 Address SavedAddr = Addr; 610 uint64_t TmpOffset = Addr.getOffset(); 611 612 // Iterate through the GEP folding the constants into offsets where 613 // we can. 614 for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U); 615 GTI != E; ++GTI) { 616 const Value *Op = GTI.getOperand(); 617 if (StructType *STy = GTI.getStructTypeOrNull()) { 618 const StructLayout *SL = DL.getStructLayout(STy); 619 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); 620 TmpOffset += SL->getElementOffset(Idx); 621 } else { 622 uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType()); 623 while (true) { 624 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { 625 // Constant-offset addressing. 626 TmpOffset += CI->getSExtValue() * S; 627 break; 628 } 629 if (canFoldAddIntoGEP(U, Op)) { 630 // A compatible add with a constant operand. Fold the constant. 631 ConstantInt *CI = 632 cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1)); 633 TmpOffset += CI->getSExtValue() * S; 634 // Iterate on the other operand. 635 Op = cast<AddOperator>(Op)->getOperand(0); 636 continue; 637 } 638 // Unsupported 639 goto unsupported_gep; 640 } 641 } 642 } 643 644 // Try to grab the base operand now. 645 Addr.setOffset(TmpOffset); 646 if (computeAddress(U->getOperand(0), Addr, Ty)) 647 return true; 648 649 // We failed, restore everything and try the other options. 
650 Addr = SavedAddr; 651 652 unsupported_gep: 653 break; 654 } 655 case Instruction::Alloca: { 656 const AllocaInst *AI = cast<AllocaInst>(Obj); 657 DenseMap<const AllocaInst *, int>::iterator SI = 658 FuncInfo.StaticAllocaMap.find(AI); 659 if (SI != FuncInfo.StaticAllocaMap.end()) { 660 Addr.setKind(Address::FrameIndexBase); 661 Addr.setFI(SI->second); 662 return true; 663 } 664 break; 665 } 666 case Instruction::Add: { 667 // Adds of constants are common and easy enough. 668 const Value *LHS = U->getOperand(0); 669 const Value *RHS = U->getOperand(1); 670 671 if (isa<ConstantInt>(LHS)) 672 std::swap(LHS, RHS); 673 674 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 675 Addr.setOffset(Addr.getOffset() + CI->getSExtValue()); 676 return computeAddress(LHS, Addr, Ty); 677 } 678 679 Address Backup = Addr; 680 if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty)) 681 return true; 682 Addr = Backup; 683 684 break; 685 } 686 case Instruction::Sub: { 687 // Subs of constants are common and easy enough. 688 const Value *LHS = U->getOperand(0); 689 const Value *RHS = U->getOperand(1); 690 691 if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 692 Addr.setOffset(Addr.getOffset() - CI->getSExtValue()); 693 return computeAddress(LHS, Addr, Ty); 694 } 695 break; 696 } 697 case Instruction::Shl: { 698 if (Addr.getOffsetReg()) 699 break; 700 701 const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1)); 702 if (!CI) 703 break; 704 705 unsigned Val = CI->getZExtValue(); 706 if (Val < 1 || Val > 3) 707 break; 708 709 uint64_t NumBytes = 0; 710 if (Ty && Ty->isSized()) { 711 uint64_t NumBits = DL.getTypeSizeInBits(Ty); 712 NumBytes = NumBits / 8; 713 if (!isPowerOf2_64(NumBits)) 714 NumBytes = 0; 715 } 716 717 if (NumBytes != (1ULL << Val)) 718 break; 719 720 Addr.setShift(Val); 721 Addr.setExtendType(AArch64_AM::LSL); 722 723 const Value *Src = U->getOperand(0); 724 if (const auto *I = dyn_cast<Instruction>(Src)) { 725 if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) { 726 // Fold the zext or sext when it won't become a noop. 727 if (const auto *ZE = dyn_cast<ZExtInst>(I)) { 728 if (!isIntExtFree(ZE) && 729 ZE->getOperand(0)->getType()->isIntegerTy(32)) { 730 Addr.setExtendType(AArch64_AM::UXTW); 731 Src = ZE->getOperand(0); 732 } 733 } else if (const auto *SE = dyn_cast<SExtInst>(I)) { 734 if (!isIntExtFree(SE) && 735 SE->getOperand(0)->getType()->isIntegerTy(32)) { 736 Addr.setExtendType(AArch64_AM::SXTW); 737 Src = SE->getOperand(0); 738 } 739 } 740 } 741 } 742 743 if (const auto *AI = dyn_cast<BinaryOperator>(Src)) 744 if (AI->getOpcode() == Instruction::And) { 745 const Value *LHS = AI->getOperand(0); 746 const Value *RHS = AI->getOperand(1); 747 748 if (const auto *C = dyn_cast<ConstantInt>(LHS)) 749 if (C->getValue() == 0xffffffff) 750 std::swap(LHS, RHS); 751 752 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 753 if (C->getValue() == 0xffffffff) { 754 Addr.setExtendType(AArch64_AM::UXTW); 755 Register Reg = getRegForValue(LHS); 756 if (!Reg) 757 return false; 758 Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32); 759 Addr.setOffsetReg(Reg); 760 return true; 761 } 762 } 763 764 Register Reg = getRegForValue(Src); 765 if (!Reg) 766 return false; 767 Addr.setOffsetReg(Reg); 768 return true; 769 } 770 case Instruction::Mul: { 771 if (Addr.getOffsetReg()) 772 break; 773 774 if (!isMulPowOf2(U)) 775 break; 776 777 const Value *LHS = U->getOperand(0); 778 const Value *RHS = U->getOperand(1); 779 780 // Canonicalize power-of-2 value to the RHS. 
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())
        std::swap(LHS, RHS);

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = LHS;
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::And: {
    if (Addr.getOffsetReg())
      break;

    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
        std::swap(LHS, RHS);

    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        Register Reg = getRegForValue(LHS);
        if (!Reg)
          return false;
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
        Addr.setOffsetReg(Reg);
        return true;
      }
    break;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())
      break;

    const Value *Src = nullptr;
    // Fold the zext or sext when it won't become a noop.
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    if (!Src)
      break;

    Addr.setShift(0);
    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  } // end switch

  if (Addr.isRegBase() && !Addr.getReg()) {
    Register Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setReg(Reg);
    return true;
  }

  if (!Addr.getOffsetReg()) {
    Register Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }

  return false;
}

bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  bool InMBB = true;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}

bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
    return false;

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

/// Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
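/// The individual emit helpers widen these to 32-bit operations (see
/// emitAddSub and emitLogicalOp) and handle the extension explicitly.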
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign- or zero-extended to a basic operation,
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}

bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
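  // (For example, a frame-index base combined with a register offset cannot be
  // encoded directly, so the frame address is materialized with an ADD first.)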
1036 if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase()) 1037 { 1038 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 1039 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), 1040 ResultReg) 1041 .addFrameIndex(Addr.getFI()) 1042 .addImm(0) 1043 .addImm(0); 1044 Addr.setKind(Address::RegBase); 1045 Addr.setReg(ResultReg); 1046 } 1047 1048 if (RegisterOffsetNeedsLowering) { 1049 unsigned ResultReg = 0; 1050 if (Addr.getReg()) { 1051 if (Addr.getExtendType() == AArch64_AM::SXTW || 1052 Addr.getExtendType() == AArch64_AM::UXTW ) 1053 ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 1054 Addr.getOffsetReg(), Addr.getExtendType(), 1055 Addr.getShift()); 1056 else 1057 ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(), 1058 Addr.getOffsetReg(), AArch64_AM::LSL, 1059 Addr.getShift()); 1060 } else { 1061 if (Addr.getExtendType() == AArch64_AM::UXTW) 1062 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), 1063 Addr.getShift(), /*IsZExt=*/true); 1064 else if (Addr.getExtendType() == AArch64_AM::SXTW) 1065 ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(), 1066 Addr.getShift(), /*IsZExt=*/false); 1067 else 1068 ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(), 1069 Addr.getShift()); 1070 } 1071 if (!ResultReg) 1072 return false; 1073 1074 Addr.setReg(ResultReg); 1075 Addr.setOffsetReg(0); 1076 Addr.setShift(0); 1077 Addr.setExtendType(AArch64_AM::InvalidShiftExtend); 1078 } 1079 1080 // Since the offset is too large for the load/store instruction get the 1081 // reg+offset into a register. 1082 if (ImmediateOffsetNeedsLowering) { 1083 unsigned ResultReg; 1084 if (Addr.getReg()) 1085 // Try to fold the immediate into the add instruction. 1086 ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset); 1087 else 1088 ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset); 1089 1090 if (!ResultReg) 1091 return false; 1092 Addr.setReg(ResultReg); 1093 Addr.setOffset(0); 1094 } 1095 return true; 1096 } 1097 1098 void AArch64FastISel::addLoadStoreOperands(Address &Addr, 1099 const MachineInstrBuilder &MIB, 1100 MachineMemOperand::Flags Flags, 1101 unsigned ScaleFactor, 1102 MachineMemOperand *MMO) { 1103 int64_t Offset = Addr.getOffset() / ScaleFactor; 1104 // Frame base works a bit differently. Handle it separately. 1105 if (Addr.isFIBase()) { 1106 int FI = Addr.getFI(); 1107 // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size 1108 // and alignment should be based on the VT. 1109 MMO = FuncInfo.MF->getMachineMemOperand( 1110 MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags, 1111 MFI.getObjectSize(FI), MFI.getObjectAlign(FI)); 1112 // Now add the rest of the operands. 1113 MIB.addFrameIndex(FI).addImm(Offset); 1114 } else { 1115 assert(Addr.isRegBase() && "Unexpected address kind."); 1116 const MCInstrDesc &II = MIB->getDesc(); 1117 unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 
1 : 0; 1118 Addr.setReg( 1119 constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs()+Idx)); 1120 Addr.setOffsetReg( 1121 constrainOperandRegClass(II, Addr.getOffsetReg(), II.getNumDefs()+Idx+1)); 1122 if (Addr.getOffsetReg()) { 1123 assert(Addr.getOffset() == 0 && "Unexpected offset"); 1124 bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW || 1125 Addr.getExtendType() == AArch64_AM::SXTX; 1126 MIB.addReg(Addr.getReg()); 1127 MIB.addReg(Addr.getOffsetReg()); 1128 MIB.addImm(IsSigned); 1129 MIB.addImm(Addr.getShift() != 0); 1130 } else 1131 MIB.addReg(Addr.getReg()).addImm(Offset); 1132 } 1133 1134 if (MMO) 1135 MIB.addMemOperand(MMO); 1136 } 1137 1138 unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS, 1139 const Value *RHS, bool SetFlags, 1140 bool WantResult, bool IsZExt) { 1141 AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend; 1142 bool NeedExtend = false; 1143 switch (RetVT.SimpleTy) { 1144 default: 1145 return 0; 1146 case MVT::i1: 1147 NeedExtend = true; 1148 break; 1149 case MVT::i8: 1150 NeedExtend = true; 1151 ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB; 1152 break; 1153 case MVT::i16: 1154 NeedExtend = true; 1155 ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH; 1156 break; 1157 case MVT::i32: // fall-through 1158 case MVT::i64: 1159 break; 1160 } 1161 MVT SrcVT = RetVT; 1162 RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32); 1163 1164 // Canonicalize immediates to the RHS first. 1165 if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS)) 1166 std::swap(LHS, RHS); 1167 1168 // Canonicalize mul by power of 2 to the RHS. 1169 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) 1170 if (isMulPowOf2(LHS)) 1171 std::swap(LHS, RHS); 1172 1173 // Canonicalize shift immediate to the RHS. 1174 if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS)) 1175 if (const auto *SI = dyn_cast<BinaryOperator>(LHS)) 1176 if (isa<ConstantInt>(SI->getOperand(1))) 1177 if (SI->getOpcode() == Instruction::Shl || 1178 SI->getOpcode() == Instruction::LShr || 1179 SI->getOpcode() == Instruction::AShr ) 1180 std::swap(LHS, RHS); 1181 1182 Register LHSReg = getRegForValue(LHS); 1183 if (!LHSReg) 1184 return 0; 1185 1186 if (NeedExtend) 1187 LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt); 1188 1189 unsigned ResultReg = 0; 1190 if (const auto *C = dyn_cast<ConstantInt>(RHS)) { 1191 uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue(); 1192 if (C->isNegative()) 1193 ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags, 1194 WantResult); 1195 else 1196 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags, 1197 WantResult); 1198 } else if (const auto *C = dyn_cast<Constant>(RHS)) 1199 if (C->isNullValue()) 1200 ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult); 1201 1202 if (ResultReg) 1203 return ResultReg; 1204 1205 // Only extend the RHS within the instruction if there is a valid extend type. 
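  // (This uses the extended-register form of ADD/SUB, e.g.
  // "add w0, w1, w2, sxth".)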
1206 if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() && 1207 isValueAvailable(RHS)) { 1208 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) 1209 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) 1210 if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) { 1211 Register RHSReg = getRegForValue(SI->getOperand(0)); 1212 if (!RHSReg) 1213 return 0; 1214 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 1215 C->getZExtValue(), SetFlags, WantResult); 1216 } 1217 Register RHSReg = getRegForValue(RHS); 1218 if (!RHSReg) 1219 return 0; 1220 return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0, 1221 SetFlags, WantResult); 1222 } 1223 1224 // Check if the mul can be folded into the instruction. 1225 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1226 if (isMulPowOf2(RHS)) { 1227 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); 1228 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); 1229 1230 if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) 1231 if (C->getValue().isPowerOf2()) 1232 std::swap(MulLHS, MulRHS); 1233 1234 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); 1235 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); 1236 Register RHSReg = getRegForValue(MulLHS); 1237 if (!RHSReg) 1238 return 0; 1239 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL, 1240 ShiftVal, SetFlags, WantResult); 1241 if (ResultReg) 1242 return ResultReg; 1243 } 1244 } 1245 1246 // Check if the shift can be folded into the instruction. 1247 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1248 if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) { 1249 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { 1250 AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend; 1251 switch (SI->getOpcode()) { 1252 default: break; 1253 case Instruction::Shl: ShiftType = AArch64_AM::LSL; break; 1254 case Instruction::LShr: ShiftType = AArch64_AM::LSR; break; 1255 case Instruction::AShr: ShiftType = AArch64_AM::ASR; break; 1256 } 1257 uint64_t ShiftVal = C->getZExtValue(); 1258 if (ShiftType != AArch64_AM::InvalidShiftExtend) { 1259 Register RHSReg = getRegForValue(SI->getOperand(0)); 1260 if (!RHSReg) 1261 return 0; 1262 ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType, 1263 ShiftVal, SetFlags, WantResult); 1264 if (ResultReg) 1265 return ResultReg; 1266 } 1267 } 1268 } 1269 } 1270 1271 Register RHSReg = getRegForValue(RHS); 1272 if (!RHSReg) 1273 return 0; 1274 1275 if (NeedExtend) 1276 RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt); 1277 1278 return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult); 1279 } 1280 1281 unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg, 1282 unsigned RHSReg, bool SetFlags, 1283 bool WantResult) { 1284 assert(LHSReg && RHSReg && "Invalid register number."); 1285 1286 if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP || 1287 RHSReg == AArch64::SP || RHSReg == AArch64::WSP) 1288 return 0; 1289 1290 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1291 return 0; 1292 1293 static const unsigned OpcTable[2][2][2] = { 1294 { { AArch64::SUBWrr, AArch64::SUBXrr }, 1295 { AArch64::ADDWrr, AArch64::ADDXrr } }, 1296 { { AArch64::SUBSWrr, AArch64::SUBSXrr }, 1297 { AArch64::ADDSWrr, AArch64::ADDSXrr } } 1298 }; 1299 bool Is64Bit = RetVT == MVT::i64; 1300 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1301 const TargetRegisterClass *RC = 1302 Is64Bit ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1303 unsigned ResultReg; 1304 if (WantResult) 1305 ResultReg = createResultReg(RC); 1306 else 1307 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1308 1309 const MCInstrDesc &II = TII.get(Opc); 1310 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1311 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1312 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1313 .addReg(LHSReg) 1314 .addReg(RHSReg); 1315 return ResultReg; 1316 } 1317 1318 unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg, 1319 uint64_t Imm, bool SetFlags, 1320 bool WantResult) { 1321 assert(LHSReg && "Invalid register number."); 1322 1323 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1324 return 0; 1325 1326 unsigned ShiftImm; 1327 if (isUInt<12>(Imm)) 1328 ShiftImm = 0; 1329 else if ((Imm & 0xfff000) == Imm) { 1330 ShiftImm = 12; 1331 Imm >>= 12; 1332 } else 1333 return 0; 1334 1335 static const unsigned OpcTable[2][2][2] = { 1336 { { AArch64::SUBWri, AArch64::SUBXri }, 1337 { AArch64::ADDWri, AArch64::ADDXri } }, 1338 { { AArch64::SUBSWri, AArch64::SUBSXri }, 1339 { AArch64::ADDSWri, AArch64::ADDSXri } } 1340 }; 1341 bool Is64Bit = RetVT == MVT::i64; 1342 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1343 const TargetRegisterClass *RC; 1344 if (SetFlags) 1345 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1346 else 1347 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; 1348 unsigned ResultReg; 1349 if (WantResult) 1350 ResultReg = createResultReg(RC); 1351 else 1352 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1353 1354 const MCInstrDesc &II = TII.get(Opc); 1355 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1356 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1357 .addReg(LHSReg) 1358 .addImm(Imm) 1359 .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm)); 1360 return ResultReg; 1361 } 1362 1363 unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg, 1364 unsigned RHSReg, 1365 AArch64_AM::ShiftExtendType ShiftType, 1366 uint64_t ShiftImm, bool SetFlags, 1367 bool WantResult) { 1368 assert(LHSReg && RHSReg && "Invalid register number."); 1369 assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP && 1370 RHSReg != AArch64::SP && RHSReg != AArch64::WSP); 1371 1372 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1373 return 0; 1374 1375 // Don't deal with undefined shifts. 1376 if (ShiftImm >= RetVT.getSizeInBits()) 1377 return 0; 1378 1379 static const unsigned OpcTable[2][2][2] = { 1380 { { AArch64::SUBWrs, AArch64::SUBXrs }, 1381 { AArch64::ADDWrs, AArch64::ADDXrs } }, 1382 { { AArch64::SUBSWrs, AArch64::SUBSXrs }, 1383 { AArch64::ADDSWrs, AArch64::ADDSXrs } } 1384 }; 1385 bool Is64Bit = RetVT == MVT::i64; 1386 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1387 const TargetRegisterClass *RC = 1388 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1389 unsigned ResultReg; 1390 if (WantResult) 1391 ResultReg = createResultReg(RC); 1392 else 1393 ResultReg = Is64Bit ? 
AArch64::XZR : AArch64::WZR; 1394 1395 const MCInstrDesc &II = TII.get(Opc); 1396 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1397 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1398 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1399 .addReg(LHSReg) 1400 .addReg(RHSReg) 1401 .addImm(getShifterImm(ShiftType, ShiftImm)); 1402 return ResultReg; 1403 } 1404 1405 unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg, 1406 unsigned RHSReg, 1407 AArch64_AM::ShiftExtendType ExtType, 1408 uint64_t ShiftImm, bool SetFlags, 1409 bool WantResult) { 1410 assert(LHSReg && RHSReg && "Invalid register number."); 1411 assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR && 1412 RHSReg != AArch64::XZR && RHSReg != AArch64::WZR); 1413 1414 if (RetVT != MVT::i32 && RetVT != MVT::i64) 1415 return 0; 1416 1417 if (ShiftImm >= 4) 1418 return 0; 1419 1420 static const unsigned OpcTable[2][2][2] = { 1421 { { AArch64::SUBWrx, AArch64::SUBXrx }, 1422 { AArch64::ADDWrx, AArch64::ADDXrx } }, 1423 { { AArch64::SUBSWrx, AArch64::SUBSXrx }, 1424 { AArch64::ADDSWrx, AArch64::ADDSXrx } } 1425 }; 1426 bool Is64Bit = RetVT == MVT::i64; 1427 unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit]; 1428 const TargetRegisterClass *RC = nullptr; 1429 if (SetFlags) 1430 RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 1431 else 1432 RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass; 1433 unsigned ResultReg; 1434 if (WantResult) 1435 ResultReg = createResultReg(RC); 1436 else 1437 ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR; 1438 1439 const MCInstrDesc &II = TII.get(Opc); 1440 LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs()); 1441 RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1); 1442 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg) 1443 .addReg(LHSReg) 1444 .addReg(RHSReg) 1445 .addImm(getArithExtendImm(ExtType, ShiftImm)); 1446 return ResultReg; 1447 } 1448 1449 bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) { 1450 Type *Ty = LHS->getType(); 1451 EVT EVT = TLI.getValueType(DL, Ty, true); 1452 if (!EVT.isSimple()) 1453 return false; 1454 MVT VT = EVT.getSimpleVT(); 1455 1456 switch (VT.SimpleTy) { 1457 default: 1458 return false; 1459 case MVT::i1: 1460 case MVT::i8: 1461 case MVT::i16: 1462 case MVT::i32: 1463 case MVT::i64: 1464 return emitICmp(VT, LHS, RHS, IsZExt); 1465 case MVT::f32: 1466 case MVT::f64: 1467 return emitFCmp(VT, LHS, RHS); 1468 } 1469 } 1470 1471 bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, 1472 bool IsZExt) { 1473 return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false, 1474 IsZExt) != 0; 1475 } 1476 1477 bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) { 1478 return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm, 1479 /*SetFlags=*/true, /*WantResult=*/false) != 0; 1480 } 1481 1482 bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) { 1483 if (RetVT != MVT::f32 && RetVT != MVT::f64) 1484 return false; 1485 1486 // Check to see if the 2nd operand is a constant that we can encode directly 1487 // in the compare. 1488 bool UseImm = false; 1489 if (const auto *CFP = dyn_cast<ConstantFP>(RHS)) 1490 if (CFP->isZero() && !CFP->isNegative()) 1491 UseImm = true; 1492 1493 Register LHSReg = getRegForValue(LHS); 1494 if (!LHSReg) 1495 return false; 1496 1497 if (UseImm) { 1498 unsigned Opc = (RetVT == MVT::f64) ? 
AArch64::FCMPDri : AArch64::FCMPSri; 1499 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 1500 .addReg(LHSReg); 1501 return true; 1502 } 1503 1504 Register RHSReg = getRegForValue(RHS); 1505 if (!RHSReg) 1506 return false; 1507 1508 unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr; 1509 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 1510 .addReg(LHSReg) 1511 .addReg(RHSReg); 1512 return true; 1513 } 1514 1515 unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS, 1516 bool SetFlags, bool WantResult, bool IsZExt) { 1517 return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult, 1518 IsZExt); 1519 } 1520 1521 /// This method is a wrapper to simplify add emission. 1522 /// 1523 /// First try to emit an add with an immediate operand using emitAddSub_ri. If 1524 /// that fails, then try to materialize the immediate into a register and use 1525 /// emitAddSub_rr instead. 1526 unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) { 1527 unsigned ResultReg; 1528 if (Imm < 0) 1529 ResultReg = emitAddSub_ri(false, VT, Op0, -Imm); 1530 else 1531 ResultReg = emitAddSub_ri(true, VT, Op0, Imm); 1532 1533 if (ResultReg) 1534 return ResultReg; 1535 1536 unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm); 1537 if (!CReg) 1538 return 0; 1539 1540 ResultReg = emitAddSub_rr(true, VT, Op0, CReg); 1541 return ResultReg; 1542 } 1543 1544 unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS, 1545 bool SetFlags, bool WantResult, bool IsZExt) { 1546 return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult, 1547 IsZExt); 1548 } 1549 1550 unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg, 1551 unsigned RHSReg, bool WantResult) { 1552 return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, 1553 /*SetFlags=*/true, WantResult); 1554 } 1555 1556 unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg, 1557 unsigned RHSReg, 1558 AArch64_AM::ShiftExtendType ShiftType, 1559 uint64_t ShiftImm, bool WantResult) { 1560 return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType, 1561 ShiftImm, /*SetFlags=*/true, WantResult); 1562 } 1563 1564 unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT, 1565 const Value *LHS, const Value *RHS) { 1566 // Canonicalize immediates to the RHS first. 1567 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS)) 1568 std::swap(LHS, RHS); 1569 1570 // Canonicalize mul by power-of-2 to the RHS. 1571 if (LHS->hasOneUse() && isValueAvailable(LHS)) 1572 if (isMulPowOf2(LHS)) 1573 std::swap(LHS, RHS); 1574 1575 // Canonicalize shift immediate to the RHS. 1576 if (LHS->hasOneUse() && isValueAvailable(LHS)) 1577 if (const auto *SI = dyn_cast<ShlOperator>(LHS)) 1578 if (isa<ConstantInt>(SI->getOperand(1))) 1579 std::swap(LHS, RHS); 1580 1581 Register LHSReg = getRegForValue(LHS); 1582 if (!LHSReg) 1583 return 0; 1584 1585 unsigned ResultReg = 0; 1586 if (const auto *C = dyn_cast<ConstantInt>(RHS)) { 1587 uint64_t Imm = C->getZExtValue(); 1588 ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm); 1589 } 1590 if (ResultReg) 1591 return ResultReg; 1592 1593 // Check if the mul can be folded into the instruction. 
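  // A multiply by a power of two becomes a shifted-register operand, e.g.
  // "and w0, w1, w2, lsl #3" for a multiply by 8.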
1594 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1595 if (isMulPowOf2(RHS)) { 1596 const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0); 1597 const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1); 1598 1599 if (const auto *C = dyn_cast<ConstantInt>(MulLHS)) 1600 if (C->getValue().isPowerOf2()) 1601 std::swap(MulLHS, MulRHS); 1602 1603 assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt."); 1604 uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2(); 1605 1606 Register RHSReg = getRegForValue(MulLHS); 1607 if (!RHSReg) 1608 return 0; 1609 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal); 1610 if (ResultReg) 1611 return ResultReg; 1612 } 1613 } 1614 1615 // Check if the shift can be folded into the instruction. 1616 if (RHS->hasOneUse() && isValueAvailable(RHS)) { 1617 if (const auto *SI = dyn_cast<ShlOperator>(RHS)) 1618 if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) { 1619 uint64_t ShiftVal = C->getZExtValue(); 1620 Register RHSReg = getRegForValue(SI->getOperand(0)); 1621 if (!RHSReg) 1622 return 0; 1623 ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal); 1624 if (ResultReg) 1625 return ResultReg; 1626 } 1627 } 1628 1629 Register RHSReg = getRegForValue(RHS); 1630 if (!RHSReg) 1631 return 0; 1632 1633 MVT VT = std::max(MVT::i32, RetVT.SimpleTy); 1634 ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg); 1635 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1636 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1637 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1638 } 1639 return ResultReg; 1640 } 1641 1642 unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, 1643 unsigned LHSReg, uint64_t Imm) { 1644 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), 1645 "ISD nodes are not consecutive!"); 1646 static const unsigned OpcTable[3][2] = { 1647 { AArch64::ANDWri, AArch64::ANDXri }, 1648 { AArch64::ORRWri, AArch64::ORRXri }, 1649 { AArch64::EORWri, AArch64::EORXri } 1650 }; 1651 const TargetRegisterClass *RC; 1652 unsigned Opc; 1653 unsigned RegSize; 1654 switch (RetVT.SimpleTy) { 1655 default: 1656 return 0; 1657 case MVT::i1: 1658 case MVT::i8: 1659 case MVT::i16: 1660 case MVT::i32: { 1661 unsigned Idx = ISDOpc - ISD::AND; 1662 Opc = OpcTable[Idx][0]; 1663 RC = &AArch64::GPR32spRegClass; 1664 RegSize = 32; 1665 break; 1666 } 1667 case MVT::i64: 1668 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1669 RC = &AArch64::GPR64spRegClass; 1670 RegSize = 64; 1671 break; 1672 } 1673 1674 if (!AArch64_AM::isLogicalImmediate(Imm, RegSize)) 1675 return 0; 1676 1677 Register ResultReg = 1678 fastEmitInst_ri(Opc, RC, LHSReg, 1679 AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); 1680 if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) { 1681 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1682 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1683 } 1684 return ResultReg; 1685 } 1686 1687 unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, 1688 unsigned LHSReg, unsigned RHSReg, 1689 uint64_t ShiftImm) { 1690 static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR), 1691 "ISD nodes are not consecutive!"); 1692 static const unsigned OpcTable[3][2] = { 1693 { AArch64::ANDWrs, AArch64::ANDXrs }, 1694 { AArch64::ORRWrs, AArch64::ORRXrs }, 1695 { AArch64::EORWrs, AArch64::EORXrs } 1696 }; 1697 1698 // Don't deal with undefined shifts. 
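  // (Shift amounts of the register width or more cannot be encoded here and
  // are undefined in LLVM IR anyway.)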
1699 if (ShiftImm >= RetVT.getSizeInBits()) 1700 return 0; 1701 1702 const TargetRegisterClass *RC; 1703 unsigned Opc; 1704 switch (RetVT.SimpleTy) { 1705 default: 1706 return 0; 1707 case MVT::i1: 1708 case MVT::i8: 1709 case MVT::i16: 1710 case MVT::i32: 1711 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1712 RC = &AArch64::GPR32RegClass; 1713 break; 1714 case MVT::i64: 1715 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1716 RC = &AArch64::GPR64RegClass; 1717 break; 1718 } 1719 Register ResultReg = 1720 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg, 1721 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1722 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1723 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1724 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1725 } 1726 return ResultReg; 1727 } 1728 1729 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, 1730 uint64_t Imm) { 1731 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm); 1732 } 1733 1734 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1735 bool WantZExt, MachineMemOperand *MMO) { 1736 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1737 return 0; 1738 1739 // Simplify this down to something we can handle. 1740 if (!simplifyAddress(Addr, VT)) 1741 return 0; 1742 1743 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1744 if (!ScaleFactor) 1745 llvm_unreachable("Unexpected value type."); 1746 1747 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1748 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1749 bool UseScaled = true; 1750 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1751 UseScaled = false; 1752 ScaleFactor = 1; 1753 } 1754 1755 static const unsigned GPOpcTable[2][8][4] = { 1756 // Sign-extend. 1757 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1758 AArch64::LDURXi }, 1759 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1760 AArch64::LDURXi }, 1761 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1762 AArch64::LDRXui }, 1763 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1764 AArch64::LDRXui }, 1765 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1766 AArch64::LDRXroX }, 1767 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1768 AArch64::LDRXroX }, 1769 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1770 AArch64::LDRXroW }, 1771 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1772 AArch64::LDRXroW } 1773 }, 1774 // Zero-extend. 
1775 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1776 AArch64::LDURXi }, 1777 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1778 AArch64::LDURXi }, 1779 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1780 AArch64::LDRXui }, 1781 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1782 AArch64::LDRXui }, 1783 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1784 AArch64::LDRXroX }, 1785 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1786 AArch64::LDRXroX }, 1787 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1788 AArch64::LDRXroW }, 1789 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1790 AArch64::LDRXroW } 1791 } 1792 }; 1793 1794 static const unsigned FPOpcTable[4][2] = { 1795 { AArch64::LDURSi, AArch64::LDURDi }, 1796 { AArch64::LDRSui, AArch64::LDRDui }, 1797 { AArch64::LDRSroX, AArch64::LDRDroX }, 1798 { AArch64::LDRSroW, AArch64::LDRDroW } 1799 }; 1800 1801 unsigned Opc; 1802 const TargetRegisterClass *RC; 1803 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1804 Addr.getOffsetReg(); 1805 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1806 if (Addr.getExtendType() == AArch64_AM::UXTW || 1807 Addr.getExtendType() == AArch64_AM::SXTW) 1808 Idx++; 1809 1810 bool IsRet64Bit = RetVT == MVT::i64; 1811 switch (VT.SimpleTy) { 1812 default: 1813 llvm_unreachable("Unexpected value type."); 1814 case MVT::i1: // Intentional fall-through. 1815 case MVT::i8: 1816 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1817 RC = (IsRet64Bit && !WantZExt) ? 1818 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1819 break; 1820 case MVT::i16: 1821 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1822 RC = (IsRet64Bit && !WantZExt) ? 1823 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1824 break; 1825 case MVT::i32: 1826 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1827 RC = (IsRet64Bit && !WantZExt) ? 1828 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1829 break; 1830 case MVT::i64: 1831 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1832 RC = &AArch64::GPR64RegClass; 1833 break; 1834 case MVT::f32: 1835 Opc = FPOpcTable[Idx][0]; 1836 RC = &AArch64::FPR32RegClass; 1837 break; 1838 case MVT::f64: 1839 Opc = FPOpcTable[Idx][1]; 1840 RC = &AArch64::FPR64RegClass; 1841 break; 1842 } 1843 1844 // Create the base instruction, then add the operands. 1845 Register ResultReg = createResultReg(RC); 1846 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1847 TII.get(Opc), ResultReg); 1848 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1849 1850 // Loading an i1 requires special handling. 1851 if (VT == MVT::i1) { 1852 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1); 1853 assert(ANDReg && "Unexpected AND instruction emission failure."); 1854 ResultReg = ANDReg; 1855 } 1856 1857 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1858 // the 32bit reg to a 64bit reg. 
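// For example, an i8 load whose result is zero-extended to i64 is emitted as
// an LDRBB* into a W register (the 32-bit write already clears the upper
// bits) followed by the SUBREG_TO_REG below to form the X register.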
1859 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1860 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 1861 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1862 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1863 .addImm(0) 1864 .addReg(ResultReg, getKillRegState(true)) 1865 .addImm(AArch64::sub_32); 1866 ResultReg = Reg64; 1867 } 1868 return ResultReg; 1869 } 1870 1871 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1872 MVT VT; 1873 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1874 return false; 1875 1876 if (VT.isVector()) 1877 return selectOperator(I, I->getOpcode()); 1878 1879 unsigned ResultReg; 1880 switch (I->getOpcode()) { 1881 default: 1882 llvm_unreachable("Unexpected instruction."); 1883 case Instruction::Add: 1884 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1885 break; 1886 case Instruction::Sub: 1887 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1888 break; 1889 } 1890 if (!ResultReg) 1891 return false; 1892 1893 updateValueMap(I, ResultReg); 1894 return true; 1895 } 1896 1897 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1898 MVT VT; 1899 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1900 return false; 1901 1902 if (VT.isVector()) 1903 return selectOperator(I, I->getOpcode()); 1904 1905 unsigned ResultReg; 1906 switch (I->getOpcode()) { 1907 default: 1908 llvm_unreachable("Unexpected instruction."); 1909 case Instruction::And: 1910 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1911 break; 1912 case Instruction::Or: 1913 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1914 break; 1915 case Instruction::Xor: 1916 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1917 break; 1918 } 1919 if (!ResultReg) 1920 return false; 1921 1922 updateValueMap(I, ResultReg); 1923 return true; 1924 } 1925 1926 bool AArch64FastISel::selectLoad(const Instruction *I) { 1927 MVT VT; 1928 // Verify we have a legal type before going any further. Currently, we handle 1929 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1930 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 1931 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1932 cast<LoadInst>(I)->isAtomic()) 1933 return false; 1934 1935 const Value *SV = I->getOperand(0); 1936 if (TLI.supportSwiftError()) { 1937 // Swifterror values can come from either a function parameter with 1938 // swifterror attribute or an alloca with swifterror attribute. 1939 if (const Argument *Arg = dyn_cast<Argument>(SV)) { 1940 if (Arg->hasSwiftErrorAttr()) 1941 return false; 1942 } 1943 1944 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { 1945 if (Alloca->isSwiftError()) 1946 return false; 1947 } 1948 } 1949 1950 // See if we can handle this address. 1951 Address Addr; 1952 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 1953 return false; 1954 1955 // Fold the following sign-/zero-extend into the load instruction. 
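// For example, a load i8 whose single use is 'sext ... to i32' is selected as
// one sign-extending load (LDRSBW*) instead of a plain byte load followed by
// a separate sign-extend instruction.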
1956 bool WantZExt = true; 1957 MVT RetVT = VT; 1958 const Value *IntExtVal = nullptr; 1959 if (I->hasOneUse()) { 1960 if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) { 1961 if (isTypeSupported(ZE->getType(), RetVT)) 1962 IntExtVal = ZE; 1963 else 1964 RetVT = VT; 1965 } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) { 1966 if (isTypeSupported(SE->getType(), RetVT)) 1967 IntExtVal = SE; 1968 else 1969 RetVT = VT; 1970 WantZExt = false; 1971 } 1972 } 1973 1974 unsigned ResultReg = 1975 emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I)); 1976 if (!ResultReg) 1977 return false; 1978 1979 // There are a few different cases we have to handle, because the load or the 1980 // sign-/zero-extend might not be selected by FastISel if we fall-back to 1981 // SelectionDAG. There is also an ordering issue when both instructions are in 1982 // different basic blocks. 1983 // 1.) The load instruction is selected by FastISel, but the integer extend 1984 // not. This usually happens when the integer extend is in a different 1985 // basic block and SelectionDAG took over for that basic block. 1986 // 2.) The load instruction is selected before the integer extend. This only 1987 // happens when the integer extend is in a different basic block. 1988 // 3.) The load instruction is selected by SelectionDAG and the integer extend 1989 // by FastISel. This happens if there are instructions between the load 1990 // and the integer extend that couldn't be selected by FastISel. 1991 if (IntExtVal) { 1992 // The integer extend hasn't been emitted yet. FastISel or SelectionDAG 1993 // could select it. Emit a copy to subreg if necessary. FastISel will remove 1994 // it when it selects the integer extend. 1995 Register Reg = lookUpRegForValue(IntExtVal); 1996 auto *MI = MRI.getUniqueVRegDef(Reg); 1997 if (!MI) { 1998 if (RetVT == MVT::i64 && VT <= MVT::i32) { 1999 if (WantZExt) { 2000 // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG). 2001 MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt)); 2002 ResultReg = std::prev(I)->getOperand(0).getReg(); 2003 removeDeadCode(I, std::next(I)); 2004 } else 2005 ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg, 2006 AArch64::sub_32); 2007 } 2008 updateValueMap(I, ResultReg); 2009 return true; 2010 } 2011 2012 // The integer extend has already been emitted - delete all the instructions 2013 // that have been emitted by the integer extend lowering code and use the 2014 // result from the load instruction directly. 
2015 while (MI) { 2016 Reg = 0; 2017 for (auto &Opnd : MI->uses()) { 2018 if (Opnd.isReg()) { 2019 Reg = Opnd.getReg(); 2020 break; 2021 } 2022 } 2023 MachineBasicBlock::iterator I(MI); 2024 removeDeadCode(I, std::next(I)); 2025 MI = nullptr; 2026 if (Reg) 2027 MI = MRI.getUniqueVRegDef(Reg); 2028 } 2029 updateValueMap(IntExtVal, ResultReg); 2030 return true; 2031 } 2032 2033 updateValueMap(I, ResultReg); 2034 return true; 2035 } 2036 2037 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, 2038 unsigned AddrReg, 2039 MachineMemOperand *MMO) { 2040 unsigned Opc; 2041 switch (VT.SimpleTy) { 2042 default: return false; 2043 case MVT::i8: Opc = AArch64::STLRB; break; 2044 case MVT::i16: Opc = AArch64::STLRH; break; 2045 case MVT::i32: Opc = AArch64::STLRW; break; 2046 case MVT::i64: Opc = AArch64::STLRX; break; 2047 } 2048 2049 const MCInstrDesc &II = TII.get(Opc); 2050 SrcReg = constrainOperandRegClass(II, SrcReg, 0); 2051 AddrReg = constrainOperandRegClass(II, AddrReg, 1); 2052 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2053 .addReg(SrcReg) 2054 .addReg(AddrReg) 2055 .addMemOperand(MMO); 2056 return true; 2057 } 2058 2059 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 2060 MachineMemOperand *MMO) { 2061 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 2062 return false; 2063 2064 // Simplify this down to something we can handle. 2065 if (!simplifyAddress(Addr, VT)) 2066 return false; 2067 2068 unsigned ScaleFactor = getImplicitScaleFactor(VT); 2069 if (!ScaleFactor) 2070 llvm_unreachable("Unexpected value type."); 2071 2072 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 2073 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 2074 bool UseScaled = true; 2075 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 2076 UseScaled = false; 2077 ScaleFactor = 1; 2078 } 2079 2080 static const unsigned OpcTable[4][6] = { 2081 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 2082 AArch64::STURSi, AArch64::STURDi }, 2083 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 2084 AArch64::STRSui, AArch64::STRDui }, 2085 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 2086 AArch64::STRSroX, AArch64::STRDroX }, 2087 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 2088 AArch64::STRSroW, AArch64::STRDroW } 2089 }; 2090 2091 unsigned Opc; 2092 bool VTIsi1 = false; 2093 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 2094 Addr.getOffsetReg(); 2095 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 2096 if (Addr.getExtendType() == AArch64_AM::UXTW || 2097 Addr.getExtendType() == AArch64_AM::SXTW) 2098 Idx++; 2099 2100 switch (VT.SimpleTy) { 2101 default: llvm_unreachable("Unexpected value type."); 2102 case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH; 2103 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2104 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2105 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2106 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2107 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2108 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2109 } 2110 2111 // Storing an i1 requires special handling. 2112 if (VTIsi1 && SrcReg != AArch64::WZR) { 2113 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1); 2114 assert(ANDReg && "Unexpected AND instruction emission failure."); 2115 SrcReg = ANDReg; 2116 } 2117 // Create the base instruction, then add the operands. 
2118 const MCInstrDesc &II = TII.get(Opc); 2119 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2120 MachineInstrBuilder MIB = 2121 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg); 2122 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2123 2124 return true; 2125 } 2126 2127 bool AArch64FastISel::selectStore(const Instruction *I) { 2128 MVT VT; 2129 const Value *Op0 = I->getOperand(0); 2130 // Verify we have a legal type before going any further. Currently, we handle 2131 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2132 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2133 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) 2134 return false; 2135 2136 const Value *PtrV = I->getOperand(1); 2137 if (TLI.supportSwiftError()) { 2138 // Swifterror values can come from either a function parameter with 2139 // swifterror attribute or an alloca with swifterror attribute. 2140 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { 2141 if (Arg->hasSwiftErrorAttr()) 2142 return false; 2143 } 2144 2145 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { 2146 if (Alloca->isSwiftError()) 2147 return false; 2148 } 2149 } 2150 2151 // Get the value to be stored into a register. Use the zero register directly 2152 // when possible to avoid an unnecessary copy and a wasted register. 2153 unsigned SrcReg = 0; 2154 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2155 if (CI->isZero()) 2156 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2157 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2158 if (CF->isZero() && !CF->isNegative()) { 2159 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2160 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2161 } 2162 } 2163 2164 if (!SrcReg) 2165 SrcReg = getRegForValue(Op0); 2166 2167 if (!SrcReg) 2168 return false; 2169 2170 auto *SI = cast<StoreInst>(I); 2171 2172 // Try to emit a STLR for seq_cst/release. 2173 if (SI->isAtomic()) { 2174 AtomicOrdering Ord = SI->getOrdering(); 2175 // The non-atomic instructions are sufficient for relaxed stores. 2176 if (isReleaseOrStronger(Ord)) { 2177 // The STLR addressing mode only supports a base reg; pass that directly. 2178 Register AddrReg = getRegForValue(PtrV); 2179 return emitStoreRelease(VT, SrcReg, AddrReg, 2180 createMachineMemOperandFor(I)); 2181 } 2182 } 2183 2184 // See if we can handle this address. 2185 Address Addr; 2186 if (!computeAddress(PtrV, Addr, Op0->getType())) 2187 return false; 2188 2189 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2190 return false; 2191 return true; 2192 } 2193 2194 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2195 switch (Pred) { 2196 case CmpInst::FCMP_ONE: 2197 case CmpInst::FCMP_UEQ: 2198 default: 2199 // AL is our "false" for now. The other two need more compares. 
2200 return AArch64CC::AL; 2201 case CmpInst::ICMP_EQ: 2202 case CmpInst::FCMP_OEQ: 2203 return AArch64CC::EQ; 2204 case CmpInst::ICMP_SGT: 2205 case CmpInst::FCMP_OGT: 2206 return AArch64CC::GT; 2207 case CmpInst::ICMP_SGE: 2208 case CmpInst::FCMP_OGE: 2209 return AArch64CC::GE; 2210 case CmpInst::ICMP_UGT: 2211 case CmpInst::FCMP_UGT: 2212 return AArch64CC::HI; 2213 case CmpInst::FCMP_OLT: 2214 return AArch64CC::MI; 2215 case CmpInst::ICMP_ULE: 2216 case CmpInst::FCMP_OLE: 2217 return AArch64CC::LS; 2218 case CmpInst::FCMP_ORD: 2219 return AArch64CC::VC; 2220 case CmpInst::FCMP_UNO: 2221 return AArch64CC::VS; 2222 case CmpInst::FCMP_UGE: 2223 return AArch64CC::PL; 2224 case CmpInst::ICMP_SLT: 2225 case CmpInst::FCMP_ULT: 2226 return AArch64CC::LT; 2227 case CmpInst::ICMP_SLE: 2228 case CmpInst::FCMP_ULE: 2229 return AArch64CC::LE; 2230 case CmpInst::FCMP_UNE: 2231 case CmpInst::ICMP_NE: 2232 return AArch64CC::NE; 2233 case CmpInst::ICMP_UGE: 2234 return AArch64CC::HS; 2235 case CmpInst::ICMP_ULT: 2236 return AArch64CC::LO; 2237 } 2238 } 2239 2240 /// Try to emit a combined compare-and-branch instruction. 2241 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2242 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions 2243 // will not be produced, as they are conditional branch instructions that do 2244 // not set flags. 2245 if (FuncInfo.MF->getFunction().hasFnAttribute( 2246 Attribute::SpeculativeLoadHardening)) 2247 return false; 2248 2249 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2250 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2251 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2252 2253 const Value *LHS = CI->getOperand(0); 2254 const Value *RHS = CI->getOperand(1); 2255 2256 MVT VT; 2257 if (!isTypeSupported(LHS->getType(), VT)) 2258 return false; 2259 2260 unsigned BW = VT.getSizeInBits(); 2261 if (BW > 64) 2262 return false; 2263 2264 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2265 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2266 2267 // Try to take advantage of fallthrough opportunities. 
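// If the true block is the layout successor, branch to the false block on the
// inverted condition instead, so the common path simply falls through.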
2268 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2269 std::swap(TBB, FBB); 2270 Predicate = CmpInst::getInversePredicate(Predicate); 2271 } 2272 2273 int TestBit = -1; 2274 bool IsCmpNE; 2275 switch (Predicate) { 2276 default: 2277 return false; 2278 case CmpInst::ICMP_EQ: 2279 case CmpInst::ICMP_NE: 2280 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2281 std::swap(LHS, RHS); 2282 2283 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2284 return false; 2285 2286 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2287 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2288 const Value *AndLHS = AI->getOperand(0); 2289 const Value *AndRHS = AI->getOperand(1); 2290 2291 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2292 if (C->getValue().isPowerOf2()) 2293 std::swap(AndLHS, AndRHS); 2294 2295 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2296 if (C->getValue().isPowerOf2()) { 2297 TestBit = C->getValue().logBase2(); 2298 LHS = AndLHS; 2299 } 2300 } 2301 2302 if (VT == MVT::i1) 2303 TestBit = 0; 2304 2305 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2306 break; 2307 case CmpInst::ICMP_SLT: 2308 case CmpInst::ICMP_SGE: 2309 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2310 return false; 2311 2312 TestBit = BW - 1; 2313 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2314 break; 2315 case CmpInst::ICMP_SGT: 2316 case CmpInst::ICMP_SLE: 2317 if (!isa<ConstantInt>(RHS)) 2318 return false; 2319 2320 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2321 return false; 2322 2323 TestBit = BW - 1; 2324 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2325 break; 2326 } // end switch 2327 2328 static const unsigned OpcTable[2][2][2] = { 2329 { {AArch64::CBZW, AArch64::CBZX }, 2330 {AArch64::CBNZW, AArch64::CBNZX} }, 2331 { {AArch64::TBZW, AArch64::TBZX }, 2332 {AArch64::TBNZW, AArch64::TBNZX} } 2333 }; 2334 2335 bool IsBitTest = TestBit != -1; 2336 bool Is64Bit = BW == 64; 2337 if (TestBit < 32 && TestBit >= 0) 2338 Is64Bit = false; 2339 2340 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2341 const MCInstrDesc &II = TII.get(Opc); 2342 2343 Register SrcReg = getRegForValue(LHS); 2344 if (!SrcReg) 2345 return false; 2346 2347 if (BW == 64 && !Is64Bit) 2348 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32); 2349 2350 if ((BW < 32) && !IsBitTest) 2351 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); 2352 2353 // Emit the combined compare and branch instruction. 2354 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2355 MachineInstrBuilder MIB = 2356 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 2357 .addReg(SrcReg); 2358 if (IsBitTest) 2359 MIB.addImm(TestBit); 2360 MIB.addMBB(TBB); 2361 2362 finishCondBranch(BI->getParent(), TBB, FBB); 2363 return true; 2364 } 2365 2366 bool AArch64FastISel::selectBranch(const Instruction *I) { 2367 const BranchInst *BI = cast<BranchInst>(I); 2368 if (BI->isUnconditional()) { 2369 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2370 fastEmitBranch(MSucc, BI->getDebugLoc()); 2371 return true; 2372 } 2373 2374 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2375 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2376 2377 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2378 if (CI->hasOneUse() && isValueAvailable(CI)) { 2379 // Try to optimize or fold the cmp. 
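// An FCMP_FALSE or FCMP_TRUE condition folds to an unconditional branch to
// the false or true successor, respectively.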
2380 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2381 switch (Predicate) { 2382 default: 2383 break; 2384 case CmpInst::FCMP_FALSE: 2385 fastEmitBranch(FBB, DbgLoc); 2386 return true; 2387 case CmpInst::FCMP_TRUE: 2388 fastEmitBranch(TBB, DbgLoc); 2389 return true; 2390 } 2391 2392 // Try to emit a combined compare-and-branch first. 2393 if (emitCompareAndBranch(BI)) 2394 return true; 2395 2396 // Try to take advantage of fallthrough opportunities. 2397 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2398 std::swap(TBB, FBB); 2399 Predicate = CmpInst::getInversePredicate(Predicate); 2400 } 2401 2402 // Emit the cmp. 2403 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2404 return false; 2405 2406 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2407 // instruction. 2408 AArch64CC::CondCode CC = getCompareCC(Predicate); 2409 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2410 switch (Predicate) { 2411 default: 2412 break; 2413 case CmpInst::FCMP_UEQ: 2414 ExtraCC = AArch64CC::EQ; 2415 CC = AArch64CC::VS; 2416 break; 2417 case CmpInst::FCMP_ONE: 2418 ExtraCC = AArch64CC::MI; 2419 CC = AArch64CC::GT; 2420 break; 2421 } 2422 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2423 2424 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2425 if (ExtraCC != AArch64CC::AL) { 2426 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2427 .addImm(ExtraCC) 2428 .addMBB(TBB); 2429 } 2430 2431 // Emit the branch. 2432 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2433 .addImm(CC) 2434 .addMBB(TBB); 2435 2436 finishCondBranch(BI->getParent(), TBB, FBB); 2437 return true; 2438 } 2439 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2440 uint64_t Imm = CI->getZExtValue(); 2441 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2442 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) 2443 .addMBB(Target); 2444 2445 // Obtain the branch probability and add the target to the successor list. 2446 if (FuncInfo.BPI) { 2447 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2448 BI->getParent(), Target->getBasicBlock()); 2449 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2450 } else 2451 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2452 return true; 2453 } else { 2454 AArch64CC::CondCode CC = AArch64CC::NE; 2455 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2456 // Fake request the condition, otherwise the intrinsic might be completely 2457 // optimized away. 2458 Register CondReg = getRegForValue(BI->getCondition()); 2459 if (!CondReg) 2460 return false; 2461 2462 // Emit the branch. 2463 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2464 .addImm(CC) 2465 .addMBB(TBB); 2466 2467 finishCondBranch(BI->getParent(), TBB, FBB); 2468 return true; 2469 } 2470 } 2471 2472 Register CondReg = getRegForValue(BI->getCondition()); 2473 if (CondReg == 0) 2474 return false; 2475 2476 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
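// That is, emit 'TBNZ Wcond, #0, TBB', or a TBZ with the successors swapped
// when the true block is the layout successor.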
2477 unsigned Opcode = AArch64::TBNZW; 2478 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2479 std::swap(TBB, FBB); 2480 Opcode = AArch64::TBZW; 2481 } 2482 2483 const MCInstrDesc &II = TII.get(Opcode); 2484 Register ConstrainedCondReg 2485 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2486 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2487 .addReg(ConstrainedCondReg) 2488 .addImm(0) 2489 .addMBB(TBB); 2490 2491 finishCondBranch(BI->getParent(), TBB, FBB); 2492 return true; 2493 } 2494 2495 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2496 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2497 Register AddrReg = getRegForValue(BI->getOperand(0)); 2498 if (AddrReg == 0) 2499 return false; 2500 2501 // Emit the indirect branch. 2502 const MCInstrDesc &II = TII.get(AArch64::BR); 2503 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2504 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); 2505 2506 // Make sure the CFG is up-to-date. 2507 for (auto *Succ : BI->successors()) 2508 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); 2509 2510 return true; 2511 } 2512 2513 bool AArch64FastISel::selectCmp(const Instruction *I) { 2514 const CmpInst *CI = cast<CmpInst>(I); 2515 2516 // Vectors of i1 are weird: bail out. 2517 if (CI->getType()->isVectorTy()) 2518 return false; 2519 2520 // Try to optimize or fold the cmp. 2521 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2522 unsigned ResultReg = 0; 2523 switch (Predicate) { 2524 default: 2525 break; 2526 case CmpInst::FCMP_FALSE: 2527 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2528 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2529 TII.get(TargetOpcode::COPY), ResultReg) 2530 .addReg(AArch64::WZR, getKillRegState(true)); 2531 break; 2532 case CmpInst::FCMP_TRUE: 2533 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2534 break; 2535 } 2536 2537 if (ResultReg) { 2538 updateValueMap(I, ResultReg); 2539 return true; 2540 } 2541 2542 // Emit the cmp. 2543 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2544 return false; 2545 2546 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2547 2548 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2549 // condition codes are inverted, because they are used by CSINC. 2550 static unsigned CondCodeTable[2][2] = { 2551 { AArch64CC::NE, AArch64CC::VC }, 2552 { AArch64CC::PL, AArch64CC::LE } 2553 }; 2554 unsigned *CondCodes = nullptr; 2555 switch (Predicate) { 2556 default: 2557 break; 2558 case CmpInst::FCMP_UEQ: 2559 CondCodes = &CondCodeTable[0][0]; 2560 break; 2561 case CmpInst::FCMP_ONE: 2562 CondCodes = &CondCodeTable[1][0]; 2563 break; 2564 } 2565 2566 if (CondCodes) { 2567 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2568 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2569 TmpReg1) 2570 .addReg(AArch64::WZR, getKillRegState(true)) 2571 .addReg(AArch64::WZR, getKillRegState(true)) 2572 .addImm(CondCodes[0]); 2573 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2574 ResultReg) 2575 .addReg(TmpReg1, getKillRegState(true)) 2576 .addReg(AArch64::WZR, getKillRegState(true)) 2577 .addImm(CondCodes[1]); 2578 2579 updateValueMap(I, ResultReg); 2580 return true; 2581 } 2582 2583 // Now set a register based on the comparison. 
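// CSINC Wd, WZR, WZR, invert(CC) yields 0 when the inverted condition holds
// and WZR + 1 = 1 otherwise, i.e. exactly 1 when CC holds.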
2584 AArch64CC::CondCode CC = getCompareCC(Predicate); 2585 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2586 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2587 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2588 ResultReg) 2589 .addReg(AArch64::WZR, getKillRegState(true)) 2590 .addReg(AArch64::WZR, getKillRegState(true)) 2591 .addImm(invertedCC); 2592 2593 updateValueMap(I, ResultReg); 2594 return true; 2595 } 2596 2597 /// Optimize selects of i1 if one of the operands has a 'true' or 'false' 2598 /// value. 2599 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2600 if (!SI->getType()->isIntegerTy(1)) 2601 return false; 2602 2603 const Value *Src1Val, *Src2Val; 2604 unsigned Opc = 0; 2605 bool NeedExtraOp = false; 2606 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2607 if (CI->isOne()) { 2608 Src1Val = SI->getCondition(); 2609 Src2Val = SI->getFalseValue(); 2610 Opc = AArch64::ORRWrr; 2611 } else { 2612 assert(CI->isZero()); 2613 Src1Val = SI->getFalseValue(); 2614 Src2Val = SI->getCondition(); 2615 Opc = AArch64::BICWrr; 2616 } 2617 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2618 if (CI->isOne()) { 2619 Src1Val = SI->getCondition(); 2620 Src2Val = SI->getTrueValue(); 2621 Opc = AArch64::ORRWrr; 2622 NeedExtraOp = true; 2623 } else { 2624 assert(CI->isZero()); 2625 Src1Val = SI->getCondition(); 2626 Src2Val = SI->getTrueValue(); 2627 Opc = AArch64::ANDWrr; 2628 } 2629 } 2630 2631 if (!Opc) 2632 return false; 2633 2634 Register Src1Reg = getRegForValue(Src1Val); 2635 if (!Src1Reg) 2636 return false; 2637 2638 Register Src2Reg = getRegForValue(Src2Val); 2639 if (!Src2Reg) 2640 return false; 2641 2642 if (NeedExtraOp) 2643 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1); 2644 2645 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2646 Src2Reg); 2647 updateValueMap(SI, ResultReg); 2648 return true; 2649 } 2650 2651 bool AArch64FastISel::selectSelect(const Instruction *I) { 2652 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2653 MVT VT; 2654 if (!isTypeSupported(I->getType(), VT)) 2655 return false; 2656 2657 unsigned Opc; 2658 const TargetRegisterClass *RC; 2659 switch (VT.SimpleTy) { 2660 default: 2661 return false; 2662 case MVT::i1: 2663 case MVT::i8: 2664 case MVT::i16: 2665 case MVT::i32: 2666 Opc = AArch64::CSELWr; 2667 RC = &AArch64::GPR32RegClass; 2668 break; 2669 case MVT::i64: 2670 Opc = AArch64::CSELXr; 2671 RC = &AArch64::GPR64RegClass; 2672 break; 2673 case MVT::f32: 2674 Opc = AArch64::FCSELSrrr; 2675 RC = &AArch64::FPR32RegClass; 2676 break; 2677 case MVT::f64: 2678 Opc = AArch64::FCSELDrrr; 2679 RC = &AArch64::FPR64RegClass; 2680 break; 2681 } 2682 2683 const SelectInst *SI = cast<SelectInst>(I); 2684 const Value *Cond = SI->getCondition(); 2685 AArch64CC::CondCode CC = AArch64CC::NE; 2686 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2687 2688 if (optimizeSelect(SI)) 2689 return true; 2690 2691 // Try to pickup the flags, so we don't have to emit another compare. 2692 if (foldXALUIntrinsic(CC, I, Cond)) { 2693 // Fake request the condition to force emission of the XALU intrinsic. 2694 Register CondReg = getRegForValue(Cond); 2695 if (!CondReg) 2696 return false; 2697 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2698 isValueAvailable(Cond)) { 2699 const auto *Cmp = cast<CmpInst>(Cond); 2700 // Try to optimize or fold the cmp. 
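// An FCMP_FALSE or FCMP_TRUE condition lets the select collapse directly to
// its false or true operand, respectively.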
2701 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2702 const Value *FoldSelect = nullptr; 2703 switch (Predicate) { 2704 default: 2705 break; 2706 case CmpInst::FCMP_FALSE: 2707 FoldSelect = SI->getFalseValue(); 2708 break; 2709 case CmpInst::FCMP_TRUE: 2710 FoldSelect = SI->getTrueValue(); 2711 break; 2712 } 2713 2714 if (FoldSelect) { 2715 Register SrcReg = getRegForValue(FoldSelect); 2716 if (!SrcReg) 2717 return false; 2718 2719 updateValueMap(I, SrcReg); 2720 return true; 2721 } 2722 2723 // Emit the cmp. 2724 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2725 return false; 2726 2727 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2728 CC = getCompareCC(Predicate); 2729 switch (Predicate) { 2730 default: 2731 break; 2732 case CmpInst::FCMP_UEQ: 2733 ExtraCC = AArch64CC::EQ; 2734 CC = AArch64CC::VS; 2735 break; 2736 case CmpInst::FCMP_ONE: 2737 ExtraCC = AArch64CC::MI; 2738 CC = AArch64CC::GT; 2739 break; 2740 } 2741 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2742 } else { 2743 Register CondReg = getRegForValue(Cond); 2744 if (!CondReg) 2745 return false; 2746 2747 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2748 CondReg = constrainOperandRegClass(II, CondReg, 1); 2749 2750 // Emit a TST instruction (ANDS wzr, reg, #imm). 2751 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, 2752 AArch64::WZR) 2753 .addReg(CondReg) 2754 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2755 } 2756 2757 Register Src1Reg = getRegForValue(SI->getTrueValue()); 2758 Register Src2Reg = getRegForValue(SI->getFalseValue()); 2759 2760 if (!Src1Reg || !Src2Reg) 2761 return false; 2762 2763 if (ExtraCC != AArch64CC::AL) 2764 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC); 2765 2766 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC); 2767 updateValueMap(I, ResultReg); 2768 return true; 2769 } 2770 2771 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2772 Value *V = I->getOperand(0); 2773 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2774 return false; 2775 2776 Register Op = getRegForValue(V); 2777 if (Op == 0) 2778 return false; 2779 2780 Register ResultReg = createResultReg(&AArch64::FPR64RegClass); 2781 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), 2782 ResultReg).addReg(Op); 2783 updateValueMap(I, ResultReg); 2784 return true; 2785 } 2786 2787 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2788 Value *V = I->getOperand(0); 2789 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2790 return false; 2791 2792 Register Op = getRegForValue(V); 2793 if (Op == 0) 2794 return false; 2795 2796 Register ResultReg = createResultReg(&AArch64::FPR32RegClass); 2797 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr), 2798 ResultReg).addReg(Op); 2799 updateValueMap(I, ResultReg); 2800 return true; 2801 } 2802 2803 // FPToUI and FPToSI 2804 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2805 MVT DestVT; 2806 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2807 return false; 2808 2809 Register SrcReg = getRegForValue(I->getOperand(0)); 2810 if (SrcReg == 0) 2811 return false; 2812 2813 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2814 if (SrcVT == MVT::f128 || SrcVT == MVT::f16) 2815 return false; 2816 2817 unsigned Opc; 2818 if (SrcVT == MVT::f64) { 2819 if (Signed) 2820 Opc = (DestVT == MVT::i32) ? 
AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2821 else 2822 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2823 } else { 2824 if (Signed) 2825 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2826 else 2827 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2828 } 2829 Register ResultReg = createResultReg( 2830 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2831 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 2832 .addReg(SrcReg); 2833 updateValueMap(I, ResultReg); 2834 return true; 2835 } 2836 2837 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2838 MVT DestVT; 2839 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2840 return false; 2841 // Let regular ISEL handle FP16 2842 if (DestVT == MVT::f16) 2843 return false; 2844 2845 assert((DestVT == MVT::f32 || DestVT == MVT::f64) && 2846 "Unexpected value type."); 2847 2848 Register SrcReg = getRegForValue(I->getOperand(0)); 2849 if (!SrcReg) 2850 return false; 2851 2852 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2853 2854 // Handle sign-extension. 2855 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2856 SrcReg = 2857 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2858 if (!SrcReg) 2859 return false; 2860 } 2861 2862 unsigned Opc; 2863 if (SrcVT == MVT::i64) { 2864 if (Signed) 2865 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2866 else 2867 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2868 } else { 2869 if (Signed) 2870 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2871 else 2872 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2873 } 2874 2875 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg); 2876 updateValueMap(I, ResultReg); 2877 return true; 2878 } 2879 2880 bool AArch64FastISel::fastLowerArguments() { 2881 if (!FuncInfo.CanLowerReturn) 2882 return false; 2883 2884 const Function *F = FuncInfo.Fn; 2885 if (F->isVarArg()) 2886 return false; 2887 2888 CallingConv::ID CC = F->getCallingConv(); 2889 if (CC != CallingConv::C && CC != CallingConv::Swift) 2890 return false; 2891 2892 if (Subtarget->hasCustomCallingConv()) 2893 return false; 2894 2895 // Only handle simple cases of up to 8 GPR and FPR each. 
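// In other words, every argument must fit in X0-X7 or V0-V7 (used below
// through their W/H/S/D/Q views); if anything would have to be passed on the
// stack, fall back to the default argument lowering.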
2896 unsigned GPRCnt = 0; 2897 unsigned FPRCnt = 0; 2898 for (auto const &Arg : F->args()) { 2899 if (Arg.hasAttribute(Attribute::ByVal) || 2900 Arg.hasAttribute(Attribute::InReg) || 2901 Arg.hasAttribute(Attribute::StructRet) || 2902 Arg.hasAttribute(Attribute::SwiftSelf) || 2903 Arg.hasAttribute(Attribute::SwiftAsync) || 2904 Arg.hasAttribute(Attribute::SwiftError) || 2905 Arg.hasAttribute(Attribute::Nest)) 2906 return false; 2907 2908 Type *ArgTy = Arg.getType(); 2909 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2910 return false; 2911 2912 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2913 if (!ArgVT.isSimple()) 2914 return false; 2915 2916 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2917 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2918 return false; 2919 2920 if (VT.isVector() && 2921 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2922 return false; 2923 2924 if (VT >= MVT::i1 && VT <= MVT::i64) 2925 ++GPRCnt; 2926 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2927 VT.is128BitVector()) 2928 ++FPRCnt; 2929 else 2930 return false; 2931 2932 if (GPRCnt > 8 || FPRCnt > 8) 2933 return false; 2934 } 2935 2936 static const MCPhysReg Registers[6][8] = { 2937 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 2938 AArch64::W5, AArch64::W6, AArch64::W7 }, 2939 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 2940 AArch64::X5, AArch64::X6, AArch64::X7 }, 2941 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 2942 AArch64::H5, AArch64::H6, AArch64::H7 }, 2943 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 2944 AArch64::S5, AArch64::S6, AArch64::S7 }, 2945 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 2946 AArch64::D5, AArch64::D6, AArch64::D7 }, 2947 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 2948 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 2949 }; 2950 2951 unsigned GPRIdx = 0; 2952 unsigned FPRIdx = 0; 2953 for (auto const &Arg : F->args()) { 2954 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 2955 unsigned SrcReg; 2956 const TargetRegisterClass *RC; 2957 if (VT >= MVT::i1 && VT <= MVT::i32) { 2958 SrcReg = Registers[0][GPRIdx++]; 2959 RC = &AArch64::GPR32RegClass; 2960 VT = MVT::i32; 2961 } else if (VT == MVT::i64) { 2962 SrcReg = Registers[1][GPRIdx++]; 2963 RC = &AArch64::GPR64RegClass; 2964 } else if (VT == MVT::f16) { 2965 SrcReg = Registers[2][FPRIdx++]; 2966 RC = &AArch64::FPR16RegClass; 2967 } else if (VT == MVT::f32) { 2968 SrcReg = Registers[3][FPRIdx++]; 2969 RC = &AArch64::FPR32RegClass; 2970 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 2971 SrcReg = Registers[4][FPRIdx++]; 2972 RC = &AArch64::FPR64RegClass; 2973 } else if (VT.is128BitVector()) { 2974 SrcReg = Registers[5][FPRIdx++]; 2975 RC = &AArch64::FPR128RegClass; 2976 } else 2977 llvm_unreachable("Unexpected value type."); 2978 2979 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 2980 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 2981 // Without this, EmitLiveInCopies may eliminate the livein if its only 2982 // use is a bitcast (which isn't turned into an instruction). 
2983 Register ResultReg = createResultReg(RC); 2984 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2985 TII.get(TargetOpcode::COPY), ResultReg) 2986 .addReg(DstReg, getKillRegState(true)); 2987 updateValueMap(&Arg, ResultReg); 2988 } 2989 return true; 2990 } 2991 2992 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 2993 SmallVectorImpl<MVT> &OutVTs, 2994 unsigned &NumBytes) { 2995 CallingConv::ID CC = CLI.CallConv; 2996 SmallVector<CCValAssign, 16> ArgLocs; 2997 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 2998 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 2999 3000 // Get a count of how many bytes are to be pushed on the stack. 3001 NumBytes = CCInfo.getNextStackOffset(); 3002 3003 // Issue CALLSEQ_START 3004 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 3005 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) 3006 .addImm(NumBytes).addImm(0); 3007 3008 // Process the args. 3009 for (CCValAssign &VA : ArgLocs) { 3010 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 3011 MVT ArgVT = OutVTs[VA.getValNo()]; 3012 3013 Register ArgReg = getRegForValue(ArgVal); 3014 if (!ArgReg) 3015 return false; 3016 3017 // Handle arg promotion: SExt, ZExt, AExt. 3018 switch (VA.getLocInfo()) { 3019 case CCValAssign::Full: 3020 break; 3021 case CCValAssign::SExt: { 3022 MVT DestVT = VA.getLocVT(); 3023 MVT SrcVT = ArgVT; 3024 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 3025 if (!ArgReg) 3026 return false; 3027 break; 3028 } 3029 case CCValAssign::AExt: 3030 // Intentional fall-through. 3031 case CCValAssign::ZExt: { 3032 MVT DestVT = VA.getLocVT(); 3033 MVT SrcVT = ArgVT; 3034 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 3035 if (!ArgReg) 3036 return false; 3037 break; 3038 } 3039 default: 3040 llvm_unreachable("Unknown arg promotion!"); 3041 } 3042 3043 // Now copy/store arg to correct locations. 3044 if (VA.isRegLoc() && !VA.needsCustom()) { 3045 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3046 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 3047 CLI.OutRegs.push_back(VA.getLocReg()); 3048 } else if (VA.needsCustom()) { 3049 // FIXME: Handle custom args. 3050 return false; 3051 } else { 3052 assert(VA.isMemLoc() && "Assuming store on stack."); 3053 3054 // Don't emit stores for undef values. 3055 if (isa<UndefValue>(ArgVal)) 3056 continue; 3057 3058 // Need to store on the stack. 3059 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 3060 3061 unsigned BEAlign = 0; 3062 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3063 BEAlign = 8 - ArgSize; 3064 3065 Address Addr; 3066 Addr.setKind(Address::RegBase); 3067 Addr.setReg(AArch64::SP); 3068 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3069 3070 Align Alignment = DL.getABITypeAlign(ArgVal->getType()); 3071 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3072 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 3073 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3074 3075 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3076 return false; 3077 } 3078 } 3079 return true; 3080 } 3081 3082 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, 3083 unsigned NumBytes) { 3084 CallingConv::ID CC = CLI.CallConv; 3085 3086 // Issue CALLSEQ_END 3087 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3088 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) 3089 .addImm(NumBytes).addImm(0); 3090 3091 // Now the return value. 
3092 if (RetVT != MVT::isVoid) { 3093 SmallVector<CCValAssign, 16> RVLocs; 3094 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3095 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); 3096 3097 // Only handle a single return value. 3098 if (RVLocs.size() != 1) 3099 return false; 3100 3101 // Copy all of the result registers out of their specified physreg. 3102 MVT CopyVT = RVLocs[0].getValVT(); 3103 3104 // TODO: Handle big-endian results 3105 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3106 return false; 3107 3108 Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); 3109 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3110 TII.get(TargetOpcode::COPY), ResultReg) 3111 .addReg(RVLocs[0].getLocReg()); 3112 CLI.InRegs.push_back(RVLocs[0].getLocReg()); 3113 3114 CLI.ResultReg = ResultReg; 3115 CLI.NumResultRegs = 1; 3116 } 3117 3118 return true; 3119 } 3120 3121 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3122 CallingConv::ID CC = CLI.CallConv; 3123 bool IsTailCall = CLI.IsTailCall; 3124 bool IsVarArg = CLI.IsVarArg; 3125 const Value *Callee = CLI.Callee; 3126 MCSymbol *Symbol = CLI.Symbol; 3127 3128 if (!Callee && !Symbol) 3129 return false; 3130 3131 // Allow SelectionDAG isel to handle calls to functions like setjmp that need 3132 // a bti instruction following the call. 3133 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) && 3134 !Subtarget->noBTIAtReturnTwice() && 3135 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) 3136 return false; 3137 3138 // Allow SelectionDAG isel to handle tail calls. 3139 if (IsTailCall) 3140 return false; 3141 3142 // FIXME: we could and should support this, but for now correctness at -O0 is 3143 // more important. 3144 if (Subtarget->isTargetILP32()) 3145 return false; 3146 3147 CodeModel::Model CM = TM.getCodeModel(); 3148 // Only support the small-addressing and large code models. 3149 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) 3150 return false; 3151 3152 // FIXME: Add large code model support for ELF. 3153 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3154 return false; 3155 3156 // Let SDISel handle vararg functions. 3157 if (IsVarArg) 3158 return false; 3159 3160 // FIXME: Only handle *simple* calls for now. 3161 MVT RetVT; 3162 if (CLI.RetTy->isVoidTy()) 3163 RetVT = MVT::isVoid; 3164 else if (!isTypeLegal(CLI.RetTy, RetVT)) 3165 return false; 3166 3167 for (auto Flag : CLI.OutFlags) 3168 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || 3169 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError()) 3170 return false; 3171 3172 // Set up the argument vectors. 3173 SmallVector<MVT, 16> OutVTs; 3174 OutVTs.reserve(CLI.OutVals.size()); 3175 3176 for (auto *Val : CLI.OutVals) { 3177 MVT VT; 3178 if (!isTypeLegal(Val->getType(), VT) && 3179 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3180 return false; 3181 3182 // We don't handle vector parameters yet. 3183 if (VT.isVector() || VT.getSizeInBits() > 64) 3184 return false; 3185 3186 OutVTs.push_back(VT); 3187 } 3188 3189 Address Addr; 3190 if (Callee && !computeCallAddress(Callee, Addr)) 3191 return false; 3192 3193 // The weak function target may be zero; in that case we must use indirect 3194 // addressing via a stub on windows as it may be out of range for a 3195 // PC-relative jump. 
3196 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() && 3197 Addr.getGlobalValue()->hasExternalWeakLinkage()) 3198 return false; 3199 3200 // Handle the arguments now that we've gotten them. 3201 unsigned NumBytes; 3202 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3203 return false; 3204 3205 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3206 if (RegInfo->isAnyArgRegReserved(*MF)) 3207 RegInfo->emitReservedArgRegCallError(*MF); 3208 3209 // Issue the call. 3210 MachineInstrBuilder MIB; 3211 if (Subtarget->useSmallAddressing()) { 3212 const MCInstrDesc &II = 3213 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL); 3214 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); 3215 if (Symbol) 3216 MIB.addSym(Symbol, 0); 3217 else if (Addr.getGlobalValue()) 3218 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3219 else if (Addr.getReg()) { 3220 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3221 MIB.addReg(Reg); 3222 } else 3223 return false; 3224 } else { 3225 unsigned CallReg = 0; 3226 if (Symbol) { 3227 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3228 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 3229 ADRPReg) 3230 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3231 3232 CallReg = createResultReg(&AArch64::GPR64RegClass); 3233 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3234 TII.get(AArch64::LDRXui), CallReg) 3235 .addReg(ADRPReg) 3236 .addSym(Symbol, 3237 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3238 } else if (Addr.getGlobalValue()) 3239 CallReg = materializeGV(Addr.getGlobalValue()); 3240 else if (Addr.getReg()) 3241 CallReg = Addr.getReg(); 3242 3243 if (!CallReg) 3244 return false; 3245 3246 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF)); 3247 CallReg = constrainOperandRegClass(II, CallReg, 0); 3248 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg); 3249 } 3250 3251 // Add implicit physical register uses to the call. 3252 for (auto Reg : CLI.OutRegs) 3253 MIB.addReg(Reg, RegState::Implicit); 3254 3255 // Add a register mask with the call-preserved registers. 3256 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3257 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3258 3259 CLI.Call = MIB; 3260 3261 // Finish off the call including any return values. 3262 return finishCall(CLI, RetVT, NumBytes); 3263 } 3264 3265 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) { 3266 if (Alignment) 3267 return Len / Alignment <= 4; 3268 else 3269 return Len < 32; 3270 } 3271 3272 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3273 uint64_t Len, unsigned Alignment) { 3274 // Make sure we don't bloat code by inlining very large memcpy's. 3275 if (!isMemCpySmall(Len, Alignment)) 3276 return false; 3277 3278 int64_t UnscaledOffset = 0; 3279 Address OrigDest = Dest; 3280 Address OrigSrc = Src; 3281 3282 while (Len) { 3283 MVT VT; 3284 if (!Alignment || Alignment >= 8) { 3285 if (Len >= 8) 3286 VT = MVT::i64; 3287 else if (Len >= 4) 3288 VT = MVT::i32; 3289 else if (Len >= 2) 3290 VT = MVT::i16; 3291 else { 3292 VT = MVT::i8; 3293 } 3294 } else { 3295 // Bound based on alignment. 
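// For example, an 8-byte copy with only 2-byte alignment is emitted as four
// i16 load/store pairs.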
3296 if (Len >= 4 && Alignment == 4) 3297 VT = MVT::i32; 3298 else if (Len >= 2 && Alignment == 2) 3299 VT = MVT::i16; 3300 else { 3301 VT = MVT::i8; 3302 } 3303 } 3304 3305 unsigned ResultReg = emitLoad(VT, VT, Src); 3306 if (!ResultReg) 3307 return false; 3308 3309 if (!emitStore(VT, ResultReg, Dest)) 3310 return false; 3311 3312 int64_t Size = VT.getSizeInBits() / 8; 3313 Len -= Size; 3314 UnscaledOffset += Size; 3315 3316 // We need to recompute the unscaled offset for each iteration. 3317 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3318 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3319 } 3320 3321 return true; 3322 } 3323 3324 /// Check if it is possible to fold the condition from the XALU intrinsic 3325 /// into the user. The condition code will only be updated on success. 3326 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3327 const Instruction *I, 3328 const Value *Cond) { 3329 if (!isa<ExtractValueInst>(Cond)) 3330 return false; 3331 3332 const auto *EV = cast<ExtractValueInst>(Cond); 3333 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3334 return false; 3335 3336 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3337 MVT RetVT; 3338 const Function *Callee = II->getCalledFunction(); 3339 Type *RetTy = 3340 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3341 if (!isTypeLegal(RetTy, RetVT)) 3342 return false; 3343 3344 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3345 return false; 3346 3347 const Value *LHS = II->getArgOperand(0); 3348 const Value *RHS = II->getArgOperand(1); 3349 3350 // Canonicalize immediate to the RHS. 3351 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3352 std::swap(LHS, RHS); 3353 3354 // Simplify multiplies. 3355 Intrinsic::ID IID = II->getIntrinsicID(); 3356 switch (IID) { 3357 default: 3358 break; 3359 case Intrinsic::smul_with_overflow: 3360 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3361 if (C->getValue() == 2) 3362 IID = Intrinsic::sadd_with_overflow; 3363 break; 3364 case Intrinsic::umul_with_overflow: 3365 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3366 if (C->getValue() == 2) 3367 IID = Intrinsic::uadd_with_overflow; 3368 break; 3369 } 3370 3371 AArch64CC::CondCode TmpCC; 3372 switch (IID) { 3373 default: 3374 return false; 3375 case Intrinsic::sadd_with_overflow: 3376 case Intrinsic::ssub_with_overflow: 3377 TmpCC = AArch64CC::VS; 3378 break; 3379 case Intrinsic::uadd_with_overflow: 3380 TmpCC = AArch64CC::HS; 3381 break; 3382 case Intrinsic::usub_with_overflow: 3383 TmpCC = AArch64CC::LO; 3384 break; 3385 case Intrinsic::smul_with_overflow: 3386 case Intrinsic::umul_with_overflow: 3387 TmpCC = AArch64CC::NE; 3388 break; 3389 } 3390 3391 // Check if both instructions are in the same basic block. 3392 if (!isValueAvailable(II)) 3393 return false; 3394 3395 // Make sure nothing is in the way 3396 BasicBlock::const_iterator Start(I); 3397 BasicBlock::const_iterator End(II); 3398 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3399 // We only expect extractvalue instructions between the intrinsic and the 3400 // instruction to be selected. 3401 if (!isa<ExtractValueInst>(Itr)) 3402 return false; 3403 3404 // Check that the extractvalue operand comes from the intrinsic. 
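// (Both results of a with-overflow intrinsic are typically read via
// extractvalue, so additional extractvalues of the same intrinsic between it
// and the user are expected and harmless.)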
3405 const auto *EVI = cast<ExtractValueInst>(Itr); 3406 if (EVI->getAggregateOperand() != II) 3407 return false; 3408 } 3409 3410 CC = TmpCC; 3411 return true; 3412 } 3413 3414 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3415 // FIXME: Handle more intrinsics. 3416 switch (II->getIntrinsicID()) { 3417 default: return false; 3418 case Intrinsic::frameaddress: { 3419 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3420 MFI.setFrameAddressIsTaken(true); 3421 3422 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3423 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3424 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3425 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3426 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3427 // Recursively load frame address 3428 // ldr x0, [fp] 3429 // ldr x0, [x0] 3430 // ldr x0, [x0] 3431 // ... 3432 unsigned DestReg; 3433 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3434 while (Depth--) { 3435 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3436 SrcReg, 0); 3437 assert(DestReg && "Unexpected LDR instruction emission failure."); 3438 SrcReg = DestReg; 3439 } 3440 3441 updateValueMap(II, SrcReg); 3442 return true; 3443 } 3444 case Intrinsic::sponentry: { 3445 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3446 3447 // SP = FP + Fixed Object + 16 3448 int FI = MFI.CreateFixedObject(4, 0, false); 3449 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 3450 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3451 TII.get(AArch64::ADDXri), ResultReg) 3452 .addFrameIndex(FI) 3453 .addImm(0) 3454 .addImm(0); 3455 3456 updateValueMap(II, ResultReg); 3457 return true; 3458 } 3459 case Intrinsic::memcpy: 3460 case Intrinsic::memmove: { 3461 const auto *MTI = cast<MemTransferInst>(II); 3462 // Don't handle volatile. 3463 if (MTI->isVolatile()) 3464 return false; 3465 3466 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3467 // we would emit dead code because we don't currently handle memmoves. 3468 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3469 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3470 // Small memcpy's are common enough that we want to do them without a call 3471 // if possible. 3472 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3473 unsigned Alignment = MinAlign(MTI->getDestAlignment(), 3474 MTI->getSourceAlignment()); 3475 if (isMemCpySmall(Len, Alignment)) { 3476 Address Dest, Src; 3477 if (!computeAddress(MTI->getRawDest(), Dest) || 3478 !computeAddress(MTI->getRawSource(), Src)) 3479 return false; 3480 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3481 return true; 3482 } 3483 } 3484 3485 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3486 return false; 3487 3488 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3489 // Fast instruction selection doesn't support the special 3490 // address spaces. 3491 return false; 3492 3493 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3494 return lowerCallTo(II, IntrMemName, II->arg_size() - 1); 3495 } 3496 case Intrinsic::memset: { 3497 const MemSetInst *MSI = cast<MemSetInst>(II); 3498 // Don't handle volatile. 
3499 if (MSI->isVolatile()) 3500 return false; 3501 3502 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3503 return false; 3504 3505 if (MSI->getDestAddressSpace() > 255) 3506 // Fast instruction selection doesn't support the special 3507 // address spaces. 3508 return false; 3509 3510 return lowerCallTo(II, "memset", II->arg_size() - 1); 3511 } 3512 case Intrinsic::sin: 3513 case Intrinsic::cos: 3514 case Intrinsic::pow: { 3515 MVT RetVT; 3516 if (!isTypeLegal(II->getType(), RetVT)) 3517 return false; 3518 3519 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3520 return false; 3521 3522 static const RTLIB::Libcall LibCallTable[3][2] = { 3523 { RTLIB::SIN_F32, RTLIB::SIN_F64 }, 3524 { RTLIB::COS_F32, RTLIB::COS_F64 }, 3525 { RTLIB::POW_F32, RTLIB::POW_F64 } 3526 }; 3527 RTLIB::Libcall LC; 3528 bool Is64Bit = RetVT == MVT::f64; 3529 switch (II->getIntrinsicID()) { 3530 default: 3531 llvm_unreachable("Unexpected intrinsic."); 3532 case Intrinsic::sin: 3533 LC = LibCallTable[0][Is64Bit]; 3534 break; 3535 case Intrinsic::cos: 3536 LC = LibCallTable[1][Is64Bit]; 3537 break; 3538 case Intrinsic::pow: 3539 LC = LibCallTable[2][Is64Bit]; 3540 break; 3541 } 3542 3543 ArgListTy Args; 3544 Args.reserve(II->arg_size()); 3545 3546 // Populate the argument list. 3547 for (auto &Arg : II->args()) { 3548 ArgListEntry Entry; 3549 Entry.Val = Arg; 3550 Entry.Ty = Arg->getType(); 3551 Args.push_back(Entry); 3552 } 3553 3554 CallLoweringInfo CLI; 3555 MCContext &Ctx = MF->getContext(); 3556 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3557 TLI.getLibcallName(LC), std::move(Args)); 3558 if (!lowerCallTo(CLI)) 3559 return false; 3560 updateValueMap(II, CLI.ResultReg); 3561 return true; 3562 } 3563 case Intrinsic::fabs: { 3564 MVT VT; 3565 if (!isTypeLegal(II->getType(), VT)) 3566 return false; 3567 3568 unsigned Opc; 3569 switch (VT.SimpleTy) { 3570 default: 3571 return false; 3572 case MVT::f32: 3573 Opc = AArch64::FABSSr; 3574 break; 3575 case MVT::f64: 3576 Opc = AArch64::FABSDr; 3577 break; 3578 } 3579 Register SrcReg = getRegForValue(II->getOperand(0)); 3580 if (!SrcReg) 3581 return false; 3582 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3583 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 3584 .addReg(SrcReg); 3585 updateValueMap(II, ResultReg); 3586 return true; 3587 } 3588 case Intrinsic::trap: 3589 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 3590 .addImm(1); 3591 return true; 3592 case Intrinsic::debugtrap: 3593 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 3594 .addImm(0xF000); 3595 return true; 3596 3597 case Intrinsic::sqrt: { 3598 Type *RetTy = II->getCalledFunction()->getReturnType(); 3599 3600 MVT VT; 3601 if (!isTypeLegal(RetTy, VT)) 3602 return false; 3603 3604 Register Op0Reg = getRegForValue(II->getOperand(0)); 3605 if (!Op0Reg) 3606 return false; 3607 3608 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg); 3609 if (!ResultReg) 3610 return false; 3611 3612 updateValueMap(II, ResultReg); 3613 return true; 3614 } 3615 case Intrinsic::sadd_with_overflow: 3616 case Intrinsic::uadd_with_overflow: 3617 case Intrinsic::ssub_with_overflow: 3618 case Intrinsic::usub_with_overflow: 3619 case Intrinsic::smul_with_overflow: 3620 case Intrinsic::umul_with_overflow: { 3621 // This implements the basic lowering of the xalu with overflow intrinsics. 
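    // For example, i32 sadd.with.overflow is lowered to roughly
    //   adds w8, w0, w1   // value result, sets NZCV
    //   cset w9, vs       // overflow bit (CSINC with the inverted condition)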
3622 const Function *Callee = II->getCalledFunction(); 3623 auto *Ty = cast<StructType>(Callee->getReturnType()); 3624 Type *RetTy = Ty->getTypeAtIndex(0U); 3625 3626 MVT VT; 3627 if (!isTypeLegal(RetTy, VT)) 3628 return false; 3629 3630 if (VT != MVT::i32 && VT != MVT::i64) 3631 return false; 3632 3633 const Value *LHS = II->getArgOperand(0); 3634 const Value *RHS = II->getArgOperand(1); 3635 // Canonicalize immediate to the RHS. 3636 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3637 std::swap(LHS, RHS); 3638 3639 // Simplify multiplies. 3640 Intrinsic::ID IID = II->getIntrinsicID(); 3641 switch (IID) { 3642 default: 3643 break; 3644 case Intrinsic::smul_with_overflow: 3645 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3646 if (C->getValue() == 2) { 3647 IID = Intrinsic::sadd_with_overflow; 3648 RHS = LHS; 3649 } 3650 break; 3651 case Intrinsic::umul_with_overflow: 3652 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3653 if (C->getValue() == 2) { 3654 IID = Intrinsic::uadd_with_overflow; 3655 RHS = LHS; 3656 } 3657 break; 3658 } 3659 3660 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3661 AArch64CC::CondCode CC = AArch64CC::Invalid; 3662 switch (IID) { 3663 default: llvm_unreachable("Unexpected intrinsic!"); 3664 case Intrinsic::sadd_with_overflow: 3665 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3666 CC = AArch64CC::VS; 3667 break; 3668 case Intrinsic::uadd_with_overflow: 3669 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3670 CC = AArch64CC::HS; 3671 break; 3672 case Intrinsic::ssub_with_overflow: 3673 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3674 CC = AArch64CC::VS; 3675 break; 3676 case Intrinsic::usub_with_overflow: 3677 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3678 CC = AArch64CC::LO; 3679 break; 3680 case Intrinsic::smul_with_overflow: { 3681 CC = AArch64CC::NE; 3682 Register LHSReg = getRegForValue(LHS); 3683 if (!LHSReg) 3684 return false; 3685 3686 Register RHSReg = getRegForValue(RHS); 3687 if (!RHSReg) 3688 return false; 3689 3690 if (VT == MVT::i32) { 3691 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg); 3692 Register MulSubReg = 3693 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3694 // cmp xreg, wreg, sxtw 3695 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg, 3696 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true, 3697 /*WantResult=*/false); 3698 MulReg = MulSubReg; 3699 } else { 3700 assert(VT == MVT::i64 && "Unexpected value type."); 3701 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3702 // reused in the next instruction. 
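        // mul   xL, xA, xB
        // smulh xH, xA, xB
        // cmp   xH, xL, asr #63  // NE iff xH differs from the sign bits of xL,
        //                        // i.e. the multiply overflowed.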
3703 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3704 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg); 3705 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63, 3706 /*WantResult=*/false); 3707 } 3708 break; 3709 } 3710 case Intrinsic::umul_with_overflow: { 3711 CC = AArch64CC::NE; 3712 Register LHSReg = getRegForValue(LHS); 3713 if (!LHSReg) 3714 return false; 3715 3716 Register RHSReg = getRegForValue(RHS); 3717 if (!RHSReg) 3718 return false; 3719 3720 if (VT == MVT::i32) { 3721 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg); 3722 // tst xreg, #0xffffffff00000000 3723 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3724 TII.get(AArch64::ANDSXri), AArch64::XZR) 3725 .addReg(MulReg) 3726 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64)); 3727 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3728 } else { 3729 assert(VT == MVT::i64 && "Unexpected value type."); 3730 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3731 // reused in the next instruction. 3732 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3733 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg); 3734 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false); 3735 } 3736 break; 3737 } 3738 } 3739 3740 if (MulReg) { 3741 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3742 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3743 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3744 } 3745 3746 if (!ResultReg1) 3747 return false; 3748 3749 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3750 AArch64::WZR, AArch64::WZR, 3751 getInvertedCondCode(CC)); 3752 (void)ResultReg2; 3753 assert((ResultReg1 + 1) == ResultReg2 && 3754 "Nonconsecutive result registers."); 3755 updateValueMap(II, ResultReg1, 2); 3756 return true; 3757 } 3758 } 3759 return false; 3760 } 3761 3762 bool AArch64FastISel::selectRet(const Instruction *I) { 3763 const ReturnInst *Ret = cast<ReturnInst>(I); 3764 const Function &F = *I->getParent()->getParent(); 3765 3766 if (!FuncInfo.CanLowerReturn) 3767 return false; 3768 3769 if (F.isVarArg()) 3770 return false; 3771 3772 if (TLI.supportSwiftError() && 3773 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 3774 return false; 3775 3776 if (TLI.supportSplitCSR(FuncInfo.MF)) 3777 return false; 3778 3779 // Build a list of return value registers. 3780 SmallVector<unsigned, 4> RetRegs; 3781 3782 if (Ret->getNumOperands() > 0) { 3783 CallingConv::ID CC = F.getCallingConv(); 3784 SmallVector<ISD::OutputArg, 4> Outs; 3785 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3786 3787 // Analyze operands of the call, assigning locations to each operand. 3788 SmallVector<CCValAssign, 16> ValLocs; 3789 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3790 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS 3791 : RetCC_AArch64_AAPCS; 3792 CCInfo.AnalyzeReturn(Outs, RetCC); 3793 3794 // Only handle a single return value for now. 3795 if (ValLocs.size() != 1) 3796 return false; 3797 3798 CCValAssign &VA = ValLocs[0]; 3799 const Value *RV = Ret->getOperand(0); 3800 3801 // Don't bother handling odd stuff for now. 3802 if ((VA.getLocInfo() != CCValAssign::Full) && 3803 (VA.getLocInfo() != CCValAssign::BCvt)) 3804 return false; 3805 3806 // Only handle register returns for now. 
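    // A return value assigned to a stack location falls back to SelectionDAG.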
3807 if (!VA.isRegLoc()) 3808 return false; 3809 3810 Register Reg = getRegForValue(RV); 3811 if (Reg == 0) 3812 return false; 3813 3814 unsigned SrcReg = Reg + VA.getValNo(); 3815 Register DestReg = VA.getLocReg(); 3816 // Avoid a cross-class copy. This is very unlikely. 3817 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 3818 return false; 3819 3820 EVT RVEVT = TLI.getValueType(DL, RV->getType()); 3821 if (!RVEVT.isSimple()) 3822 return false; 3823 3824 // Vectors (of > 1 lane) in big endian need tricky handling. 3825 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() && 3826 !Subtarget->isLittleEndian()) 3827 return false; 3828 3829 MVT RVVT = RVEVT.getSimpleVT(); 3830 if (RVVT == MVT::f128) 3831 return false; 3832 3833 MVT DestVT = VA.getValVT(); 3834 // Special handling for extended integers. 3835 if (RVVT != DestVT) { 3836 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) 3837 return false; 3838 3839 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 3840 return false; 3841 3842 bool IsZExt = Outs[0].Flags.isZExt(); 3843 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); 3844 if (SrcReg == 0) 3845 return false; 3846 } 3847 3848 // "Callee" (i.e. value producer) zero extends pointers at function 3849 // boundary. 3850 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy()) 3851 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff); 3852 3853 // Make the copy. 3854 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3855 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); 3856 3857 // Add register to return instruction. 3858 RetRegs.push_back(VA.getLocReg()); 3859 } 3860 3861 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3862 TII.get(AArch64::RET_ReallyLR)); 3863 for (unsigned RetReg : RetRegs) 3864 MIB.addReg(RetReg, RegState::Implicit); 3865 return true; 3866 } 3867 3868 bool AArch64FastISel::selectTrunc(const Instruction *I) { 3869 Type *DestTy = I->getType(); 3870 Value *Op = I->getOperand(0); 3871 Type *SrcTy = Op->getType(); 3872 3873 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); 3874 EVT DestEVT = TLI.getValueType(DL, DestTy, true); 3875 if (!SrcEVT.isSimple()) 3876 return false; 3877 if (!DestEVT.isSimple()) 3878 return false; 3879 3880 MVT SrcVT = SrcEVT.getSimpleVT(); 3881 MVT DestVT = DestEVT.getSimpleVT(); 3882 3883 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && 3884 SrcVT != MVT::i8) 3885 return false; 3886 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && 3887 DestVT != MVT::i1) 3888 return false; 3889 3890 Register SrcReg = getRegForValue(Op); 3891 if (!SrcReg) 3892 return false; 3893 3894 // If we're truncating from i64 to a smaller non-legal type then generate an 3895 // AND. Otherwise, we know the high bits are undefined and a truncate only 3896 // generate a COPY. We cannot mark the source register also as result 3897 // register, because this can incorrectly transfer the kill flag onto the 3898 // source register. 3899 unsigned ResultReg; 3900 if (SrcVT == MVT::i64) { 3901 uint64_t Mask = 0; 3902 switch (DestVT.SimpleTy) { 3903 default: 3904 // Trunc i64 to i32 is handled by the target-independent fast-isel. 3905 return false; 3906 case MVT::i1: 3907 Mask = 0x1; 3908 break; 3909 case MVT::i8: 3910 Mask = 0xff; 3911 break; 3912 case MVT::i16: 3913 Mask = 0xffff; 3914 break; 3915 } 3916 // Issue an extract_subreg to get the lower 32-bits. 
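    // (e.g. trunc i64 -> i8 becomes a sub_32 copy followed by an AND with #0xff)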
3917 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, 3918 AArch64::sub_32); 3919 // Create the AND instruction which performs the actual truncation. 3920 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask); 3921 assert(ResultReg && "Unexpected AND instruction emission failure."); 3922 } else { 3923 ResultReg = createResultReg(&AArch64::GPR32RegClass); 3924 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3925 TII.get(TargetOpcode::COPY), ResultReg) 3926 .addReg(SrcReg); 3927 } 3928 3929 updateValueMap(I, ResultReg); 3930 return true; 3931 } 3932 3933 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { 3934 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || 3935 DestVT == MVT::i64) && 3936 "Unexpected value type."); 3937 // Handle i8 and i16 as i32. 3938 if (DestVT == MVT::i8 || DestVT == MVT::i16) 3939 DestVT = MVT::i32; 3940 3941 if (IsZExt) { 3942 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1); 3943 assert(ResultReg && "Unexpected AND instruction emission failure."); 3944 if (DestVT == MVT::i64) { 3945 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the 3946 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. 3947 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3948 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3949 TII.get(AArch64::SUBREG_TO_REG), Reg64) 3950 .addImm(0) 3951 .addReg(ResultReg) 3952 .addImm(AArch64::sub_32); 3953 ResultReg = Reg64; 3954 } 3955 return ResultReg; 3956 } else { 3957 if (DestVT == MVT::i64) { 3958 // FIXME: We're SExt i1 to i64. 3959 return 0; 3960 } 3961 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 3962 0, 0); 3963 } 3964 } 3965 3966 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 3967 unsigned Opc, ZReg; 3968 switch (RetVT.SimpleTy) { 3969 default: return 0; 3970 case MVT::i8: 3971 case MVT::i16: 3972 case MVT::i32: 3973 RetVT = MVT::i32; 3974 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 3975 case MVT::i64: 3976 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 3977 } 3978 3979 const TargetRegisterClass *RC = 3980 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 3981 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg); 3982 } 3983 3984 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 3985 if (RetVT != MVT::i64) 3986 return 0; 3987 3988 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, 3989 Op0, Op1, AArch64::XZR); 3990 } 3991 3992 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 3993 if (RetVT != MVT::i64) 3994 return 0; 3995 3996 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, 3997 Op0, Op1, AArch64::XZR); 3998 } 3999 4000 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, 4001 unsigned Op1Reg) { 4002 unsigned Opc = 0; 4003 bool NeedTrunc = false; 4004 uint64_t Mask = 0; 4005 switch (RetVT.SimpleTy) { 4006 default: return 0; 4007 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; 4008 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; 4009 case MVT::i32: Opc = AArch64::LSLVWr; break; 4010 case MVT::i64: Opc = AArch64::LSLVXr; break; 4011 } 4012 4013 const TargetRegisterClass *RC = 4014 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4015 if (NeedTrunc) 4016 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4017 4018 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4019 if (NeedTrunc) 4020 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4021 return ResultReg; 4022 } 4023 4024 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4025 uint64_t Shift, bool IsZExt) { 4026 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4027 "Unexpected source/return type pair."); 4028 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4029 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4030 "Unexpected source value type."); 4031 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4032 RetVT == MVT::i64) && "Unexpected return value type."); 4033 4034 bool Is64Bit = (RetVT == MVT::i64); 4035 unsigned RegSize = Is64Bit ? 64 : 32; 4036 unsigned DstBits = RetVT.getSizeInBits(); 4037 unsigned SrcBits = SrcVT.getSizeInBits(); 4038 const TargetRegisterClass *RC = 4039 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4040 4041 // Just emit a copy for "zero" shifts. 4042 if (Shift == 0) { 4043 if (RetVT == SrcVT) { 4044 Register ResultReg = createResultReg(RC); 4045 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4046 TII.get(TargetOpcode::COPY), ResultReg) 4047 .addReg(Op0); 4048 return ResultReg; 4049 } else 4050 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4051 } 4052 4053 // Don't deal with undefined shifts. 4054 if (Shift >= DstBits) 4055 return 0; 4056 4057 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4058 // {S|U}BFM Wd, Wn, #r, #s 4059 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 4060 4061 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4062 // %2 = shl i16 %1, 4 4063 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 4064 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 4065 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 4066 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 4067 4068 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4069 // %2 = shl i16 %1, 8 4070 // Wd<32+7-24,32-24> = Wn<7:0> 4071 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 4072 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 4073 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 4074 4075 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4076 // %2 = shl i16 %1, 12 4077 // Wd<32+3-20,32-20> = Wn<3:0> 4078 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 4079 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 4080 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 4081 4082 unsigned ImmR = RegSize - Shift; 4083 // Limit the width to the length of the source type. 
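  // Together ImmR and ImmS make the {S|U}BFM place source bits <ImmS:0> at
  // result bit Shift.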
4084 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 4085 static const unsigned OpcTable[2][2] = { 4086 {AArch64::SBFMWri, AArch64::SBFMXri}, 4087 {AArch64::UBFMWri, AArch64::UBFMXri} 4088 }; 4089 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4090 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4091 Register TmpReg = MRI.createVirtualRegister(RC); 4092 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4093 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4094 .addImm(0) 4095 .addReg(Op0) 4096 .addImm(AArch64::sub_32); 4097 Op0 = TmpReg; 4098 } 4099 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4100 } 4101 4102 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, 4103 unsigned Op1Reg) { 4104 unsigned Opc = 0; 4105 bool NeedTrunc = false; 4106 uint64_t Mask = 0; 4107 switch (RetVT.SimpleTy) { 4108 default: return 0; 4109 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4110 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4111 case MVT::i32: Opc = AArch64::LSRVWr; break; 4112 case MVT::i64: Opc = AArch64::LSRVXr; break; 4113 } 4114 4115 const TargetRegisterClass *RC = 4116 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4117 if (NeedTrunc) { 4118 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask); 4119 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4120 } 4121 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4122 if (NeedTrunc) 4123 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4124 return ResultReg; 4125 } 4126 4127 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4128 uint64_t Shift, bool IsZExt) { 4129 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4130 "Unexpected source/return type pair."); 4131 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4132 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4133 "Unexpected source value type."); 4134 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4135 RetVT == MVT::i64) && "Unexpected return value type."); 4136 4137 bool Is64Bit = (RetVT == MVT::i64); 4138 unsigned RegSize = Is64Bit ? 64 : 32; 4139 unsigned DstBits = RetVT.getSizeInBits(); 4140 unsigned SrcBits = SrcVT.getSizeInBits(); 4141 const TargetRegisterClass *RC = 4142 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4143 4144 // Just emit a copy for "zero" shifts. 4145 if (Shift == 0) { 4146 if (RetVT == SrcVT) { 4147 Register ResultReg = createResultReg(RC); 4148 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4149 TII.get(TargetOpcode::COPY), ResultReg) 4150 .addReg(Op0); 4151 return ResultReg; 4152 } else 4153 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4154 } 4155 4156 // Don't deal with undefined shifts. 4157 if (Shift >= DstBits) 4158 return 0; 4159 4160 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
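  // (for a zero-extended source this is a single UBFX-style bitfield extract)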
4161 // {S|U}BFM Wd, Wn, #r, #s 4162 // Wd<s-r:0> = Wn<s:r> when r <= s 4163 4164 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4165 // %2 = lshr i16 %1, 4 4166 // Wd<7-4:0> = Wn<7:4> 4167 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4168 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4169 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4170 4171 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4172 // %2 = lshr i16 %1, 8 4173 // Wd<7-7,0> = Wn<7:7> 4174 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4175 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4176 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4177 4178 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4179 // %2 = lshr i16 %1, 12 4180 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4181 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4182 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4183 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4184 4185 if (Shift >= SrcBits && IsZExt) 4186 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4187 4188 // It is not possible to fold a sign-extend into the LShr instruction. In this 4189 // case emit a sign-extend. 4190 if (!IsZExt) { 4191 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4192 if (!Op0) 4193 return 0; 4194 SrcVT = RetVT; 4195 SrcBits = SrcVT.getSizeInBits(); 4196 IsZExt = true; 4197 } 4198 4199 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4200 unsigned ImmS = SrcBits - 1; 4201 static const unsigned OpcTable[2][2] = { 4202 {AArch64::SBFMWri, AArch64::SBFMXri}, 4203 {AArch64::UBFMWri, AArch64::UBFMXri} 4204 }; 4205 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4206 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4207 Register TmpReg = MRI.createVirtualRegister(RC); 4208 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4209 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4210 .addImm(0) 4211 .addReg(Op0) 4212 .addImm(AArch64::sub_32); 4213 Op0 = TmpReg; 4214 } 4215 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4216 } 4217 4218 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, 4219 unsigned Op1Reg) { 4220 unsigned Opc = 0; 4221 bool NeedTrunc = false; 4222 uint64_t Mask = 0; 4223 switch (RetVT.SimpleTy) { 4224 default: return 0; 4225 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4226 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4227 case MVT::i32: Opc = AArch64::ASRVWr; break; 4228 case MVT::i64: Opc = AArch64::ASRVXr; break; 4229 } 4230 4231 const TargetRegisterClass *RC = 4232 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4233 if (NeedTrunc) { 4234 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); 4235 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4236 } 4237 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4238 if (NeedTrunc) 4239 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4240 return ResultReg; 4241 } 4242 4243 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4244 uint64_t Shift, bool IsZExt) { 4245 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4246 "Unexpected source/return type pair."); 4247 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4248 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4249 "Unexpected source value type."); 4250 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4251 RetVT == MVT::i64) && "Unexpected return value type."); 4252 4253 bool Is64Bit = (RetVT == MVT::i64); 4254 unsigned RegSize = Is64Bit ? 64 : 32; 4255 unsigned DstBits = RetVT.getSizeInBits(); 4256 unsigned SrcBits = SrcVT.getSizeInBits(); 4257 const TargetRegisterClass *RC = 4258 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4259 4260 // Just emit a copy for "zero" shifts. 4261 if (Shift == 0) { 4262 if (RetVT == SrcVT) { 4263 Register ResultReg = createResultReg(RC); 4264 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4265 TII.get(TargetOpcode::COPY), ResultReg) 4266 .addReg(Op0); 4267 return ResultReg; 4268 } else 4269 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4270 } 4271 4272 // Don't deal with undefined shifts. 4273 if (Shift >= DstBits) 4274 return 0; 4275 4276 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4277 // {S|U}BFM Wd, Wn, #r, #s 4278 // Wd<s-r:0> = Wn<s:r> when r <= s 4279 4280 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4281 // %2 = ashr i16 %1, 4 4282 // Wd<7-4:0> = Wn<7:4> 4283 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4284 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4285 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4286 4287 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4288 // %2 = ashr i16 %1, 8 4289 // Wd<7-7,0> = Wn<7:7> 4290 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4291 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4292 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4293 4294 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4295 // %2 = ashr i16 %1, 12 4296 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4297 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4298 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4299 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4300 4301 if (Shift >= SrcBits && IsZExt) 4302 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4303 4304 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4305 unsigned ImmS = SrcBits - 1; 4306 static const unsigned OpcTable[2][2] = { 4307 {AArch64::SBFMWri, AArch64::SBFMXri}, 4308 {AArch64::UBFMWri, AArch64::UBFMXri} 4309 }; 4310 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4311 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4312 Register TmpReg = MRI.createVirtualRegister(RC); 4313 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4314 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4315 .addImm(0) 4316 .addReg(Op0) 4317 .addImm(AArch64::sub_32); 4318 Op0 = TmpReg; 4319 } 4320 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4321 } 4322 4323 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4324 bool IsZExt) { 4325 
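  // The extension is emitted as a single UBFM/SBFM bitfield move. i1 sources go
  // through emiti1Ext, and extending a 32-bit source to i64 first wraps it in a
  // SUBREG_TO_REG so the W-register value can be used as an X register.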
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4326 4327 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4328 // DestVT are odd things, so test to make sure that they are both types we can 4329 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4330 // bail out to SelectionDAG. 4331 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4332 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4333 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4334 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4335 return 0; 4336 4337 unsigned Opc; 4338 unsigned Imm = 0; 4339 4340 switch (SrcVT.SimpleTy) { 4341 default: 4342 return 0; 4343 case MVT::i1: 4344 return emiti1Ext(SrcReg, DestVT, IsZExt); 4345 case MVT::i8: 4346 if (DestVT == MVT::i64) 4347 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4348 else 4349 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4350 Imm = 7; 4351 break; 4352 case MVT::i16: 4353 if (DestVT == MVT::i64) 4354 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4355 else 4356 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4357 Imm = 15; 4358 break; 4359 case MVT::i32: 4360 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4361 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4362 Imm = 31; 4363 break; 4364 } 4365 4366 // Handle i8 and i16 as i32. 4367 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4368 DestVT = MVT::i32; 4369 else if (DestVT == MVT::i64) { 4370 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4371 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4372 TII.get(AArch64::SUBREG_TO_REG), Src64) 4373 .addImm(0) 4374 .addReg(SrcReg) 4375 .addImm(AArch64::sub_32); 4376 SrcReg = Src64; 4377 } 4378 4379 const TargetRegisterClass *RC = 4380 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4381 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm); 4382 } 4383 4384 static bool isZExtLoad(const MachineInstr *LI) { 4385 switch (LI->getOpcode()) { 4386 default: 4387 return false; 4388 case AArch64::LDURBBi: 4389 case AArch64::LDURHHi: 4390 case AArch64::LDURWi: 4391 case AArch64::LDRBBui: 4392 case AArch64::LDRHHui: 4393 case AArch64::LDRWui: 4394 case AArch64::LDRBBroX: 4395 case AArch64::LDRHHroX: 4396 case AArch64::LDRWroX: 4397 case AArch64::LDRBBroW: 4398 case AArch64::LDRHHroW: 4399 case AArch64::LDRWroW: 4400 return true; 4401 } 4402 } 4403 4404 static bool isSExtLoad(const MachineInstr *LI) { 4405 switch (LI->getOpcode()) { 4406 default: 4407 return false; 4408 case AArch64::LDURSBWi: 4409 case AArch64::LDURSHWi: 4410 case AArch64::LDURSBXi: 4411 case AArch64::LDURSHXi: 4412 case AArch64::LDURSWi: 4413 case AArch64::LDRSBWui: 4414 case AArch64::LDRSHWui: 4415 case AArch64::LDRSBXui: 4416 case AArch64::LDRSHXui: 4417 case AArch64::LDRSWui: 4418 case AArch64::LDRSBWroX: 4419 case AArch64::LDRSHWroX: 4420 case AArch64::LDRSBXroX: 4421 case AArch64::LDRSHXroX: 4422 case AArch64::LDRSWroX: 4423 case AArch64::LDRSBWroW: 4424 case AArch64::LDRSHWroW: 4425 case AArch64::LDRSBXroW: 4426 case AArch64::LDRSHXroW: 4427 case AArch64::LDRSWroW: 4428 return true; 4429 } 4430 } 4431 4432 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4433 MVT SrcVT) { 4434 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4435 if (!LI || !LI->hasOneUse()) 4436 return false; 4437 4438 // Check if the load instruction has already been selected. 
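  // If it has not been selected yet there is nothing to fold here.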
4439 Register Reg = lookUpRegForValue(LI); 4440 if (!Reg) 4441 return false; 4442 4443 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4444 if (!MI) 4445 return false; 4446 4447 // Check if the correct load instruction has been emitted - SelectionDAG might 4448 // have emitted a zero-extending load, but we need a sign-extending load. 4449 bool IsZExt = isa<ZExtInst>(I); 4450 const auto *LoadMI = MI; 4451 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4452 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4453 Register LoadReg = MI->getOperand(1).getReg(); 4454 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4455 assert(LoadMI && "Expected valid instruction"); 4456 } 4457 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4458 return false; 4459 4460 // Nothing to be done. 4461 if (RetVT != MVT::i64 || SrcVT > MVT::i32) { 4462 updateValueMap(I, Reg); 4463 return true; 4464 } 4465 4466 if (IsZExt) { 4467 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 4468 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4469 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4470 .addImm(0) 4471 .addReg(Reg, getKillRegState(true)) 4472 .addImm(AArch64::sub_32); 4473 Reg = Reg64; 4474 } else { 4475 assert((MI->getOpcode() == TargetOpcode::COPY && 4476 MI->getOperand(1).getSubReg() == AArch64::sub_32) && 4477 "Expected copy instruction"); 4478 Reg = MI->getOperand(1).getReg(); 4479 MachineBasicBlock::iterator I(MI); 4480 removeDeadCode(I, std::next(I)); 4481 } 4482 updateValueMap(I, Reg); 4483 return true; 4484 } 4485 4486 bool AArch64FastISel::selectIntExt(const Instruction *I) { 4487 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 4488 "Unexpected integer extend instruction."); 4489 MVT RetVT; 4490 MVT SrcVT; 4491 if (!isTypeSupported(I->getType(), RetVT)) 4492 return false; 4493 4494 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) 4495 return false; 4496 4497 // Try to optimize already sign-/zero-extended values from load instructions. 4498 if (optimizeIntExtLoad(I, RetVT, SrcVT)) 4499 return true; 4500 4501 Register SrcReg = getRegForValue(I->getOperand(0)); 4502 if (!SrcReg) 4503 return false; 4504 4505 // Try to optimize already sign-/zero-extended values from function arguments. 4506 bool IsZExt = isa<ZExtInst>(I); 4507 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) { 4508 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { 4509 if (RetVT == MVT::i64 && SrcVT != MVT::i64) { 4510 Register ResultReg = createResultReg(&AArch64::GPR64RegClass); 4511 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4512 TII.get(AArch64::SUBREG_TO_REG), ResultReg) 4513 .addImm(0) 4514 .addReg(SrcReg) 4515 .addImm(AArch64::sub_32); 4516 SrcReg = ResultReg; 4517 } 4518 4519 updateValueMap(I, SrcReg); 4520 return true; 4521 } 4522 } 4523 4524 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); 4525 if (!ResultReg) 4526 return false; 4527 4528 updateValueMap(I, ResultReg); 4529 return true; 4530 } 4531 4532 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { 4533 EVT DestEVT = TLI.getValueType(DL, I->getType(), true); 4534 if (!DestEVT.isSimple()) 4535 return false; 4536 4537 MVT DestVT = DestEVT.getSimpleVT(); 4538 if (DestVT != MVT::i64 && DestVT != MVT::i32) 4539 return false; 4540 4541 unsigned DivOpc; 4542 bool Is64bit = (DestVT == MVT::i64); 4543 switch (ISDOpcode) { 4544 default: 4545 return false; 4546 case ISD::SREM: 4547 DivOpc = Is64bit ? 
AArch64::SDIVXr : AArch64::SDIVWr; 4548 break; 4549 case ISD::UREM: 4550 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; 4551 break; 4552 } 4553 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4554 Register Src0Reg = getRegForValue(I->getOperand(0)); 4555 if (!Src0Reg) 4556 return false; 4557 4558 Register Src1Reg = getRegForValue(I->getOperand(1)); 4559 if (!Src1Reg) 4560 return false; 4561 4562 const TargetRegisterClass *RC = 4563 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4564 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg); 4565 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4566 // The remainder is computed as numerator - (quotient * denominator) using the 4567 // MSUB instruction. 4568 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg); 4569 updateValueMap(I, ResultReg); 4570 return true; 4571 } 4572 4573 bool AArch64FastISel::selectMul(const Instruction *I) { 4574 MVT VT; 4575 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4576 return false; 4577 4578 if (VT.isVector()) 4579 return selectBinaryOp(I, ISD::MUL); 4580 4581 const Value *Src0 = I->getOperand(0); 4582 const Value *Src1 = I->getOperand(1); 4583 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4584 if (C->getValue().isPowerOf2()) 4585 std::swap(Src0, Src1); 4586 4587 // Try to simplify to a shift instruction. 4588 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4589 if (C->getValue().isPowerOf2()) { 4590 uint64_t ShiftVal = C->getValue().logBase2(); 4591 MVT SrcVT = VT; 4592 bool IsZExt = true; 4593 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4594 if (!isIntExtFree(ZExt)) { 4595 MVT VT; 4596 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4597 SrcVT = VT; 4598 IsZExt = true; 4599 Src0 = ZExt->getOperand(0); 4600 } 4601 } 4602 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4603 if (!isIntExtFree(SExt)) { 4604 MVT VT; 4605 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4606 SrcVT = VT; 4607 IsZExt = false; 4608 Src0 = SExt->getOperand(0); 4609 } 4610 } 4611 } 4612 4613 Register Src0Reg = getRegForValue(Src0); 4614 if (!Src0Reg) 4615 return false; 4616 4617 unsigned ResultReg = 4618 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt); 4619 4620 if (ResultReg) { 4621 updateValueMap(I, ResultReg); 4622 return true; 4623 } 4624 } 4625 4626 Register Src0Reg = getRegForValue(I->getOperand(0)); 4627 if (!Src0Reg) 4628 return false; 4629 4630 Register Src1Reg = getRegForValue(I->getOperand(1)); 4631 if (!Src1Reg) 4632 return false; 4633 4634 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg); 4635 4636 if (!ResultReg) 4637 return false; 4638 4639 updateValueMap(I, ResultReg); 4640 return true; 4641 } 4642 4643 bool AArch64FastISel::selectShift(const Instruction *I) { 4644 MVT RetVT; 4645 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4646 return false; 4647 4648 if (RetVT.isVector()) 4649 return selectOperator(I, I->getOpcode()); 4650 4651 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4652 unsigned ResultReg = 0; 4653 uint64_t ShiftVal = C->getZExtValue(); 4654 MVT SrcVT = RetVT; 4655 bool IsZExt = I->getOpcode() != Instruction::AShr; 4656 const Value *Op0 = I->getOperand(0); 4657 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4658 if (!isIntExtFree(ZExt)) { 4659 MVT TmpVT; 4660 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4661 SrcVT = TmpVT; 4662 IsZExt = true; 
4663 Op0 = ZExt->getOperand(0); 4664 } 4665 } 4666 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4667 if (!isIntExtFree(SExt)) { 4668 MVT TmpVT; 4669 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4670 SrcVT = TmpVT; 4671 IsZExt = false; 4672 Op0 = SExt->getOperand(0); 4673 } 4674 } 4675 } 4676 4677 Register Op0Reg = getRegForValue(Op0); 4678 if (!Op0Reg) 4679 return false; 4680 4681 switch (I->getOpcode()) { 4682 default: llvm_unreachable("Unexpected instruction."); 4683 case Instruction::Shl: 4684 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4685 break; 4686 case Instruction::AShr: 4687 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4688 break; 4689 case Instruction::LShr: 4690 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4691 break; 4692 } 4693 if (!ResultReg) 4694 return false; 4695 4696 updateValueMap(I, ResultReg); 4697 return true; 4698 } 4699 4700 Register Op0Reg = getRegForValue(I->getOperand(0)); 4701 if (!Op0Reg) 4702 return false; 4703 4704 Register Op1Reg = getRegForValue(I->getOperand(1)); 4705 if (!Op1Reg) 4706 return false; 4707 4708 unsigned ResultReg = 0; 4709 switch (I->getOpcode()) { 4710 default: llvm_unreachable("Unexpected instruction."); 4711 case Instruction::Shl: 4712 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg); 4713 break; 4714 case Instruction::AShr: 4715 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg); 4716 break; 4717 case Instruction::LShr: 4718 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg); 4719 break; 4720 } 4721 4722 if (!ResultReg) 4723 return false; 4724 4725 updateValueMap(I, ResultReg); 4726 return true; 4727 } 4728 4729 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4730 MVT RetVT, SrcVT; 4731 4732 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4733 return false; 4734 if (!isTypeLegal(I->getType(), RetVT)) 4735 return false; 4736 4737 unsigned Opc; 4738 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4739 Opc = AArch64::FMOVWSr; 4740 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4741 Opc = AArch64::FMOVXDr; 4742 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4743 Opc = AArch64::FMOVSWr; 4744 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4745 Opc = AArch64::FMOVDXr; 4746 else 4747 return false; 4748 4749 const TargetRegisterClass *RC = nullptr; 4750 switch (RetVT.SimpleTy) { 4751 default: llvm_unreachable("Unexpected value type."); 4752 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4753 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4754 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4755 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4756 } 4757 Register Op0Reg = getRegForValue(I->getOperand(0)); 4758 if (!Op0Reg) 4759 return false; 4760 4761 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg); 4762 if (!ResultReg) 4763 return false; 4764 4765 updateValueMap(I, ResultReg); 4766 return true; 4767 } 4768 4769 bool AArch64FastISel::selectFRem(const Instruction *I) { 4770 MVT RetVT; 4771 if (!isTypeLegal(I->getType(), RetVT)) 4772 return false; 4773 4774 RTLIB::Libcall LC; 4775 switch (RetVT.SimpleTy) { 4776 default: 4777 return false; 4778 case MVT::f32: 4779 LC = RTLIB::REM_F32; 4780 break; 4781 case MVT::f64: 4782 LC = RTLIB::REM_F64; 4783 break; 4784 } 4785 4786 ArgListTy Args; 4787 Args.reserve(I->getNumOperands()); 4788 4789 // Populate the argument list. 
4790 for (auto &Arg : I->operands()) { 4791 ArgListEntry Entry; 4792 Entry.Val = Arg; 4793 Entry.Ty = Arg->getType(); 4794 Args.push_back(Entry); 4795 } 4796 4797 CallLoweringInfo CLI; 4798 MCContext &Ctx = MF->getContext(); 4799 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4800 TLI.getLibcallName(LC), std::move(Args)); 4801 if (!lowerCallTo(CLI)) 4802 return false; 4803 updateValueMap(I, CLI.ResultReg); 4804 return true; 4805 } 4806 4807 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4808 MVT VT; 4809 if (!isTypeLegal(I->getType(), VT)) 4810 return false; 4811 4812 if (!isa<ConstantInt>(I->getOperand(1))) 4813 return selectBinaryOp(I, ISD::SDIV); 4814 4815 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4816 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4817 !(C.isPowerOf2() || C.isNegatedPowerOf2())) 4818 return selectBinaryOp(I, ISD::SDIV); 4819 4820 unsigned Lg2 = C.countTrailingZeros(); 4821 Register Src0Reg = getRegForValue(I->getOperand(0)); 4822 if (!Src0Reg) 4823 return false; 4824 4825 if (cast<BinaryOperator>(I)->isExact()) { 4826 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2); 4827 if (!ResultReg) 4828 return false; 4829 updateValueMap(I, ResultReg); 4830 return true; 4831 } 4832 4833 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4834 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne); 4835 if (!AddReg) 4836 return false; 4837 4838 // (Src0 < 0) ? Pow2 - 1 : 0; 4839 if (!emitICmp_ri(VT, Src0Reg, 0)) 4840 return false; 4841 4842 unsigned SelectOpc; 4843 const TargetRegisterClass *RC; 4844 if (VT == MVT::i64) { 4845 SelectOpc = AArch64::CSELXr; 4846 RC = &AArch64::GPR64RegClass; 4847 } else { 4848 SelectOpc = AArch64::CSELWr; 4849 RC = &AArch64::GPR32RegClass; 4850 } 4851 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg, 4852 AArch64CC::LT); 4853 if (!SelectReg) 4854 return false; 4855 4856 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4857 // negate the result. 4858 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4859 unsigned ResultReg; 4860 if (C.isNegative()) 4861 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg, 4862 AArch64_AM::ASR, Lg2); 4863 else 4864 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2); 4865 4866 if (!ResultReg) 4867 return false; 4868 4869 updateValueMap(I, ResultReg); 4870 return true; 4871 } 4872 4873 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4874 /// have to duplicate it for AArch64, because otherwise we would fail during the 4875 /// sign-extend emission. 4876 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4877 Register IdxN = getRegForValue(Idx); 4878 if (IdxN == 0) 4879 // Unhandled operand. Halt "fast" selection and bail. 4880 return 0; 4881 4882 // If the index is smaller or larger than intptr_t, truncate or extend it. 4883 MVT PtrVT = TLI.getPointerTy(DL); 4884 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); 4885 if (IdxVT.bitsLT(PtrVT)) { 4886 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false); 4887 } else if (IdxVT.bitsGT(PtrVT)) 4888 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64"); 4889 return IdxN; 4890 } 4891 4892 /// This is mostly a copy of the existing FastISel GEP code, but we have to 4893 /// duplicate it for AArch64, because otherwise we would bail out even for 4894 /// simple cases. 
This is because the standard fastEmit functions don't cover 4895 /// MUL at all and ADD is lowered very inefficientily. 4896 bool AArch64FastISel::selectGetElementPtr(const Instruction *I) { 4897 if (Subtarget->isTargetILP32()) 4898 return false; 4899 4900 Register N = getRegForValue(I->getOperand(0)); 4901 if (!N) 4902 return false; 4903 4904 // Keep a running tab of the total offset to coalesce multiple N = N + Offset 4905 // into a single N = N + TotalOffset. 4906 uint64_t TotalOffs = 0; 4907 MVT VT = TLI.getPointerTy(DL); 4908 for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I); 4909 GTI != E; ++GTI) { 4910 const Value *Idx = GTI.getOperand(); 4911 if (auto *StTy = GTI.getStructTypeOrNull()) { 4912 unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); 4913 // N = N + Offset 4914 if (Field) 4915 TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field); 4916 } else { 4917 Type *Ty = GTI.getIndexedType(); 4918 4919 // If this is a constant subscript, handle it quickly. 4920 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { 4921 if (CI->isZero()) 4922 continue; 4923 // N = N + Offset 4924 TotalOffs += 4925 DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue(); 4926 continue; 4927 } 4928 if (TotalOffs) { 4929 N = emitAdd_ri_(VT, N, TotalOffs); 4930 if (!N) 4931 return false; 4932 TotalOffs = 0; 4933 } 4934 4935 // N = N + Idx * ElementSize; 4936 uint64_t ElementSize = DL.getTypeAllocSize(Ty); 4937 unsigned IdxN = getRegForGEPIndex(Idx); 4938 if (!IdxN) 4939 return false; 4940 4941 if (ElementSize != 1) { 4942 unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize); 4943 if (!C) 4944 return false; 4945 IdxN = emitMul_rr(VT, IdxN, C); 4946 if (!IdxN) 4947 return false; 4948 } 4949 N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN); 4950 if (!N) 4951 return false; 4952 } 4953 } 4954 if (TotalOffs) { 4955 N = emitAdd_ri_(VT, N, TotalOffs); 4956 if (!N) 4957 return false; 4958 } 4959 updateValueMap(I, N); 4960 return true; 4961 } 4962 4963 bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) { 4964 assert(TM.getOptLevel() == CodeGenOpt::None && 4965 "cmpxchg survived AtomicExpand at optlevel > -O0"); 4966 4967 auto *RetPairTy = cast<StructType>(I->getType()); 4968 Type *RetTy = RetPairTy->getTypeAtIndex(0U); 4969 assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) && 4970 "cmpxchg has a non-i1 status result"); 4971 4972 MVT VT; 4973 if (!isTypeLegal(RetTy, VT)) 4974 return false; 4975 4976 const TargetRegisterClass *ResRC; 4977 unsigned Opc, CmpOpc; 4978 // This only supports i32/i64, because i8/i16 aren't legal, and the generic 4979 // extractvalue selection doesn't support that. 
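  // CMP_SWAP_32/CMP_SWAP_64 are pseudos that are expanded after FastISel into a
  // load-exclusive/store-exclusive loop.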
4980 if (VT == MVT::i32) { 4981 Opc = AArch64::CMP_SWAP_32; 4982 CmpOpc = AArch64::SUBSWrs; 4983 ResRC = &AArch64::GPR32RegClass; 4984 } else if (VT == MVT::i64) { 4985 Opc = AArch64::CMP_SWAP_64; 4986 CmpOpc = AArch64::SUBSXrs; 4987 ResRC = &AArch64::GPR64RegClass; 4988 } else { 4989 return false; 4990 } 4991 4992 const MCInstrDesc &II = TII.get(Opc); 4993 4994 const Register AddrReg = constrainOperandRegClass( 4995 II, getRegForValue(I->getPointerOperand()), II.getNumDefs()); 4996 const Register DesiredReg = constrainOperandRegClass( 4997 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1); 4998 const Register NewReg = constrainOperandRegClass( 4999 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2); 5000 5001 const Register ResultReg1 = createResultReg(ResRC); 5002 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass); 5003 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass); 5004 5005 // FIXME: MachineMemOperand doesn't support cmpxchg yet. 5006 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 5007 .addDef(ResultReg1) 5008 .addDef(ScratchReg) 5009 .addUse(AddrReg) 5010 .addUse(DesiredReg) 5011 .addUse(NewReg); 5012 5013 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) 5014 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR) 5015 .addUse(ResultReg1) 5016 .addUse(DesiredReg) 5017 .addImm(0); 5018 5019 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr)) 5020 .addDef(ResultReg2) 5021 .addUse(AArch64::WZR) 5022 .addUse(AArch64::WZR) 5023 .addImm(AArch64CC::NE); 5024 5025 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); 5026 updateValueMap(I, ResultReg1, 2); 5027 return true; 5028 } 5029 5030 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 5031 switch (I->getOpcode()) { 5032 default: 5033 break; 5034 case Instruction::Add: 5035 case Instruction::Sub: 5036 return selectAddSub(I); 5037 case Instruction::Mul: 5038 return selectMul(I); 5039 case Instruction::SDiv: 5040 return selectSDiv(I); 5041 case Instruction::SRem: 5042 if (!selectBinaryOp(I, ISD::SREM)) 5043 return selectRem(I, ISD::SREM); 5044 return true; 5045 case Instruction::URem: 5046 if (!selectBinaryOp(I, ISD::UREM)) 5047 return selectRem(I, ISD::UREM); 5048 return true; 5049 case Instruction::Shl: 5050 case Instruction::LShr: 5051 case Instruction::AShr: 5052 return selectShift(I); 5053 case Instruction::And: 5054 case Instruction::Or: 5055 case Instruction::Xor: 5056 return selectLogicalOp(I); 5057 case Instruction::Br: 5058 return selectBranch(I); 5059 case Instruction::IndirectBr: 5060 return selectIndirectBr(I); 5061 case Instruction::BitCast: 5062 if (!FastISel::selectBitCast(I)) 5063 return selectBitCast(I); 5064 return true; 5065 case Instruction::FPToSI: 5066 if (!selectCast(I, ISD::FP_TO_SINT)) 5067 return selectFPToInt(I, /*Signed=*/true); 5068 return true; 5069 case Instruction::FPToUI: 5070 return selectFPToInt(I, /*Signed=*/false); 5071 case Instruction::ZExt: 5072 case Instruction::SExt: 5073 return selectIntExt(I); 5074 case Instruction::Trunc: 5075 if (!selectCast(I, ISD::TRUNCATE)) 5076 return selectTrunc(I); 5077 return true; 5078 case Instruction::FPExt: 5079 return selectFPExt(I); 5080 case Instruction::FPTrunc: 5081 return selectFPTrunc(I); 5082 case Instruction::SIToFP: 5083 if (!selectCast(I, ISD::SINT_TO_FP)) 5084 return selectIntToFP(I, /*Signed=*/true); 5085 return true; 5086 case Instruction::UIToFP: 5087 return selectIntToFP(I, /*Signed=*/false); 
5088 case Instruction::Load: 5089 return selectLoad(I); 5090 case Instruction::Store: 5091 return selectStore(I); 5092 case Instruction::FCmp: 5093 case Instruction::ICmp: 5094 return selectCmp(I); 5095 case Instruction::Select: 5096 return selectSelect(I); 5097 case Instruction::Ret: 5098 return selectRet(I); 5099 case Instruction::FRem: 5100 return selectFRem(I); 5101 case Instruction::GetElementPtr: 5102 return selectGetElementPtr(I); 5103 case Instruction::AtomicCmpXchg: 5104 return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I)); 5105 } 5106 5107 // fall-back to target-independent instruction selection. 5108 return selectOperator(I, I->getOpcode()); 5109 } 5110 5111 FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, 5112 const TargetLibraryInfo *LibInfo) { 5113 return new AArch64FastISel(FuncInfo, LibInfo); 5114 } 5115