//===- AArch64FastISel.cpp - AArch64 FastISel implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the AArch64-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// AArch64GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64CallingConvention.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>

using namespace llvm;

namespace {

class AArch64FastISel final : public FastISel {
  class Address {
  public:
    using BaseKind = enum {
      RegBase,
      FrameIndexBase
    };

  private:
    BaseKind Kind = RegBase;
    AArch64_AM::ShiftExtendType ExtType = AArch64_AM::InvalidShiftExtend;
    union {
      unsigned Reg;
      int FI;
    } Base;
    unsigned OffsetReg = 0;
    unsigned Shift = 0;
    int64_t Offset = 0;
    const GlobalValue *GV = nullptr;

  public:
    Address() { Base.Reg = 0; }

    void setKind(BaseKind K) { Kind = K; }
    BaseKind getKind() const { return Kind; }
    void setExtendType(AArch64_AM::ShiftExtendType E) { ExtType = E; }
    AArch64_AM::ShiftExtendType getExtendType() const { return ExtType; }
    bool isRegBase() const { return Kind == RegBase; }
    bool isFIBase() const { return Kind == FrameIndexBase; }

    void setReg(unsigned Reg) {
      assert(isRegBase() && "Invalid base register access!");
      Base.Reg = Reg;
    }

    unsigned getReg() const {
      assert(isRegBase() && "Invalid base register access!");
      return Base.Reg;
    }

    void setOffsetReg(unsigned Reg) {
      OffsetReg = Reg;
    }

    unsigned getOffsetReg() const {
      return OffsetReg;
    }

    void setFI(unsigned FI) {
      assert(isFIBase() && "Invalid base frame index access!");
      Base.FI = FI;
    }

    unsigned getFI() const {
      assert(isFIBase() && "Invalid base frame index access!");
      return Base.FI;
    }

    void setOffset(int64_t O) { Offset = O; }
    int64_t getOffset() { return Offset; }
    void setShift(unsigned S) { Shift = S; }
    unsigned getShift() { return Shift; }

    void setGlobalValue(const GlobalValue *G) { GV = G; }
    const GlobalValue *getGlobalValue() { return GV; }
  };

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;
  LLVMContext *Context;

  bool fastLowerArguments() override;
  bool fastLowerCall(CallLoweringInfo &CLI) override;
  bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;

private:
  // Selection routines.
  bool selectAddSub(const Instruction *I);
  bool selectLogicalOp(const Instruction *I);
  bool selectLoad(const Instruction *I);
  bool selectStore(const Instruction *I);
  bool selectBranch(const Instruction *I);
  bool selectIndirectBr(const Instruction *I);
  bool selectCmp(const Instruction *I);
  bool selectSelect(const Instruction *I);
  bool selectFPExt(const Instruction *I);
  bool selectFPTrunc(const Instruction *I);
  bool selectFPToInt(const Instruction *I, bool Signed);
  bool selectIntToFP(const Instruction *I, bool Signed);
  bool selectRem(const Instruction *I, unsigned ISDOpcode);
  bool selectRet(const Instruction *I);
  bool selectTrunc(const Instruction *I);
  bool selectIntExt(const Instruction *I);
  bool selectMul(const Instruction *I);
  bool selectShift(const Instruction *I);
  bool selectBitCast(const Instruction *I);
  bool selectFRem(const Instruction *I);
  bool selectSDiv(const Instruction *I);
  bool selectGetElementPtr(const Instruction *I);
  bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);

  // Utility helper routines.
  bool isTypeLegal(Type *Ty, MVT &VT);
  bool isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed = false);
  bool isValueAvailable(const Value *V) const;
  bool computeAddress(const Value *Obj, Address &Addr, Type *Ty = nullptr);
  bool computeCallAddress(const Value *V, Address &Addr);
  bool simplifyAddress(Address &Addr, MVT VT);
  void addLoadStoreOperands(Address &Addr, const MachineInstrBuilder &MIB,
                            MachineMemOperand::Flags Flags,
                            unsigned ScaleFactor, MachineMemOperand *MMO);
  bool isMemCpySmall(uint64_t Len, unsigned Alignment);
  bool tryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
                          unsigned Alignment);
  bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
                         const Value *Cond);
  bool optimizeIntExtLoad(const Instruction *I, MVT RetVT, MVT SrcVT);
  bool optimizeSelect(const SelectInst *SI);
  unsigned getRegForGEPIndex(const Value *Idx);

  // Emit helper routines.
  unsigned emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                      const Value *RHS, bool SetFlags = false,
                      bool WantResult = true, bool IsZExt = false);
  unsigned emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         uint64_t Imm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ShiftType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);
  unsigned emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                         unsigned RHSReg, AArch64_AM::ShiftExtendType ExtType,
                         uint64_t ShiftImm, bool SetFlags = false,
                         bool WantResult = true);

  // Emit functions.
  bool emitCompareAndBranch(const BranchInst *BI);
  bool emitCmp(const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp(MVT RetVT, const Value *LHS, const Value *RHS, bool IsZExt);
  bool emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  bool emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS);
  unsigned emitLoad(MVT VT, MVT ResultVT, Address Addr, bool WantZExt = true,
                    MachineMemOperand *MMO = nullptr);
  bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
                 MachineMemOperand *MMO = nullptr);
  bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
                        MachineMemOperand *MMO = nullptr);
  unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
  unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm);
  unsigned emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                   bool SetFlags = false, bool WantResult = true,
                   bool IsZExt = false);
  unsigned emitSubs_rr(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       bool WantResult = true);
  unsigned emitSubs_rs(MVT RetVT, unsigned LHSReg, unsigned RHSReg,
                       AArch64_AM::ShiftExtendType ShiftType, uint64_t ShiftImm,
                       bool WantResult = true);
  unsigned emitLogicalOp(unsigned ISDOpc, MVT RetVT, const Value *LHS,
                         const Value *RHS);
  unsigned emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            uint64_t Imm);
  unsigned emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT, unsigned LHSReg,
                            unsigned RHSReg, uint64_t ShiftImm);
  unsigned emitAnd_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm);
  unsigned emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1);
  unsigned emitLSL_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitLSR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = true);
  unsigned emitASR_rr(MVT RetVT, unsigned Op0Reg, unsigned Op1Reg);
  unsigned emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0Reg, uint64_t Imm,
                      bool IsZExt = false);

  unsigned materializeInt(const ConstantInt *CI, MVT VT);
  unsigned materializeFP(const ConstantFP *CFP, MVT VT);
  unsigned materializeGV(const GlobalValue *GV);

  // Call handling routines.
private:
  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC) const;
  bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
                       unsigned &NumBytes);
  bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);

public:
  // Backend specific FastISel code.
  unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
  unsigned fastMaterializeConstant(const Constant *C) override;
  unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;

  explicit AArch64FastISel(FunctionLoweringInfo &FuncInfo,
                           const TargetLibraryInfo *LibInfo)
      : FastISel(FuncInfo, LibInfo, /*SkipTargetIndependentISel=*/true) {
    Subtarget = &FuncInfo.MF->getSubtarget<AArch64Subtarget>();
    Context = &FuncInfo.Fn->getContext();
  }

  bool fastSelectInstruction(const Instruction *I) override;

#include "AArch64GenFastISel.inc"
};

} // end anonymous namespace

/// Check if the sign-/zero-extend will be a noop.
static bool isIntExtFree(const Instruction *I) {
  assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) &&
         "Unexpected integer extend instruction.");
  assert(!I->getType()->isVectorTy() && I->getType()->isIntegerTy() &&
         "Unexpected value type.");
  bool IsZExt = isa<ZExtInst>(I);

  if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
    if (LI->hasOneUse())
      return true;

  if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0)))
    if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr()))
      return true;

  return false;
}

/// Determine the implicit scale factor that is applied by a memory
/// operation for a given value type.
static unsigned getImplicitScaleFactor(MVT VT) {
  switch (VT.SimpleTy) {
  default:
    return 0; // invalid
  case MVT::i1: // fall-through
  case MVT::i8:
    return 1;
  case MVT::i16:
    return 2;
  case MVT::i32: // fall-through
  case MVT::f32:
    return 4;
  case MVT::i64: // fall-through
  case MVT::f64:
    return 8;
  }
}

CCAssignFn *AArch64FastISel::CCAssignFnForCall(CallingConv::ID CC) const {
  if (CC == CallingConv::WebKit_JS)
    return CC_AArch64_WebKit_JS;
  if (CC == CallingConv::GHC)
    return CC_AArch64_GHC;
  if (CC == CallingConv::CFGuard_Check)
    return CC_AArch64_Win64_CFGuard_Check;
  return Subtarget->isTargetDarwin() ? CC_AArch64_DarwinPCS : CC_AArch64_AAPCS;
}

unsigned AArch64FastISel::fastMaterializeAlloca(const AllocaInst *AI) {
  assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i64 &&
         "Alloca should always return a pointer.");

  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI))
    return 0;

  DenseMap<const AllocaInst *, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);

  if (SI != FuncInfo.StaticAllocaMap.end()) {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(SI->second)
        .addImm(0)
        .addImm(0);
    return ResultReg;
  }

  return 0;
}

unsigned AArch64FastISel::materializeInt(const ConstantInt *CI, MVT VT) {
  if (VT > MVT::i64)
    return 0;

  if (!CI->isZero())
    return fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());

  // Create a copy from the zero register to materialize a "0" value.
  const TargetRegisterClass *RC = (VT == MVT::i64) ? &AArch64::GPR64RegClass
                                                   : &AArch64::GPR32RegClass;
  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
  Register ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
          ResultReg).addReg(ZeroReg, getKillRegState(true));
  return ResultReg;
}

unsigned AArch64FastISel::materializeFP(const ConstantFP *CFP, MVT VT) {
  // Positive zero (+0.0) has to be materialized with a fmov from the zero
  // register, because the immediate version of fmov cannot encode zero.
  if (CFP->isNullValue())
    return fastMaterializeFloatZero(CFP);

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  const APFloat Val = CFP->getValueAPF();
  bool Is64Bit = (VT == MVT::f64);
  // This checks to see if we can use FMOV instructions to materialize
  // a constant, otherwise we have to materialize via the constant pool.
  int Imm =
      Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val);
  if (Imm != -1) {
    unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi;
    return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
  }

  // For the large code model materialize the FP constant in code.
  if (TM.getCodeModel() == CodeModel::Large) {
    unsigned Opc1 = Is64Bit ? AArch64::MOVi64imm : AArch64::MOVi32imm;
    const TargetRegisterClass *RC = Is64Bit ?
        &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;

    Register TmpReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc1), TmpReg)
        .addImm(CFP->getValueAPF().bitcastToAPInt().getZExtValue());

    Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::COPY), ResultReg)
        .addReg(TmpReg, getKillRegState(true));

    return ResultReg;
  }

  // Materialize via constant pool. MachineConstantPool wants an explicit
  // alignment.
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());

  unsigned CPI = MCP.getConstantPoolIndex(cast<Constant>(CFP), Alignment);
  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
          ADRPReg).addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGE);

  unsigned Opc = Is64Bit ? AArch64::LDRDui : AArch64::LDRSui;
  Register ResultReg = createResultReg(TLI.getRegClassFor(VT));
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
      .addReg(ADRPReg)
      .addConstantPoolIndex(CPI, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
  return ResultReg;
}

unsigned AArch64FastISel::materializeGV(const GlobalValue *GV) {
  // We can't handle thread-local variables quickly yet.
  if (GV->isThreadLocal())
    return 0;

  // MachO still uses GOT for large code-model accesses, but ELF requires
  // movz/movk sequences, which FastISel doesn't handle yet.
  if (!Subtarget->useSmallAddressing() && !Subtarget->isTargetMachO())
    return 0;

  unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);

  EVT DestEVT = TLI.getValueType(DL, GV->getType(), true);
  if (!DestEVT.isSimple())
    return 0;

  Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass);
  unsigned ResultReg;

  if (OpFlags & AArch64II::MO_GOT) {
    // ADRP + LDRX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    unsigned LdrOpc;
    if (Subtarget->isTargetILP32()) {
      ResultReg = createResultReg(&AArch64::GPR32RegClass);
      LdrOpc = AArch64::LDRWui;
    } else {
      ResultReg = createResultReg(&AArch64::GPR64RegClass);
      LdrOpc = AArch64::LDRXui;
    }
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(LdrOpc),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_GOT | AArch64II::MO_PAGEOFF |
                          AArch64II::MO_NC | OpFlags);
    if (!Subtarget->isTargetILP32())
      return ResultReg;

    // LDRWui produces a 32-bit register, but pointers in-register are 64-bits
    // so we must extend the result on ILP32.
    Register Result64 = createResultReg(&AArch64::GPR64RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
            TII.get(TargetOpcode::SUBREG_TO_REG))
        .addDef(Result64)
        .addImm(0)
        .addReg(ResultReg, RegState::Kill)
        .addImm(AArch64::sub_32);
    return Result64;
  } else {
    // ADRP + ADDX
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP),
            ADRPReg)
        .addGlobalAddress(GV, 0, AArch64II::MO_PAGE | OpFlags);

    ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addReg(ADRPReg)
        .addGlobalAddress(GV, 0,
                          AArch64II::MO_PAGEOFF | AArch64II::MO_NC | OpFlags)
        .addImm(0);
  }
  return ResultReg;
}

unsigned AArch64FastISel::fastMaterializeConstant(const Constant *C) {
  EVT CEVT = TLI.getValueType(DL, C->getType(), true);

  // Only handle simple types.
  if (!CEVT.isSimple())
    return 0;
  MVT VT = CEVT.getSimpleVT();
  // arm64_32 has 32-bit pointers held in 64-bit registers. Because of that,
  // 'null' pointers need to have a somewhat special treatment.
  if (isa<ConstantPointerNull>(C)) {
    assert(VT == MVT::i64 && "Expected 64-bit pointers");
    return materializeInt(ConstantInt::get(Type::getInt64Ty(*Context), 0), VT);
  }

  if (const auto *CI = dyn_cast<ConstantInt>(C))
    return materializeInt(CI, VT);
  else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return materializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return materializeGV(GV);

  return 0;
}

unsigned AArch64FastISel::fastMaterializeFloatZero(const ConstantFP *CFP) {
  assert(CFP->isNullValue() &&
         "Floating-point constant is not a positive zero.");
  MVT VT;
  if (!isTypeLegal(CFP->getType(), VT))
    return 0;

  if (VT != MVT::f32 && VT != MVT::f64)
    return 0;

  bool Is64Bit = (VT == MVT::f64);
  unsigned ZReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  unsigned Opc = Is64Bit ? AArch64::FMOVXDr : AArch64::FMOVWSr;
  return fastEmitInst_r(Opc, TLI.getRegClassFor(VT), ZReg);
}

/// Check if the multiply is by a power-of-2 constant.
static bool isMulPowOf2(const Value *I) {
  if (const auto *MI = dyn_cast<MulOperator>(I)) {
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(0)))
      if (C->getValue().isPowerOf2())
        return true;
    if (const auto *C = dyn_cast<ConstantInt>(MI->getOperand(1)))
      if (C->getValue().isPowerOf2())
        return true;
  }
  return false;
}

// Computes the address to get to an object.
bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
{
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (auto *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default:
    break;
  case Instruction::BitCast:
    // Look through bitcasts.
    return computeAddress(U->getOperand(0), Addr, Ty);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(DL, U->getOperand(0)->getType()) ==
        TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeAddress(U->getOperand(0), Addr, Ty);
    break;

  case Instruction::GetElementPtr: {
    Address SavedAddr = Addr;
    uint64_t TmpOffset = Addr.getOffset();

    // Iterate through the GEP folding the constants into offsets where
    // we can.
    for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
         GTI != E; ++GTI) {
      const Value *Op = GTI.getOperand();
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        TmpOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
        while (true) {
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            TmpOffset += CI->getSExtValue() * S;
            break;
          }
          if (canFoldAddIntoGEP(U, Op)) {
            // A compatible add with a constant operand. Fold the constant.
            ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            TmpOffset += CI->getSExtValue() * S;
            // Iterate on the other operand.
            Op = cast<AddOperator>(Op)->getOperand(0);
            continue;
          }
          // Unsupported
          goto unsupported_gep;
        }
      }
    }

    // Try to grab the base operand now.
    Addr.setOffset(TmpOffset);
    if (computeAddress(U->getOperand(0), Addr, Ty))
      return true;

    // We failed, restore everything and try the other options.
    Addr = SavedAddr;

  unsupported_gep:
    break;
  }
  case Instruction::Alloca: {
    const AllocaInst *AI = cast<AllocaInst>(Obj);
    DenseMap<const AllocaInst *, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      Addr.setKind(Address::FrameIndexBase);
      Addr.setFI(SI->second);
      return true;
    }
    break;
  }
  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (isa<ConstantInt>(LHS))
      std::swap(LHS, RHS);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }

    Address Backup = Addr;
    if (computeAddress(LHS, Addr, Ty) && computeAddress(RHS, Addr, Ty))
      return true;
    Addr = Backup;

    break;
  }
  case Instruction::Sub: {
    // Subs of constants are common and easy enough.
    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
      Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
      return computeAddress(LHS, Addr, Ty);
    }
    break;
  }
  case Instruction::Shl: {
    if (Addr.getOffsetReg())
      break;

    const auto *CI = dyn_cast<ConstantInt>(U->getOperand(1));
    if (!CI)
      break;

    unsigned Val = CI->getZExtValue();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = U->getOperand(0);
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    if (const auto *AI = dyn_cast<BinaryOperator>(Src))
      if (AI->getOpcode() == Instruction::And) {
        const Value *LHS = AI->getOperand(0);
        const Value *RHS = AI->getOperand(1);

        if (const auto *C = dyn_cast<ConstantInt>(LHS))
          if (C->getValue() == 0xffffffff)
            std::swap(LHS, RHS);

        if (const auto *C = dyn_cast<ConstantInt>(RHS))
          if (C->getValue() == 0xffffffff) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Register Reg = getRegForValue(LHS);
            if (!Reg)
              return false;
            Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
            Addr.setOffsetReg(Reg);
            return true;
          }
      }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::Mul: {
    if (Addr.getOffsetReg())
      break;

    if (!isMulPowOf2(U))
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    // Canonicalize power-of-2 value to the RHS.
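    // For example, a GEP index computed as "mul i64 %idx, 8" is handled the
    // same way as "shl i64 %idx, 3": for an 8-byte access it can become a
    // register offset scaled by LSL #3 in the addressing mode below.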
    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue().isPowerOf2())
        std::swap(LHS, RHS);

    assert(isa<ConstantInt>(RHS) && "Expected a ConstantInt.");
    const auto *C = cast<ConstantInt>(RHS);
    unsigned Val = C->getValue().logBase2();
    if (Val < 1 || Val > 3)
      break;

    uint64_t NumBytes = 0;
    if (Ty && Ty->isSized()) {
      uint64_t NumBits = DL.getTypeSizeInBits(Ty);
      NumBytes = NumBits / 8;
      if (!isPowerOf2_64(NumBits))
        NumBytes = 0;
    }

    if (NumBytes != (1ULL << Val))
      break;

    Addr.setShift(Val);
    Addr.setExtendType(AArch64_AM::LSL);

    const Value *Src = LHS;
    if (const auto *I = dyn_cast<Instruction>(Src)) {
      if (FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
        // Fold the zext or sext when it won't become a noop.
        if (const auto *ZE = dyn_cast<ZExtInst>(I)) {
          if (!isIntExtFree(ZE) &&
              ZE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::UXTW);
            Src = ZE->getOperand(0);
          }
        } else if (const auto *SE = dyn_cast<SExtInst>(I)) {
          if (!isIntExtFree(SE) &&
              SE->getOperand(0)->getType()->isIntegerTy(32)) {
            Addr.setExtendType(AArch64_AM::SXTW);
            Src = SE->getOperand(0);
          }
        }
      }
    }

    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  case Instruction::And: {
    if (Addr.getOffsetReg())
      break;

    if (!Ty || DL.getTypeSizeInBits(Ty) != 8)
      break;

    const Value *LHS = U->getOperand(0);
    const Value *RHS = U->getOperand(1);

    if (const auto *C = dyn_cast<ConstantInt>(LHS))
      if (C->getValue() == 0xffffffff)
        std::swap(LHS, RHS);

    if (const auto *C = dyn_cast<ConstantInt>(RHS))
      if (C->getValue() == 0xffffffff) {
        Addr.setShift(0);
        Addr.setExtendType(AArch64_AM::LSL);
        Addr.setExtendType(AArch64_AM::UXTW);

        Register Reg = getRegForValue(LHS);
        if (!Reg)
          return false;
        Reg = fastEmitInst_extractsubreg(MVT::i32, Reg, AArch64::sub_32);
        Addr.setOffsetReg(Reg);
        return true;
      }
    break;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    if (!Addr.getReg() || Addr.getOffsetReg())
      break;

    const Value *Src = nullptr;
    // Fold the zext or sext when it won't become a noop.
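    // For example, when the offset value is "zext i32 %n to i64", the
    // extension can be folded into the memory access as a UXTW register
    // offset rather than being emitted as a separate instruction.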
    if (const auto *ZE = dyn_cast<ZExtInst>(U)) {
      if (!isIntExtFree(ZE) && ZE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::UXTW);
        Src = ZE->getOperand(0);
      }
    } else if (const auto *SE = dyn_cast<SExtInst>(U)) {
      if (!isIntExtFree(SE) && SE->getOperand(0)->getType()->isIntegerTy(32)) {
        Addr.setExtendType(AArch64_AM::SXTW);
        Src = SE->getOperand(0);
      }
    }

    if (!Src)
      break;

    Addr.setShift(0);
    Register Reg = getRegForValue(Src);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }
  } // end switch

  if (Addr.isRegBase() && !Addr.getReg()) {
    Register Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setReg(Reg);
    return true;
  }

  if (!Addr.getOffsetReg()) {
    Register Reg = getRegForValue(Obj);
    if (!Reg)
      return false;
    Addr.setOffsetReg(Reg);
    return true;
  }

  return false;
}

bool AArch64FastISel::computeCallAddress(const Value *V, Address &Addr) {
  const User *U = nullptr;
  unsigned Opcode = Instruction::UserOp1;
  bool InMBB = true;

  if (const auto *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
    InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
  } else if (const auto *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts if its operand is in the same BB.
    if (InMBB)
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::IntToPtr:
    // Look past no-op inttoptrs if its operand is in the same BB.
    if (InMBB &&
        TLI.getValueType(DL, U->getOperand(0)->getType()) ==
            TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  case Instruction::PtrToInt:
    // Look past no-op ptrtoints if its operand is in the same BB.
    if (InMBB && TLI.getValueType(DL, U->getType()) == TLI.getPointerTy(DL))
      return computeCallAddress(U->getOperand(0), Addr);
    break;
  }

  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    Addr.setGlobalValue(GV);
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!Addr.getGlobalValue()) {
    Addr.setReg(getRegForValue(V));
    return Addr.getReg() != 0;
  }

  return false;
}

bool AArch64FastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(DL, Ty, true);

  if (Subtarget->isTargetILP32() && Ty->isPointerTy())
    return false;

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple())
    return false;
  VT = evt.getSimpleVT();

  // This is a legal type, but it's not something we handle in fast-isel.
  if (VT == MVT::f128)
    return false;

  // Handle all other legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

/// Determine if the value type is supported by FastISel.
///
/// FastISel for AArch64 can handle more value types than are legal. This adds
/// simple value types such as i1, i8, and i16.
bool AArch64FastISel::isTypeSupported(Type *Ty, MVT &VT, bool IsVectorAllowed) {
  if (Ty->isVectorTy() && !IsVectorAllowed)
    return false;

  if (isTypeLegal(Ty, VT))
    return true;

  // If this is a type that can be sign- or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

bool AArch64FastISel::isValueAvailable(const Value *V) const {
  if (!isa<Instruction>(V))
    return true;

  const auto *I = cast<Instruction>(V);
  return FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB;
}

bool AArch64FastISel::simplifyAddress(Address &Addr, MVT VT) {
  if (Subtarget->isTargetILP32())
    return false;

  unsigned ScaleFactor = getImplicitScaleFactor(VT);
  if (!ScaleFactor)
    return false;

  bool ImmediateOffsetNeedsLowering = false;
  bool RegisterOffsetNeedsLowering = false;
  int64_t Offset = Addr.getOffset();
  if (((Offset < 0) || (Offset & (ScaleFactor - 1))) && !isInt<9>(Offset))
    ImmediateOffsetNeedsLowering = true;
  else if (Offset > 0 && !(Offset & (ScaleFactor - 1)) &&
           !isUInt<12>(Offset / ScaleFactor))
    ImmediateOffsetNeedsLowering = true;

  // Cannot encode an offset register and an immediate offset in the same
  // instruction. Fold the immediate offset into the load/store instruction and
  // emit an additional add to take care of the offset register.
  if (!ImmediateOffsetNeedsLowering && Addr.getOffset() && Addr.getOffsetReg())
    RegisterOffsetNeedsLowering = true;

  // Cannot encode zero register as base.
  if (Addr.isRegBase() && Addr.getOffsetReg() && !Addr.getReg())
    RegisterOffsetNeedsLowering = true;

  // If this is a stack pointer and the offset needs to be simplified then put
  // the alloca address into a register, set the base type back to register and
  // continue. This should almost never happen.
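  // For example, a frame-index base paired with a register offset (or with an
  // offset that does not fit the immediate form) cannot be encoded directly,
  // so the frame address is first materialized with an ADDXri of the frame
  // index and the access then continues with a plain register base.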
  if ((ImmediateOffsetNeedsLowering || Addr.getOffsetReg()) && Addr.isFIBase())
  {
    Register ResultReg = createResultReg(&AArch64::GPR64spRegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri),
            ResultReg)
        .addFrameIndex(Addr.getFI())
        .addImm(0)
        .addImm(0);
    Addr.setKind(Address::RegBase);
    Addr.setReg(ResultReg);
  }

  if (RegisterOffsetNeedsLowering) {
    unsigned ResultReg = 0;
    if (Addr.getReg()) {
      if (Addr.getExtendType() == AArch64_AM::SXTW ||
          Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitAddSub_rx(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), Addr.getExtendType(),
                                  Addr.getShift());
      else
        ResultReg = emitAddSub_rs(/*UseAdd=*/true, MVT::i64, Addr.getReg(),
                                  Addr.getOffsetReg(), AArch64_AM::LSL,
                                  Addr.getShift());
    } else {
      if (Addr.getExtendType() == AArch64_AM::UXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/true);
      else if (Addr.getExtendType() == AArch64_AM::SXTW)
        ResultReg = emitLSL_ri(MVT::i64, MVT::i32, Addr.getOffsetReg(),
                               Addr.getShift(), /*IsZExt=*/false);
      else
        ResultReg = emitLSL_ri(MVT::i64, MVT::i64, Addr.getOffsetReg(),
                               Addr.getShift());
    }
    if (!ResultReg)
      return false;

    Addr.setReg(ResultReg);
    Addr.setOffsetReg(0);
    Addr.setShift(0);
    Addr.setExtendType(AArch64_AM::InvalidShiftExtend);
  }

  // Since the offset is too large for the load/store instruction get the
  // reg+offset into a register.
  if (ImmediateOffsetNeedsLowering) {
    unsigned ResultReg;
    if (Addr.getReg())
      // Try to fold the immediate into the add instruction.
      ResultReg = emitAdd_ri_(MVT::i64, Addr.getReg(), Offset);
    else
      ResultReg = fastEmit_i(MVT::i64, MVT::i64, ISD::Constant, Offset);

    if (!ResultReg)
      return false;
    Addr.setReg(ResultReg);
    Addr.setOffset(0);
  }
  return true;
}

void AArch64FastISel::addLoadStoreOperands(Address &Addr,
                                           const MachineInstrBuilder &MIB,
                                           MachineMemOperand::Flags Flags,
                                           unsigned ScaleFactor,
                                           MachineMemOperand *MMO) {
  int64_t Offset = Addr.getOffset() / ScaleFactor;
  // Frame base works a bit differently. Handle it separately.
  if (Addr.isFIBase()) {
    int FI = Addr.getFI();
    // FIXME: We shouldn't be using getObjectSize/getObjectAlignment. The size
    // and alignment should be based on the VT.
    MMO = FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(*FuncInfo.MF, FI, Offset), Flags,
        MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI).addImm(Offset);
  } else {
    assert(Addr.isRegBase() && "Unexpected address kind.");
    const MCInstrDesc &II = MIB->getDesc();
    unsigned Idx = (Flags & MachineMemOperand::MOStore) ? 1 : 0;
    Addr.setReg(
        constrainOperandRegClass(II, Addr.getReg(), II.getNumDefs() + Idx));
    Addr.setOffsetReg(constrainOperandRegClass(II, Addr.getOffsetReg(),
                                               II.getNumDefs() + Idx + 1));
    if (Addr.getOffsetReg()) {
      assert(Addr.getOffset() == 0 && "Unexpected offset");
      bool IsSigned = Addr.getExtendType() == AArch64_AM::SXTW ||
                      Addr.getExtendType() == AArch64_AM::SXTX;
      MIB.addReg(Addr.getReg());
      MIB.addReg(Addr.getOffsetReg());
      MIB.addImm(IsSigned);
      MIB.addImm(Addr.getShift() != 0);
    } else
      MIB.addReg(Addr.getReg()).addImm(Offset);
  }

  if (MMO)
    MIB.addMemOperand(MMO);
}

unsigned AArch64FastISel::emitAddSub(bool UseAdd, MVT RetVT, const Value *LHS,
                                     const Value *RHS, bool SetFlags,
                                     bool WantResult, bool IsZExt) {
  AArch64_AM::ShiftExtendType ExtendType = AArch64_AM::InvalidShiftExtend;
  bool NeedExtend = false;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
    NeedExtend = true;
    break;
  case MVT::i8:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTB : AArch64_AM::SXTB;
    break;
  case MVT::i16:
    NeedExtend = true;
    ExtendType = IsZExt ? AArch64_AM::UXTH : AArch64_AM::SXTH;
    break;
  case MVT::i32: // fall-through
  case MVT::i64:
    break;
  }
  MVT SrcVT = RetVT;
  RetVT.SimpleTy = std::max(RetVT.SimpleTy, MVT::i32);

  // Canonicalize immediates to the RHS first.
  if (UseAdd && isa<Constant>(LHS) && !isa<Constant>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power of 2 to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (UseAdd && LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<BinaryOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        if (SI->getOpcode() == Instruction::Shl ||
            SI->getOpcode() == Instruction::LShr ||
            SI->getOpcode() == Instruction::AShr)
          std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;

  if (NeedExtend)
    LHSReg = emitIntExt(SrcVT, LHSReg, RetVT, IsZExt);

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = IsZExt ? C->getZExtValue() : C->getSExtValue();
    if (C->isNegative())
      ResultReg = emitAddSub_ri(!UseAdd, RetVT, LHSReg, -Imm, SetFlags,
                                WantResult);
    else
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, Imm, SetFlags,
                                WantResult);
  } else if (const auto *C = dyn_cast<Constant>(RHS))
    if (C->isNullValue())
      ResultReg = emitAddSub_ri(UseAdd, RetVT, LHSReg, 0, SetFlags, WantResult);

  if (ResultReg)
    return ResultReg;

  // Only extend the RHS within the instruction if there is a valid extend type.
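  // For example, for an i16 add the RHS of "(add i16 %a, (shl i16 %b, 2))"
  // can be folded into a single extended-register ADD (roughly
  // "ADD Wd, Wn, Wm, SXTH #2" for the signed case) instead of emitting the
  // extend and the shift separately.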
  if (ExtendType != AArch64_AM::InvalidShiftExtend && RHS->hasOneUse() &&
      isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1)))
        if ((SI->getOpcode() == Instruction::Shl) && (C->getZExtValue() < 4)) {
          Register RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType,
                               C->getZExtValue(), SetFlags, WantResult);
        }
    Register RHSReg = getRegForValue(RHS);
    if (!RHSReg)
      return 0;
    return emitAddSub_rx(UseAdd, RetVT, LHSReg, RHSReg, ExtendType, 0,
                         SetFlags, WantResult);
  }

  // Check if the mul can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();
      Register RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, AArch64_AM::LSL,
                                ShiftVal, SetFlags, WantResult);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<BinaryOperator>(RHS)) {
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        AArch64_AM::ShiftExtendType ShiftType = AArch64_AM::InvalidShiftExtend;
        switch (SI->getOpcode()) {
        default: break;
        case Instruction::Shl:  ShiftType = AArch64_AM::LSL; break;
        case Instruction::LShr: ShiftType = AArch64_AM::LSR; break;
        case Instruction::AShr: ShiftType = AArch64_AM::ASR; break;
        }
        uint64_t ShiftVal = C->getZExtValue();
        if (ShiftType != AArch64_AM::InvalidShiftExtend) {
          Register RHSReg = getRegForValue(SI->getOperand(0));
          if (!RHSReg)
            return 0;
          ResultReg = emitAddSub_rs(UseAdd, RetVT, LHSReg, RHSReg, ShiftType,
                                    ShiftVal, SetFlags, WantResult);
          if (ResultReg)
            return ResultReg;
        }
      }
    }
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;

  if (NeedExtend)
    RHSReg = emitIntExt(SrcVT, RHSReg, RetVT, IsZExt);

  return emitAddSub_rr(UseAdd, RetVT, LHSReg, RHSReg, SetFlags, WantResult);
}

unsigned AArch64FastISel::emitAddSub_rr(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");

  if (LHSReg == AArch64::SP || LHSReg == AArch64::WSP ||
      RHSReg == AArch64::SP || RHSReg == AArch64::WSP)
    return 0;

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrr,  AArch64::SUBXrr  },
      { AArch64::ADDWrr,  AArch64::ADDXrr  }  },
    { { AArch64::SUBSWrr, AArch64::SUBSXrr },
      { AArch64::ADDSWrr, AArch64::ADDSXrr }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg);
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_ri(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        uint64_t Imm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && "Invalid register number.");

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  unsigned ShiftImm;
  if (isUInt<12>(Imm))
    ShiftImm = 0;
  else if ((Imm & 0xfff000) == Imm) {
    ShiftImm = 12;
    Imm >>= 12;
  } else
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWri,  AArch64::SUBXri  },
      { AArch64::ADDWri,  AArch64::ADDXri  }  },
    { { AArch64::SUBSWri, AArch64::SUBSXri },
      { AArch64::ADDSWri, AArch64::ADDSXri }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg)
      .addImm(Imm)
      .addImm(getShifterImm(AArch64_AM::LSL, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rs(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ShiftType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::SP && LHSReg != AArch64::WSP &&
         RHSReg != AArch64::SP && RHSReg != AArch64::WSP);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  // Don't deal with undefined shifts.
  if (ShiftImm >= RetVT.getSizeInBits())
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrs,  AArch64::SUBXrs  },
      { AArch64::ADDWrs,  AArch64::ADDXrs  }  },
    { { AArch64::SUBSWrs, AArch64::SUBSXrs },
      { AArch64::ADDSWrs, AArch64::ADDSXrs }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC =
      Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getShifterImm(ShiftType, ShiftImm));
  return ResultReg;
}

unsigned AArch64FastISel::emitAddSub_rx(bool UseAdd, MVT RetVT, unsigned LHSReg,
                                        unsigned RHSReg,
                                        AArch64_AM::ShiftExtendType ExtType,
                                        uint64_t ShiftImm, bool SetFlags,
                                        bool WantResult) {
  assert(LHSReg && RHSReg && "Invalid register number.");
  assert(LHSReg != AArch64::XZR && LHSReg != AArch64::WZR &&
         RHSReg != AArch64::XZR && RHSReg != AArch64::WZR);

  if (RetVT != MVT::i32 && RetVT != MVT::i64)
    return 0;

  if (ShiftImm >= 4)
    return 0;

  static const unsigned OpcTable[2][2][2] = {
    { { AArch64::SUBWrx,  AArch64::SUBXrx  },
      { AArch64::ADDWrx,  AArch64::ADDXrx  }  },
    { { AArch64::SUBSWrx, AArch64::SUBSXrx },
      { AArch64::ADDSWrx, AArch64::ADDSXrx }  }
  };
  bool Is64Bit = RetVT == MVT::i64;
  unsigned Opc = OpcTable[SetFlags][UseAdd][Is64Bit];
  const TargetRegisterClass *RC = nullptr;
  if (SetFlags)
    RC = Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
  else
    RC = Is64Bit ? &AArch64::GPR64spRegClass : &AArch64::GPR32spRegClass;
  unsigned ResultReg;
  if (WantResult)
    ResultReg = createResultReg(RC);
  else
    ResultReg = Is64Bit ? AArch64::XZR : AArch64::WZR;

  const MCInstrDesc &II = TII.get(Opc);
  LHSReg = constrainOperandRegClass(II, LHSReg, II.getNumDefs());
  RHSReg = constrainOperandRegClass(II, RHSReg, II.getNumDefs() + 1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
      .addReg(LHSReg)
      .addReg(RHSReg)
      .addImm(getArithExtendImm(ExtType, ShiftImm));
  return ResultReg;
}

bool AArch64FastISel::emitCmp(const Value *LHS, const Value *RHS, bool IsZExt) {
  Type *Ty = LHS->getType();
  EVT EVT = TLI.getValueType(DL, Ty, true);
  if (!EVT.isSimple())
    return false;
  MVT VT = EVT.getSimpleVT();

  switch (VT.SimpleTy) {
  default:
    return false;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    return emitICmp(VT, LHS, RHS, IsZExt);
  case MVT::f32:
  case MVT::f64:
    return emitFCmp(VT, LHS, RHS);
  }
}

bool AArch64FastISel::emitICmp(MVT RetVT, const Value *LHS, const Value *RHS,
                               bool IsZExt) {
  return emitSub(RetVT, LHS, RHS, /*SetFlags=*/true, /*WantResult=*/false,
                 IsZExt) != 0;
}

bool AArch64FastISel::emitICmp_ri(MVT RetVT, unsigned LHSReg, uint64_t Imm) {
  return emitAddSub_ri(/*UseAdd=*/false, RetVT, LHSReg, Imm,
                       /*SetFlags=*/true, /*WantResult=*/false) != 0;
}

bool AArch64FastISel::emitFCmp(MVT RetVT, const Value *LHS, const Value *RHS) {
  if (RetVT != MVT::f32 && RetVT != MVT::f64)
    return false;

  // Check to see if the 2nd operand is a constant that we can encode directly
  // in the compare.
  bool UseImm = false;
  if (const auto *CFP = dyn_cast<ConstantFP>(RHS))
    if (CFP->isZero() && !CFP->isNegative())
      UseImm = true;

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return false;

  if (UseImm) {
    unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDri : AArch64::FCMPSri;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
        .addReg(LHSReg);
    return true;
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return false;

  unsigned Opc = (RetVT == MVT::f64) ? AArch64::FCMPDrr : AArch64::FCMPSrr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
      .addReg(LHSReg)
      .addReg(RHSReg);
  return true;
}

unsigned AArch64FastISel::emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/true, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

/// This method is a wrapper to simplify add emission.
///
/// First try to emit an add with an immediate operand using emitAddSub_ri. If
/// that fails, then try to materialize the immediate into a register and use
/// emitAddSub_rr instead.
unsigned AArch64FastISel::emitAdd_ri_(MVT VT, unsigned Op0, int64_t Imm) {
  unsigned ResultReg;
  if (Imm < 0)
    ResultReg = emitAddSub_ri(false, VT, Op0, -Imm);
  else
    ResultReg = emitAddSub_ri(true, VT, Op0, Imm);

  if (ResultReg)
    return ResultReg;

  unsigned CReg = fastEmit_i(VT, VT, ISD::Constant, Imm);
  if (!CReg)
    return 0;

  ResultReg = emitAddSub_rr(true, VT, Op0, CReg);
  return ResultReg;
}

unsigned AArch64FastISel::emitSub(MVT RetVT, const Value *LHS, const Value *RHS,
                                  bool SetFlags, bool WantResult, bool IsZExt) {
  return emitAddSub(/*UseAdd=*/false, RetVT, LHS, RHS, SetFlags, WantResult,
                    IsZExt);
}

unsigned AArch64FastISel::emitSubs_rr(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg, bool WantResult) {
  return emitAddSub_rr(/*UseAdd=*/false, RetVT, LHSReg, RHSReg,
                       /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitSubs_rs(MVT RetVT, unsigned LHSReg,
                                      unsigned RHSReg,
                                      AArch64_AM::ShiftExtendType ShiftType,
                                      uint64_t ShiftImm, bool WantResult) {
  return emitAddSub_rs(/*UseAdd=*/false, RetVT, LHSReg, RHSReg, ShiftType,
                       ShiftImm, /*SetFlags=*/true, WantResult);
}

unsigned AArch64FastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
                                        const Value *LHS, const Value *RHS) {
  // Canonicalize immediates to the RHS first.
  if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS))
    std::swap(LHS, RHS);

  // Canonicalize mul by power-of-2 to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (isMulPowOf2(LHS))
      std::swap(LHS, RHS);

  // Canonicalize shift immediate to the RHS.
  if (LHS->hasOneUse() && isValueAvailable(LHS))
    if (const auto *SI = dyn_cast<ShlOperator>(LHS))
      if (isa<ConstantInt>(SI->getOperand(1)))
        std::swap(LHS, RHS);

  Register LHSReg = getRegForValue(LHS);
  if (!LHSReg)
    return 0;

  unsigned ResultReg = 0;
  if (const auto *C = dyn_cast<ConstantInt>(RHS)) {
    uint64_t Imm = C->getZExtValue();
    ResultReg = emitLogicalOp_ri(ISDOpc, RetVT, LHSReg, Imm);
  }
  if (ResultReg)
    return ResultReg;

  // Check if the mul can be folded into the instruction.
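  // For example, "and i32 %a, (mul i32 %b, 4)" can be emitted as a single
  // shifted-register AND (roughly "AND Wd, Wn, Wm, LSL #2"), since the
  // power-of-2 multiply is just a left shift.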
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (isMulPowOf2(RHS)) {
      const Value *MulLHS = cast<MulOperator>(RHS)->getOperand(0);
      const Value *MulRHS = cast<MulOperator>(RHS)->getOperand(1);

      if (const auto *C = dyn_cast<ConstantInt>(MulLHS))
        if (C->getValue().isPowerOf2())
          std::swap(MulLHS, MulRHS);

      assert(isa<ConstantInt>(MulRHS) && "Expected a ConstantInt.");
      uint64_t ShiftVal = cast<ConstantInt>(MulRHS)->getValue().logBase2();

      Register RHSReg = getRegForValue(MulLHS);
      if (!RHSReg)
        return 0;
      ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
      if (ResultReg)
        return ResultReg;
    }
  }

  // Check if the shift can be folded into the instruction.
  if (RHS->hasOneUse() && isValueAvailable(RHS)) {
    if (const auto *SI = dyn_cast<ShlOperator>(RHS))
      if (const auto *C = dyn_cast<ConstantInt>(SI->getOperand(1))) {
        uint64_t ShiftVal = C->getZExtValue();
        Register RHSReg = getRegForValue(SI->getOperand(0));
        if (!RHSReg)
          return 0;
        ResultReg = emitLogicalOp_rs(ISDOpc, RetVT, LHSReg, RHSReg, ShiftVal);
        if (ResultReg)
          return ResultReg;
      }
  }

  Register RHSReg = getRegForValue(RHS);
  if (!RHSReg)
    return 0;

  MVT VT = std::max(MVT::i32, RetVT.SimpleTy);
  ResultReg = fastEmit_rr(VT, VT, ISDOpc, LHSReg, RHSReg);
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_ri(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, uint64_t Imm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWri, AArch64::ANDXri },
    { AArch64::ORRWri, AArch64::ORRXri },
    { AArch64::EORWri, AArch64::EORXri }
  };
  const TargetRegisterClass *RC;
  unsigned Opc;
  unsigned RegSize;
  switch (RetVT.SimpleTy) {
  default:
    return 0;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32: {
    unsigned Idx = ISDOpc - ISD::AND;
    Opc = OpcTable[Idx][0];
    RC = &AArch64::GPR32spRegClass;
    RegSize = 32;
    break;
  }
  case MVT::i64:
    Opc = OpcTable[ISDOpc - ISD::AND][1];
    RC = &AArch64::GPR64spRegClass;
    RegSize = 64;
    break;
  }

  if (!AArch64_AM::isLogicalImmediate(Imm, RegSize))
    return 0;

  Register ResultReg =
      fastEmitInst_ri(Opc, RC, LHSReg,
                      AArch64_AM::encodeLogicalImmediate(Imm, RegSize));
  if (RetVT >= MVT::i8 && RetVT <= MVT::i16 && ISDOpc != ISD::AND) {
    uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff;
    ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask);
  }
  return ResultReg;
}

unsigned AArch64FastISel::emitLogicalOp_rs(unsigned ISDOpc, MVT RetVT,
                                           unsigned LHSReg, unsigned RHSReg,
                                           uint64_t ShiftImm) {
  static_assert((ISD::AND + 1 == ISD::OR) && (ISD::AND + 2 == ISD::XOR),
                "ISD nodes are not consecutive!");
  static const unsigned OpcTable[3][2] = {
    { AArch64::ANDWrs, AArch64::ANDXrs },
    { AArch64::ORRWrs, AArch64::ORRXrs },
    { AArch64::EORWrs, AArch64::EORXrs }
  };

  // Don't deal with undefined shifts.
1698 if (ShiftImm >= RetVT.getSizeInBits()) 1699 return 0; 1700 1701 const TargetRegisterClass *RC; 1702 unsigned Opc; 1703 switch (RetVT.SimpleTy) { 1704 default: 1705 return 0; 1706 case MVT::i1: 1707 case MVT::i8: 1708 case MVT::i16: 1709 case MVT::i32: 1710 Opc = OpcTable[ISDOpc - ISD::AND][0]; 1711 RC = &AArch64::GPR32RegClass; 1712 break; 1713 case MVT::i64: 1714 Opc = OpcTable[ISDOpc - ISD::AND][1]; 1715 RC = &AArch64::GPR64RegClass; 1716 break; 1717 } 1718 Register ResultReg = 1719 fastEmitInst_rri(Opc, RC, LHSReg, RHSReg, 1720 AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftImm)); 1721 if (RetVT >= MVT::i8 && RetVT <= MVT::i16) { 1722 uint64_t Mask = (RetVT == MVT::i8) ? 0xff : 0xffff; 1723 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 1724 } 1725 return ResultReg; 1726 } 1727 1728 unsigned AArch64FastISel::emitAnd_ri(MVT RetVT, unsigned LHSReg, 1729 uint64_t Imm) { 1730 return emitLogicalOp_ri(ISD::AND, RetVT, LHSReg, Imm); 1731 } 1732 1733 unsigned AArch64FastISel::emitLoad(MVT VT, MVT RetVT, Address Addr, 1734 bool WantZExt, MachineMemOperand *MMO) { 1735 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 1736 return 0; 1737 1738 // Simplify this down to something we can handle. 1739 if (!simplifyAddress(Addr, VT)) 1740 return 0; 1741 1742 unsigned ScaleFactor = getImplicitScaleFactor(VT); 1743 if (!ScaleFactor) 1744 llvm_unreachable("Unexpected value type."); 1745 1746 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 1747 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 1748 bool UseScaled = true; 1749 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 1750 UseScaled = false; 1751 ScaleFactor = 1; 1752 } 1753 1754 static const unsigned GPOpcTable[2][8][4] = { 1755 // Sign-extend. 1756 { { AArch64::LDURSBWi, AArch64::LDURSHWi, AArch64::LDURWi, 1757 AArch64::LDURXi }, 1758 { AArch64::LDURSBXi, AArch64::LDURSHXi, AArch64::LDURSWi, 1759 AArch64::LDURXi }, 1760 { AArch64::LDRSBWui, AArch64::LDRSHWui, AArch64::LDRWui, 1761 AArch64::LDRXui }, 1762 { AArch64::LDRSBXui, AArch64::LDRSHXui, AArch64::LDRSWui, 1763 AArch64::LDRXui }, 1764 { AArch64::LDRSBWroX, AArch64::LDRSHWroX, AArch64::LDRWroX, 1765 AArch64::LDRXroX }, 1766 { AArch64::LDRSBXroX, AArch64::LDRSHXroX, AArch64::LDRSWroX, 1767 AArch64::LDRXroX }, 1768 { AArch64::LDRSBWroW, AArch64::LDRSHWroW, AArch64::LDRWroW, 1769 AArch64::LDRXroW }, 1770 { AArch64::LDRSBXroW, AArch64::LDRSHXroW, AArch64::LDRSWroW, 1771 AArch64::LDRXroW } 1772 }, 1773 // Zero-extend. 
1774 { { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1775 AArch64::LDURXi }, 1776 { AArch64::LDURBBi, AArch64::LDURHHi, AArch64::LDURWi, 1777 AArch64::LDURXi }, 1778 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1779 AArch64::LDRXui }, 1780 { AArch64::LDRBBui, AArch64::LDRHHui, AArch64::LDRWui, 1781 AArch64::LDRXui }, 1782 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1783 AArch64::LDRXroX }, 1784 { AArch64::LDRBBroX, AArch64::LDRHHroX, AArch64::LDRWroX, 1785 AArch64::LDRXroX }, 1786 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1787 AArch64::LDRXroW }, 1788 { AArch64::LDRBBroW, AArch64::LDRHHroW, AArch64::LDRWroW, 1789 AArch64::LDRXroW } 1790 } 1791 }; 1792 1793 static const unsigned FPOpcTable[4][2] = { 1794 { AArch64::LDURSi, AArch64::LDURDi }, 1795 { AArch64::LDRSui, AArch64::LDRDui }, 1796 { AArch64::LDRSroX, AArch64::LDRDroX }, 1797 { AArch64::LDRSroW, AArch64::LDRDroW } 1798 }; 1799 1800 unsigned Opc; 1801 const TargetRegisterClass *RC; 1802 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 1803 Addr.getOffsetReg(); 1804 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 1805 if (Addr.getExtendType() == AArch64_AM::UXTW || 1806 Addr.getExtendType() == AArch64_AM::SXTW) 1807 Idx++; 1808 1809 bool IsRet64Bit = RetVT == MVT::i64; 1810 switch (VT.SimpleTy) { 1811 default: 1812 llvm_unreachable("Unexpected value type."); 1813 case MVT::i1: // Intentional fall-through. 1814 case MVT::i8: 1815 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][0]; 1816 RC = (IsRet64Bit && !WantZExt) ? 1817 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1818 break; 1819 case MVT::i16: 1820 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][1]; 1821 RC = (IsRet64Bit && !WantZExt) ? 1822 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1823 break; 1824 case MVT::i32: 1825 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][2]; 1826 RC = (IsRet64Bit && !WantZExt) ? 1827 &AArch64::GPR64RegClass: &AArch64::GPR32RegClass; 1828 break; 1829 case MVT::i64: 1830 Opc = GPOpcTable[WantZExt][2 * Idx + IsRet64Bit][3]; 1831 RC = &AArch64::GPR64RegClass; 1832 break; 1833 case MVT::f32: 1834 Opc = FPOpcTable[Idx][0]; 1835 RC = &AArch64::FPR32RegClass; 1836 break; 1837 case MVT::f64: 1838 Opc = FPOpcTable[Idx][1]; 1839 RC = &AArch64::FPR64RegClass; 1840 break; 1841 } 1842 1843 // Create the base instruction, then add the operands. 1844 Register ResultReg = createResultReg(RC); 1845 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1846 TII.get(Opc), ResultReg); 1847 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOLoad, ScaleFactor, MMO); 1848 1849 // Loading an i1 requires special handling. 1850 if (VT == MVT::i1) { 1851 unsigned ANDReg = emitAnd_ri(MVT::i32, ResultReg, 1); 1852 assert(ANDReg && "Unexpected AND instruction emission failure."); 1853 ResultReg = ANDReg; 1854 } 1855 1856 // For zero-extending loads to 64bit we emit a 32bit load and then convert 1857 // the 32bit reg to a 64bit reg. 
1858 if (WantZExt && RetVT == MVT::i64 && VT <= MVT::i32) { 1859 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 1860 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 1861 TII.get(AArch64::SUBREG_TO_REG), Reg64) 1862 .addImm(0) 1863 .addReg(ResultReg, getKillRegState(true)) 1864 .addImm(AArch64::sub_32); 1865 ResultReg = Reg64; 1866 } 1867 return ResultReg; 1868 } 1869 1870 bool AArch64FastISel::selectAddSub(const Instruction *I) { 1871 MVT VT; 1872 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1873 return false; 1874 1875 if (VT.isVector()) 1876 return selectOperator(I, I->getOpcode()); 1877 1878 unsigned ResultReg; 1879 switch (I->getOpcode()) { 1880 default: 1881 llvm_unreachable("Unexpected instruction."); 1882 case Instruction::Add: 1883 ResultReg = emitAdd(VT, I->getOperand(0), I->getOperand(1)); 1884 break; 1885 case Instruction::Sub: 1886 ResultReg = emitSub(VT, I->getOperand(0), I->getOperand(1)); 1887 break; 1888 } 1889 if (!ResultReg) 1890 return false; 1891 1892 updateValueMap(I, ResultReg); 1893 return true; 1894 } 1895 1896 bool AArch64FastISel::selectLogicalOp(const Instruction *I) { 1897 MVT VT; 1898 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 1899 return false; 1900 1901 if (VT.isVector()) 1902 return selectOperator(I, I->getOpcode()); 1903 1904 unsigned ResultReg; 1905 switch (I->getOpcode()) { 1906 default: 1907 llvm_unreachable("Unexpected instruction."); 1908 case Instruction::And: 1909 ResultReg = emitLogicalOp(ISD::AND, VT, I->getOperand(0), I->getOperand(1)); 1910 break; 1911 case Instruction::Or: 1912 ResultReg = emitLogicalOp(ISD::OR, VT, I->getOperand(0), I->getOperand(1)); 1913 break; 1914 case Instruction::Xor: 1915 ResultReg = emitLogicalOp(ISD::XOR, VT, I->getOperand(0), I->getOperand(1)); 1916 break; 1917 } 1918 if (!ResultReg) 1919 return false; 1920 1921 updateValueMap(I, ResultReg); 1922 return true; 1923 } 1924 1925 bool AArch64FastISel::selectLoad(const Instruction *I) { 1926 MVT VT; 1927 // Verify we have a legal type before going any further. Currently, we handle 1928 // simple types that will directly fit in a register (i32/f32/i64/f64) or 1929 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 1930 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true) || 1931 cast<LoadInst>(I)->isAtomic()) 1932 return false; 1933 1934 const Value *SV = I->getOperand(0); 1935 if (TLI.supportSwiftError()) { 1936 // Swifterror values can come from either a function parameter with 1937 // swifterror attribute or an alloca with swifterror attribute. 1938 if (const Argument *Arg = dyn_cast<Argument>(SV)) { 1939 if (Arg->hasSwiftErrorAttr()) 1940 return false; 1941 } 1942 1943 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) { 1944 if (Alloca->isSwiftError()) 1945 return false; 1946 } 1947 } 1948 1949 // See if we can handle this address. 1950 Address Addr; 1951 if (!computeAddress(I->getOperand(0), Addr, I->getType())) 1952 return false; 1953 1954 // Fold the following sign-/zero-extend into the load instruction. 
  bool WantZExt = true;
  MVT RetVT = VT;
  const Value *IntExtVal = nullptr;
  if (I->hasOneUse()) {
    if (const auto *ZE = dyn_cast<ZExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(ZE->getType(), RetVT))
        IntExtVal = ZE;
      else
        RetVT = VT;
    } else if (const auto *SE = dyn_cast<SExtInst>(I->use_begin()->getUser())) {
      if (isTypeSupported(SE->getType(), RetVT))
        IntExtVal = SE;
      else
        RetVT = VT;
      WantZExt = false;
    }
  }

  unsigned ResultReg =
      emitLoad(VT, RetVT, Addr, WantZExt, createMachineMemOperandFor(I));
  if (!ResultReg)
    return false;

  // There are a few different cases we have to handle, because the load or the
  // sign-/zero-extend might not be selected by FastISel if we fall back to
  // SelectionDAG. There is also an ordering issue when both instructions are
  // in different basic blocks.
  // 1.) The load instruction is selected by FastISel, but not the integer
  //     extend. This usually happens when the integer extend is in a different
  //     basic block and SelectionDAG took over for that basic block.
  // 2.) The load instruction is selected before the integer extend. This only
  //     happens when the integer extend is in a different basic block.
  // 3.) The load instruction is selected by SelectionDAG and the integer
  //     extend by FastISel. This happens if there are instructions between the
  //     load and the integer extend that couldn't be selected by FastISel.
  if (IntExtVal) {
    // The integer extend hasn't been emitted yet. FastISel or SelectionDAG
    // could select it. Emit a copy to subreg if necessary. FastISel will
    // remove it when it selects the integer extend.
    Register Reg = lookUpRegForValue(IntExtVal);
    auto *MI = MRI.getUniqueVRegDef(Reg);
    if (!MI) {
      if (RetVT == MVT::i64 && VT <= MVT::i32) {
        if (WantZExt) {
          // Delete the last emitted instruction from emitLoad (SUBREG_TO_REG).
          MachineBasicBlock::iterator I(std::prev(FuncInfo.InsertPt));
          ResultReg = std::prev(I)->getOperand(0).getReg();
          removeDeadCode(I, std::next(I));
        } else
          ResultReg = fastEmitInst_extractsubreg(MVT::i32, ResultReg,
                                                 AArch64::sub_32);
      }
      updateValueMap(I, ResultReg);
      return true;
    }

    // The integer extend has already been emitted - delete all the
    // instructions that have been emitted by the integer extend lowering code
    // and use the result from the load instruction directly.
2014 while (MI) { 2015 Reg = 0; 2016 for (auto &Opnd : MI->uses()) { 2017 if (Opnd.isReg()) { 2018 Reg = Opnd.getReg(); 2019 break; 2020 } 2021 } 2022 MachineBasicBlock::iterator I(MI); 2023 removeDeadCode(I, std::next(I)); 2024 MI = nullptr; 2025 if (Reg) 2026 MI = MRI.getUniqueVRegDef(Reg); 2027 } 2028 updateValueMap(IntExtVal, ResultReg); 2029 return true; 2030 } 2031 2032 updateValueMap(I, ResultReg); 2033 return true; 2034 } 2035 2036 bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg, 2037 unsigned AddrReg, 2038 MachineMemOperand *MMO) { 2039 unsigned Opc; 2040 switch (VT.SimpleTy) { 2041 default: return false; 2042 case MVT::i8: Opc = AArch64::STLRB; break; 2043 case MVT::i16: Opc = AArch64::STLRH; break; 2044 case MVT::i32: Opc = AArch64::STLRW; break; 2045 case MVT::i64: Opc = AArch64::STLRX; break; 2046 } 2047 2048 const MCInstrDesc &II = TII.get(Opc); 2049 SrcReg = constrainOperandRegClass(II, SrcReg, 0); 2050 AddrReg = constrainOperandRegClass(II, AddrReg, 1); 2051 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2052 .addReg(SrcReg) 2053 .addReg(AddrReg) 2054 .addMemOperand(MMO); 2055 return true; 2056 } 2057 2058 bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr, 2059 MachineMemOperand *MMO) { 2060 if (!TLI.allowsMisalignedMemoryAccesses(VT)) 2061 return false; 2062 2063 // Simplify this down to something we can handle. 2064 if (!simplifyAddress(Addr, VT)) 2065 return false; 2066 2067 unsigned ScaleFactor = getImplicitScaleFactor(VT); 2068 if (!ScaleFactor) 2069 llvm_unreachable("Unexpected value type."); 2070 2071 // Negative offsets require unscaled, 9-bit, signed immediate offsets. 2072 // Otherwise, we try using scaled, 12-bit, unsigned immediate offsets. 2073 bool UseScaled = true; 2074 if ((Addr.getOffset() < 0) || (Addr.getOffset() & (ScaleFactor - 1))) { 2075 UseScaled = false; 2076 ScaleFactor = 1; 2077 } 2078 2079 static const unsigned OpcTable[4][6] = { 2080 { AArch64::STURBBi, AArch64::STURHHi, AArch64::STURWi, AArch64::STURXi, 2081 AArch64::STURSi, AArch64::STURDi }, 2082 { AArch64::STRBBui, AArch64::STRHHui, AArch64::STRWui, AArch64::STRXui, 2083 AArch64::STRSui, AArch64::STRDui }, 2084 { AArch64::STRBBroX, AArch64::STRHHroX, AArch64::STRWroX, AArch64::STRXroX, 2085 AArch64::STRSroX, AArch64::STRDroX }, 2086 { AArch64::STRBBroW, AArch64::STRHHroW, AArch64::STRWroW, AArch64::STRXroW, 2087 AArch64::STRSroW, AArch64::STRDroW } 2088 }; 2089 2090 unsigned Opc; 2091 bool VTIsi1 = false; 2092 bool UseRegOffset = Addr.isRegBase() && !Addr.getOffset() && Addr.getReg() && 2093 Addr.getOffsetReg(); 2094 unsigned Idx = UseRegOffset ? 2 : UseScaled ? 1 : 0; 2095 if (Addr.getExtendType() == AArch64_AM::UXTW || 2096 Addr.getExtendType() == AArch64_AM::SXTW) 2097 Idx++; 2098 2099 switch (VT.SimpleTy) { 2100 default: llvm_unreachable("Unexpected value type."); 2101 case MVT::i1: VTIsi1 = true; LLVM_FALLTHROUGH; 2102 case MVT::i8: Opc = OpcTable[Idx][0]; break; 2103 case MVT::i16: Opc = OpcTable[Idx][1]; break; 2104 case MVT::i32: Opc = OpcTable[Idx][2]; break; 2105 case MVT::i64: Opc = OpcTable[Idx][3]; break; 2106 case MVT::f32: Opc = OpcTable[Idx][4]; break; 2107 case MVT::f64: Opc = OpcTable[Idx][5]; break; 2108 } 2109 2110 // Storing an i1 requires special handling. 2111 if (VTIsi1 && SrcReg != AArch64::WZR) { 2112 unsigned ANDReg = emitAnd_ri(MVT::i32, SrcReg, 1); 2113 assert(ANDReg && "Unexpected AND instruction emission failure."); 2114 SrcReg = ANDReg; 2115 } 2116 // Create the base instruction, then add the operands. 
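  // With a scaled immediate address this becomes e.g. STRWui (roughly
  // "str w8, [x9, #offset]"); unscaled negative offsets use the STUR forms and
  // register offsets the STRro forms picked from the table above.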
2117 const MCInstrDesc &II = TII.get(Opc); 2118 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2119 MachineInstrBuilder MIB = 2120 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(SrcReg); 2121 addLoadStoreOperands(Addr, MIB, MachineMemOperand::MOStore, ScaleFactor, MMO); 2122 2123 return true; 2124 } 2125 2126 bool AArch64FastISel::selectStore(const Instruction *I) { 2127 MVT VT; 2128 const Value *Op0 = I->getOperand(0); 2129 // Verify we have a legal type before going any further. Currently, we handle 2130 // simple types that will directly fit in a register (i32/f32/i64/f64) or 2131 // those that can be sign or zero-extended to a basic operation (i1/i8/i16). 2132 if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true)) 2133 return false; 2134 2135 const Value *PtrV = I->getOperand(1); 2136 if (TLI.supportSwiftError()) { 2137 // Swifterror values can come from either a function parameter with 2138 // swifterror attribute or an alloca with swifterror attribute. 2139 if (const Argument *Arg = dyn_cast<Argument>(PtrV)) { 2140 if (Arg->hasSwiftErrorAttr()) 2141 return false; 2142 } 2143 2144 if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) { 2145 if (Alloca->isSwiftError()) 2146 return false; 2147 } 2148 } 2149 2150 // Get the value to be stored into a register. Use the zero register directly 2151 // when possible to avoid an unnecessary copy and a wasted register. 2152 unsigned SrcReg = 0; 2153 if (const auto *CI = dyn_cast<ConstantInt>(Op0)) { 2154 if (CI->isZero()) 2155 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2156 } else if (const auto *CF = dyn_cast<ConstantFP>(Op0)) { 2157 if (CF->isZero() && !CF->isNegative()) { 2158 VT = MVT::getIntegerVT(VT.getSizeInBits()); 2159 SrcReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 2160 } 2161 } 2162 2163 if (!SrcReg) 2164 SrcReg = getRegForValue(Op0); 2165 2166 if (!SrcReg) 2167 return false; 2168 2169 auto *SI = cast<StoreInst>(I); 2170 2171 // Try to emit a STLR for seq_cst/release. 2172 if (SI->isAtomic()) { 2173 AtomicOrdering Ord = SI->getOrdering(); 2174 // The non-atomic instructions are sufficient for relaxed stores. 2175 if (isReleaseOrStronger(Ord)) { 2176 // The STLR addressing mode only supports a base reg; pass that directly. 2177 Register AddrReg = getRegForValue(PtrV); 2178 return emitStoreRelease(VT, SrcReg, AddrReg, 2179 createMachineMemOperandFor(I)); 2180 } 2181 } 2182 2183 // See if we can handle this address. 2184 Address Addr; 2185 if (!computeAddress(PtrV, Addr, Op0->getType())) 2186 return false; 2187 2188 if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I))) 2189 return false; 2190 return true; 2191 } 2192 2193 static AArch64CC::CondCode getCompareCC(CmpInst::Predicate Pred) { 2194 switch (Pred) { 2195 case CmpInst::FCMP_ONE: 2196 case CmpInst::FCMP_UEQ: 2197 default: 2198 // AL is our "false" for now. The other two need more compares. 
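  // FCMP_ONE and FCMP_UEQ each need two condition checks after the compare
  // (e.g. ONE is roughly "MI or GT"), so they cannot be mapped to a single
  // AArch64 condition code here; callers treat AL as "not handled" and emit an
  // extra branch or CSINC for these two predicates.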
2199 return AArch64CC::AL; 2200 case CmpInst::ICMP_EQ: 2201 case CmpInst::FCMP_OEQ: 2202 return AArch64CC::EQ; 2203 case CmpInst::ICMP_SGT: 2204 case CmpInst::FCMP_OGT: 2205 return AArch64CC::GT; 2206 case CmpInst::ICMP_SGE: 2207 case CmpInst::FCMP_OGE: 2208 return AArch64CC::GE; 2209 case CmpInst::ICMP_UGT: 2210 case CmpInst::FCMP_UGT: 2211 return AArch64CC::HI; 2212 case CmpInst::FCMP_OLT: 2213 return AArch64CC::MI; 2214 case CmpInst::ICMP_ULE: 2215 case CmpInst::FCMP_OLE: 2216 return AArch64CC::LS; 2217 case CmpInst::FCMP_ORD: 2218 return AArch64CC::VC; 2219 case CmpInst::FCMP_UNO: 2220 return AArch64CC::VS; 2221 case CmpInst::FCMP_UGE: 2222 return AArch64CC::PL; 2223 case CmpInst::ICMP_SLT: 2224 case CmpInst::FCMP_ULT: 2225 return AArch64CC::LT; 2226 case CmpInst::ICMP_SLE: 2227 case CmpInst::FCMP_ULE: 2228 return AArch64CC::LE; 2229 case CmpInst::FCMP_UNE: 2230 case CmpInst::ICMP_NE: 2231 return AArch64CC::NE; 2232 case CmpInst::ICMP_UGE: 2233 return AArch64CC::HS; 2234 case CmpInst::ICMP_ULT: 2235 return AArch64CC::LO; 2236 } 2237 } 2238 2239 /// Try to emit a combined compare-and-branch instruction. 2240 bool AArch64FastISel::emitCompareAndBranch(const BranchInst *BI) { 2241 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions 2242 // will not be produced, as they are conditional branch instructions that do 2243 // not set flags. 2244 if (FuncInfo.MF->getFunction().hasFnAttribute( 2245 Attribute::SpeculativeLoadHardening)) 2246 return false; 2247 2248 assert(isa<CmpInst>(BI->getCondition()) && "Expected cmp instruction"); 2249 const CmpInst *CI = cast<CmpInst>(BI->getCondition()); 2250 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2251 2252 const Value *LHS = CI->getOperand(0); 2253 const Value *RHS = CI->getOperand(1); 2254 2255 MVT VT; 2256 if (!isTypeSupported(LHS->getType(), VT)) 2257 return false; 2258 2259 unsigned BW = VT.getSizeInBits(); 2260 if (BW > 64) 2261 return false; 2262 2263 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2264 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2265 2266 // Try to take advantage of fallthrough opportunities. 
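  // If the true block is the layout successor we branch on the inverted
  // predicate to the false block and simply fall through to the true block,
  // e.g. a would-be cbz becomes a cbnz to the other target.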
2267 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2268 std::swap(TBB, FBB); 2269 Predicate = CmpInst::getInversePredicate(Predicate); 2270 } 2271 2272 int TestBit = -1; 2273 bool IsCmpNE; 2274 switch (Predicate) { 2275 default: 2276 return false; 2277 case CmpInst::ICMP_EQ: 2278 case CmpInst::ICMP_NE: 2279 if (isa<Constant>(LHS) && cast<Constant>(LHS)->isNullValue()) 2280 std::swap(LHS, RHS); 2281 2282 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2283 return false; 2284 2285 if (const auto *AI = dyn_cast<BinaryOperator>(LHS)) 2286 if (AI->getOpcode() == Instruction::And && isValueAvailable(AI)) { 2287 const Value *AndLHS = AI->getOperand(0); 2288 const Value *AndRHS = AI->getOperand(1); 2289 2290 if (const auto *C = dyn_cast<ConstantInt>(AndLHS)) 2291 if (C->getValue().isPowerOf2()) 2292 std::swap(AndLHS, AndRHS); 2293 2294 if (const auto *C = dyn_cast<ConstantInt>(AndRHS)) 2295 if (C->getValue().isPowerOf2()) { 2296 TestBit = C->getValue().logBase2(); 2297 LHS = AndLHS; 2298 } 2299 } 2300 2301 if (VT == MVT::i1) 2302 TestBit = 0; 2303 2304 IsCmpNE = Predicate == CmpInst::ICMP_NE; 2305 break; 2306 case CmpInst::ICMP_SLT: 2307 case CmpInst::ICMP_SGE: 2308 if (!isa<Constant>(RHS) || !cast<Constant>(RHS)->isNullValue()) 2309 return false; 2310 2311 TestBit = BW - 1; 2312 IsCmpNE = Predicate == CmpInst::ICMP_SLT; 2313 break; 2314 case CmpInst::ICMP_SGT: 2315 case CmpInst::ICMP_SLE: 2316 if (!isa<ConstantInt>(RHS)) 2317 return false; 2318 2319 if (cast<ConstantInt>(RHS)->getValue() != APInt(BW, -1, true)) 2320 return false; 2321 2322 TestBit = BW - 1; 2323 IsCmpNE = Predicate == CmpInst::ICMP_SLE; 2324 break; 2325 } // end switch 2326 2327 static const unsigned OpcTable[2][2][2] = { 2328 { {AArch64::CBZW, AArch64::CBZX }, 2329 {AArch64::CBNZW, AArch64::CBNZX} }, 2330 { {AArch64::TBZW, AArch64::TBZX }, 2331 {AArch64::TBNZW, AArch64::TBNZX} } 2332 }; 2333 2334 bool IsBitTest = TestBit != -1; 2335 bool Is64Bit = BW == 64; 2336 if (TestBit < 32 && TestBit >= 0) 2337 Is64Bit = false; 2338 2339 unsigned Opc = OpcTable[IsBitTest][IsCmpNE][Is64Bit]; 2340 const MCInstrDesc &II = TII.get(Opc); 2341 2342 Register SrcReg = getRegForValue(LHS); 2343 if (!SrcReg) 2344 return false; 2345 2346 if (BW == 64 && !Is64Bit) 2347 SrcReg = fastEmitInst_extractsubreg(MVT::i32, SrcReg, AArch64::sub_32); 2348 2349 if ((BW < 32) && !IsBitTest) 2350 SrcReg = emitIntExt(VT, SrcReg, MVT::i32, /*isZExt=*/true); 2351 2352 // Emit the combined compare and branch instruction. 2353 SrcReg = constrainOperandRegClass(II, SrcReg, II.getNumDefs()); 2354 MachineInstrBuilder MIB = 2355 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)) 2356 .addReg(SrcReg); 2357 if (IsBitTest) 2358 MIB.addImm(TestBit); 2359 MIB.addMBB(TBB); 2360 2361 finishCondBranch(BI->getParent(), TBB, FBB); 2362 return true; 2363 } 2364 2365 bool AArch64FastISel::selectBranch(const Instruction *I) { 2366 const BranchInst *BI = cast<BranchInst>(I); 2367 if (BI->isUnconditional()) { 2368 MachineBasicBlock *MSucc = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2369 fastEmitBranch(MSucc, BI->getDebugLoc()); 2370 return true; 2371 } 2372 2373 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 2374 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 2375 2376 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 2377 if (CI->hasOneUse() && isValueAvailable(CI)) { 2378 // Try to optimize or fold the cmp. 
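      // A compare that folds to a constant needs no compare at all; for
      // example "br i1 (fcmp false ...), %t, %f" is just an unconditional
      // branch to %f, handled below.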
2379 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2380 switch (Predicate) { 2381 default: 2382 break; 2383 case CmpInst::FCMP_FALSE: 2384 fastEmitBranch(FBB, DbgLoc); 2385 return true; 2386 case CmpInst::FCMP_TRUE: 2387 fastEmitBranch(TBB, DbgLoc); 2388 return true; 2389 } 2390 2391 // Try to emit a combined compare-and-branch first. 2392 if (emitCompareAndBranch(BI)) 2393 return true; 2394 2395 // Try to take advantage of fallthrough opportunities. 2396 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2397 std::swap(TBB, FBB); 2398 Predicate = CmpInst::getInversePredicate(Predicate); 2399 } 2400 2401 // Emit the cmp. 2402 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2403 return false; 2404 2405 // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch 2406 // instruction. 2407 AArch64CC::CondCode CC = getCompareCC(Predicate); 2408 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2409 switch (Predicate) { 2410 default: 2411 break; 2412 case CmpInst::FCMP_UEQ: 2413 ExtraCC = AArch64CC::EQ; 2414 CC = AArch64CC::VS; 2415 break; 2416 case CmpInst::FCMP_ONE: 2417 ExtraCC = AArch64CC::MI; 2418 CC = AArch64CC::GT; 2419 break; 2420 } 2421 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2422 2423 // Emit the extra branch for FCMP_UEQ and FCMP_ONE. 2424 if (ExtraCC != AArch64CC::AL) { 2425 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2426 .addImm(ExtraCC) 2427 .addMBB(TBB); 2428 } 2429 2430 // Emit the branch. 2431 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2432 .addImm(CC) 2433 .addMBB(TBB); 2434 2435 finishCondBranch(BI->getParent(), TBB, FBB); 2436 return true; 2437 } 2438 } else if (const auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) { 2439 uint64_t Imm = CI->getZExtValue(); 2440 MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; 2441 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::B)) 2442 .addMBB(Target); 2443 2444 // Obtain the branch probability and add the target to the successor list. 2445 if (FuncInfo.BPI) { 2446 auto BranchProbability = FuncInfo.BPI->getEdgeProbability( 2447 BI->getParent(), Target->getBasicBlock()); 2448 FuncInfo.MBB->addSuccessor(Target, BranchProbability); 2449 } else 2450 FuncInfo.MBB->addSuccessorWithoutProb(Target); 2451 return true; 2452 } else { 2453 AArch64CC::CondCode CC = AArch64CC::NE; 2454 if (foldXALUIntrinsic(CC, I, BI->getCondition())) { 2455 // Fake request the condition, otherwise the intrinsic might be completely 2456 // optimized away. 2457 Register CondReg = getRegForValue(BI->getCondition()); 2458 if (!CondReg) 2459 return false; 2460 2461 // Emit the branch. 2462 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) 2463 .addImm(CC) 2464 .addMBB(TBB); 2465 2466 finishCondBranch(BI->getParent(), TBB, FBB); 2467 return true; 2468 } 2469 } 2470 2471 Register CondReg = getRegForValue(BI->getCondition()); 2472 if (CondReg == 0) 2473 return false; 2474 2475 // i1 conditions come as i32 values, test the lowest bit with tb(n)z. 
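  // Roughly: "tbnz w8, #0, .LBB_true" (or tbz once the targets are swapped for
  // fallthrough below); only bit 0 is meaningful for an i1 carried in a W
  // register, so a full compare against zero is unnecessary.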
2476 unsigned Opcode = AArch64::TBNZW; 2477 if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { 2478 std::swap(TBB, FBB); 2479 Opcode = AArch64::TBZW; 2480 } 2481 2482 const MCInstrDesc &II = TII.get(Opcode); 2483 Register ConstrainedCondReg 2484 = constrainOperandRegClass(II, CondReg, II.getNumDefs()); 2485 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 2486 .addReg(ConstrainedCondReg) 2487 .addImm(0) 2488 .addMBB(TBB); 2489 2490 finishCondBranch(BI->getParent(), TBB, FBB); 2491 return true; 2492 } 2493 2494 bool AArch64FastISel::selectIndirectBr(const Instruction *I) { 2495 const IndirectBrInst *BI = cast<IndirectBrInst>(I); 2496 Register AddrReg = getRegForValue(BI->getOperand(0)); 2497 if (AddrReg == 0) 2498 return false; 2499 2500 // Emit the indirect branch. 2501 const MCInstrDesc &II = TII.get(AArch64::BR); 2502 AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs()); 2503 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(AddrReg); 2504 2505 // Make sure the CFG is up-to-date. 2506 for (auto *Succ : BI->successors()) 2507 FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[Succ]); 2508 2509 return true; 2510 } 2511 2512 bool AArch64FastISel::selectCmp(const Instruction *I) { 2513 const CmpInst *CI = cast<CmpInst>(I); 2514 2515 // Vectors of i1 are weird: bail out. 2516 if (CI->getType()->isVectorTy()) 2517 return false; 2518 2519 // Try to optimize or fold the cmp. 2520 CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); 2521 unsigned ResultReg = 0; 2522 switch (Predicate) { 2523 default: 2524 break; 2525 case CmpInst::FCMP_FALSE: 2526 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2527 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2528 TII.get(TargetOpcode::COPY), ResultReg) 2529 .addReg(AArch64::WZR, getKillRegState(true)); 2530 break; 2531 case CmpInst::FCMP_TRUE: 2532 ResultReg = fastEmit_i(MVT::i32, MVT::i32, ISD::Constant, 1); 2533 break; 2534 } 2535 2536 if (ResultReg) { 2537 updateValueMap(I, ResultReg); 2538 return true; 2539 } 2540 2541 // Emit the cmp. 2542 if (!emitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) 2543 return false; 2544 2545 ResultReg = createResultReg(&AArch64::GPR32RegClass); 2546 2547 // FCMP_UEQ and FCMP_ONE cannot be checked with a single instruction. These 2548 // condition codes are inverted, because they are used by CSINC. 2549 static unsigned CondCodeTable[2][2] = { 2550 { AArch64CC::NE, AArch64CC::VC }, 2551 { AArch64CC::PL, AArch64CC::LE } 2552 }; 2553 unsigned *CondCodes = nullptr; 2554 switch (Predicate) { 2555 default: 2556 break; 2557 case CmpInst::FCMP_UEQ: 2558 CondCodes = &CondCodeTable[0][0]; 2559 break; 2560 case CmpInst::FCMP_ONE: 2561 CondCodes = &CondCodeTable[1][0]; 2562 break; 2563 } 2564 2565 if (CondCodes) { 2566 Register TmpReg1 = createResultReg(&AArch64::GPR32RegClass); 2567 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2568 TmpReg1) 2569 .addReg(AArch64::WZR, getKillRegState(true)) 2570 .addReg(AArch64::WZR, getKillRegState(true)) 2571 .addImm(CondCodes[0]); 2572 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2573 ResultReg) 2574 .addReg(TmpReg1, getKillRegState(true)) 2575 .addReg(AArch64::WZR, getKillRegState(true)) 2576 .addImm(CondCodes[1]); 2577 2578 updateValueMap(I, ResultReg); 2579 return true; 2580 } 2581 2582 // Now set a register based on the comparison. 
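  // CSINC Wd, WZR, WZR, <inverted cc> yields 1 when the original condition
  // holds and 0 otherwise; this is what the "cset" alias expands to. For an
  // ICMP_EQ, for example, the result is roughly "cset w0, eq".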
2583 AArch64CC::CondCode CC = getCompareCC(Predicate); 2584 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2585 AArch64CC::CondCode invertedCC = getInvertedCondCode(CC); 2586 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr), 2587 ResultReg) 2588 .addReg(AArch64::WZR, getKillRegState(true)) 2589 .addReg(AArch64::WZR, getKillRegState(true)) 2590 .addImm(invertedCC); 2591 2592 updateValueMap(I, ResultReg); 2593 return true; 2594 } 2595 2596 /// Optimize selects of i1 if one of the operands has a 'true' or 'false' 2597 /// value. 2598 bool AArch64FastISel::optimizeSelect(const SelectInst *SI) { 2599 if (!SI->getType()->isIntegerTy(1)) 2600 return false; 2601 2602 const Value *Src1Val, *Src2Val; 2603 unsigned Opc = 0; 2604 bool NeedExtraOp = false; 2605 if (auto *CI = dyn_cast<ConstantInt>(SI->getTrueValue())) { 2606 if (CI->isOne()) { 2607 Src1Val = SI->getCondition(); 2608 Src2Val = SI->getFalseValue(); 2609 Opc = AArch64::ORRWrr; 2610 } else { 2611 assert(CI->isZero()); 2612 Src1Val = SI->getFalseValue(); 2613 Src2Val = SI->getCondition(); 2614 Opc = AArch64::BICWrr; 2615 } 2616 } else if (auto *CI = dyn_cast<ConstantInt>(SI->getFalseValue())) { 2617 if (CI->isOne()) { 2618 Src1Val = SI->getCondition(); 2619 Src2Val = SI->getTrueValue(); 2620 Opc = AArch64::ORRWrr; 2621 NeedExtraOp = true; 2622 } else { 2623 assert(CI->isZero()); 2624 Src1Val = SI->getCondition(); 2625 Src2Val = SI->getTrueValue(); 2626 Opc = AArch64::ANDWrr; 2627 } 2628 } 2629 2630 if (!Opc) 2631 return false; 2632 2633 Register Src1Reg = getRegForValue(Src1Val); 2634 if (!Src1Reg) 2635 return false; 2636 2637 Register Src2Reg = getRegForValue(Src2Val); 2638 if (!Src2Reg) 2639 return false; 2640 2641 if (NeedExtraOp) 2642 Src1Reg = emitLogicalOp_ri(ISD::XOR, MVT::i32, Src1Reg, 1); 2643 2644 Register ResultReg = fastEmitInst_rr(Opc, &AArch64::GPR32RegClass, Src1Reg, 2645 Src2Reg); 2646 updateValueMap(SI, ResultReg); 2647 return true; 2648 } 2649 2650 bool AArch64FastISel::selectSelect(const Instruction *I) { 2651 assert(isa<SelectInst>(I) && "Expected a select instruction."); 2652 MVT VT; 2653 if (!isTypeSupported(I->getType(), VT)) 2654 return false; 2655 2656 unsigned Opc; 2657 const TargetRegisterClass *RC; 2658 switch (VT.SimpleTy) { 2659 default: 2660 return false; 2661 case MVT::i1: 2662 case MVT::i8: 2663 case MVT::i16: 2664 case MVT::i32: 2665 Opc = AArch64::CSELWr; 2666 RC = &AArch64::GPR32RegClass; 2667 break; 2668 case MVT::i64: 2669 Opc = AArch64::CSELXr; 2670 RC = &AArch64::GPR64RegClass; 2671 break; 2672 case MVT::f32: 2673 Opc = AArch64::FCSELSrrr; 2674 RC = &AArch64::FPR32RegClass; 2675 break; 2676 case MVT::f64: 2677 Opc = AArch64::FCSELDrrr; 2678 RC = &AArch64::FPR64RegClass; 2679 break; 2680 } 2681 2682 const SelectInst *SI = cast<SelectInst>(I); 2683 const Value *Cond = SI->getCondition(); 2684 AArch64CC::CondCode CC = AArch64CC::NE; 2685 AArch64CC::CondCode ExtraCC = AArch64CC::AL; 2686 2687 if (optimizeSelect(SI)) 2688 return true; 2689 2690 // Try to pickup the flags, so we don't have to emit another compare. 2691 if (foldXALUIntrinsic(CC, I, Cond)) { 2692 // Fake request the condition to force emission of the XALU intrinsic. 2693 Register CondReg = getRegForValue(Cond); 2694 if (!CondReg) 2695 return false; 2696 } else if (isa<CmpInst>(Cond) && cast<CmpInst>(Cond)->hasOneUse() && 2697 isValueAvailable(Cond)) { 2698 const auto *Cmp = cast<CmpInst>(Cond); 2699 // Try to optimize or fold the cmp. 
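    // A select whose condition folds to a constant needs no CSEL at all; e.g.
    // "select i1 (fcmp true ...), %a, %b" simply forwards %a, which is what
    // the FoldSelect handling below does.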
2700 CmpInst::Predicate Predicate = optimizeCmpPredicate(Cmp); 2701 const Value *FoldSelect = nullptr; 2702 switch (Predicate) { 2703 default: 2704 break; 2705 case CmpInst::FCMP_FALSE: 2706 FoldSelect = SI->getFalseValue(); 2707 break; 2708 case CmpInst::FCMP_TRUE: 2709 FoldSelect = SI->getTrueValue(); 2710 break; 2711 } 2712 2713 if (FoldSelect) { 2714 Register SrcReg = getRegForValue(FoldSelect); 2715 if (!SrcReg) 2716 return false; 2717 2718 updateValueMap(I, SrcReg); 2719 return true; 2720 } 2721 2722 // Emit the cmp. 2723 if (!emitCmp(Cmp->getOperand(0), Cmp->getOperand(1), Cmp->isUnsigned())) 2724 return false; 2725 2726 // FCMP_UEQ and FCMP_ONE cannot be checked with a single select instruction. 2727 CC = getCompareCC(Predicate); 2728 switch (Predicate) { 2729 default: 2730 break; 2731 case CmpInst::FCMP_UEQ: 2732 ExtraCC = AArch64CC::EQ; 2733 CC = AArch64CC::VS; 2734 break; 2735 case CmpInst::FCMP_ONE: 2736 ExtraCC = AArch64CC::MI; 2737 CC = AArch64CC::GT; 2738 break; 2739 } 2740 assert((CC != AArch64CC::AL) && "Unexpected condition code."); 2741 } else { 2742 Register CondReg = getRegForValue(Cond); 2743 if (!CondReg) 2744 return false; 2745 2746 const MCInstrDesc &II = TII.get(AArch64::ANDSWri); 2747 CondReg = constrainOperandRegClass(II, CondReg, 1); 2748 2749 // Emit a TST instruction (ANDS wzr, reg, #imm). 2750 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, 2751 AArch64::WZR) 2752 .addReg(CondReg) 2753 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2754 } 2755 2756 Register Src1Reg = getRegForValue(SI->getTrueValue()); 2757 Register Src2Reg = getRegForValue(SI->getFalseValue()); 2758 2759 if (!Src1Reg || !Src2Reg) 2760 return false; 2761 2762 if (ExtraCC != AArch64CC::AL) 2763 Src2Reg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, ExtraCC); 2764 2765 Register ResultReg = fastEmitInst_rri(Opc, RC, Src1Reg, Src2Reg, CC); 2766 updateValueMap(I, ResultReg); 2767 return true; 2768 } 2769 2770 bool AArch64FastISel::selectFPExt(const Instruction *I) { 2771 Value *V = I->getOperand(0); 2772 if (!I->getType()->isDoubleTy() || !V->getType()->isFloatTy()) 2773 return false; 2774 2775 Register Op = getRegForValue(V); 2776 if (Op == 0) 2777 return false; 2778 2779 Register ResultReg = createResultReg(&AArch64::FPR64RegClass); 2780 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTDSr), 2781 ResultReg).addReg(Op); 2782 updateValueMap(I, ResultReg); 2783 return true; 2784 } 2785 2786 bool AArch64FastISel::selectFPTrunc(const Instruction *I) { 2787 Value *V = I->getOperand(0); 2788 if (!I->getType()->isFloatTy() || !V->getType()->isDoubleTy()) 2789 return false; 2790 2791 Register Op = getRegForValue(V); 2792 if (Op == 0) 2793 return false; 2794 2795 Register ResultReg = createResultReg(&AArch64::FPR32RegClass); 2796 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::FCVTSDr), 2797 ResultReg).addReg(Op); 2798 updateValueMap(I, ResultReg); 2799 return true; 2800 } 2801 2802 // FPToUI and FPToSI 2803 bool AArch64FastISel::selectFPToInt(const Instruction *I, bool Signed) { 2804 MVT DestVT; 2805 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2806 return false; 2807 2808 Register SrcReg = getRegForValue(I->getOperand(0)); 2809 if (SrcReg == 0) 2810 return false; 2811 2812 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2813 if (SrcVT == MVT::f128 || SrcVT == MVT::f16) 2814 return false; 2815 2816 unsigned Opc; 2817 if (SrcVT == MVT::f64) { 2818 if (Signed) 2819 Opc = (DestVT == MVT::i32) ? 
AArch64::FCVTZSUWDr : AArch64::FCVTZSUXDr; 2820 else 2821 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWDr : AArch64::FCVTZUUXDr; 2822 } else { 2823 if (Signed) 2824 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZSUWSr : AArch64::FCVTZSUXSr; 2825 else 2826 Opc = (DestVT == MVT::i32) ? AArch64::FCVTZUUWSr : AArch64::FCVTZUUXSr; 2827 } 2828 Register ResultReg = createResultReg( 2829 DestVT == MVT::i32 ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass); 2830 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 2831 .addReg(SrcReg); 2832 updateValueMap(I, ResultReg); 2833 return true; 2834 } 2835 2836 bool AArch64FastISel::selectIntToFP(const Instruction *I, bool Signed) { 2837 MVT DestVT; 2838 if (!isTypeLegal(I->getType(), DestVT) || DestVT.isVector()) 2839 return false; 2840 // Let regular ISEL handle FP16 2841 if (DestVT == MVT::f16) 2842 return false; 2843 2844 assert((DestVT == MVT::f32 || DestVT == MVT::f64) && 2845 "Unexpected value type."); 2846 2847 Register SrcReg = getRegForValue(I->getOperand(0)); 2848 if (!SrcReg) 2849 return false; 2850 2851 EVT SrcVT = TLI.getValueType(DL, I->getOperand(0)->getType(), true); 2852 2853 // Handle sign-extension. 2854 if (SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { 2855 SrcReg = 2856 emitIntExt(SrcVT.getSimpleVT(), SrcReg, MVT::i32, /*isZExt*/ !Signed); 2857 if (!SrcReg) 2858 return false; 2859 } 2860 2861 unsigned Opc; 2862 if (SrcVT == MVT::i64) { 2863 if (Signed) 2864 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUXSri : AArch64::SCVTFUXDri; 2865 else 2866 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUXSri : AArch64::UCVTFUXDri; 2867 } else { 2868 if (Signed) 2869 Opc = (DestVT == MVT::f32) ? AArch64::SCVTFUWSri : AArch64::SCVTFUWDri; 2870 else 2871 Opc = (DestVT == MVT::f32) ? AArch64::UCVTFUWSri : AArch64::UCVTFUWDri; 2872 } 2873 2874 Register ResultReg = fastEmitInst_r(Opc, TLI.getRegClassFor(DestVT), SrcReg); 2875 updateValueMap(I, ResultReg); 2876 return true; 2877 } 2878 2879 bool AArch64FastISel::fastLowerArguments() { 2880 if (!FuncInfo.CanLowerReturn) 2881 return false; 2882 2883 const Function *F = FuncInfo.Fn; 2884 if (F->isVarArg()) 2885 return false; 2886 2887 CallingConv::ID CC = F->getCallingConv(); 2888 if (CC != CallingConv::C && CC != CallingConv::Swift) 2889 return false; 2890 2891 if (Subtarget->hasCustomCallingConv()) 2892 return false; 2893 2894 // Only handle simple cases of up to 8 GPR and FPR each. 
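  // This mirrors the AAPCS64 register assignment: integer arguments land in
  // w0-w7/x0-x7 and floating-point/vector arguments in the first eight FP/SIMD
  // registers. Anything that would have to go on the stack (or needs byval,
  // sret, swifterror, etc.) is left to SelectionDAG.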
2895 unsigned GPRCnt = 0; 2896 unsigned FPRCnt = 0; 2897 for (auto const &Arg : F->args()) { 2898 if (Arg.hasAttribute(Attribute::ByVal) || 2899 Arg.hasAttribute(Attribute::InReg) || 2900 Arg.hasAttribute(Attribute::StructRet) || 2901 Arg.hasAttribute(Attribute::SwiftSelf) || 2902 Arg.hasAttribute(Attribute::SwiftAsync) || 2903 Arg.hasAttribute(Attribute::SwiftError) || 2904 Arg.hasAttribute(Attribute::Nest)) 2905 return false; 2906 2907 Type *ArgTy = Arg.getType(); 2908 if (ArgTy->isStructTy() || ArgTy->isArrayTy()) 2909 return false; 2910 2911 EVT ArgVT = TLI.getValueType(DL, ArgTy); 2912 if (!ArgVT.isSimple()) 2913 return false; 2914 2915 MVT VT = ArgVT.getSimpleVT().SimpleTy; 2916 if (VT.isFloatingPoint() && !Subtarget->hasFPARMv8()) 2917 return false; 2918 2919 if (VT.isVector() && 2920 (!Subtarget->hasNEON() || !Subtarget->isLittleEndian())) 2921 return false; 2922 2923 if (VT >= MVT::i1 && VT <= MVT::i64) 2924 ++GPRCnt; 2925 else if ((VT >= MVT::f16 && VT <= MVT::f64) || VT.is64BitVector() || 2926 VT.is128BitVector()) 2927 ++FPRCnt; 2928 else 2929 return false; 2930 2931 if (GPRCnt > 8 || FPRCnt > 8) 2932 return false; 2933 } 2934 2935 static const MCPhysReg Registers[6][8] = { 2936 { AArch64::W0, AArch64::W1, AArch64::W2, AArch64::W3, AArch64::W4, 2937 AArch64::W5, AArch64::W6, AArch64::W7 }, 2938 { AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3, AArch64::X4, 2939 AArch64::X5, AArch64::X6, AArch64::X7 }, 2940 { AArch64::H0, AArch64::H1, AArch64::H2, AArch64::H3, AArch64::H4, 2941 AArch64::H5, AArch64::H6, AArch64::H7 }, 2942 { AArch64::S0, AArch64::S1, AArch64::S2, AArch64::S3, AArch64::S4, 2943 AArch64::S5, AArch64::S6, AArch64::S7 }, 2944 { AArch64::D0, AArch64::D1, AArch64::D2, AArch64::D3, AArch64::D4, 2945 AArch64::D5, AArch64::D6, AArch64::D7 }, 2946 { AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3, AArch64::Q4, 2947 AArch64::Q5, AArch64::Q6, AArch64::Q7 } 2948 }; 2949 2950 unsigned GPRIdx = 0; 2951 unsigned FPRIdx = 0; 2952 for (auto const &Arg : F->args()) { 2953 MVT VT = TLI.getSimpleValueType(DL, Arg.getType()); 2954 unsigned SrcReg; 2955 const TargetRegisterClass *RC; 2956 if (VT >= MVT::i1 && VT <= MVT::i32) { 2957 SrcReg = Registers[0][GPRIdx++]; 2958 RC = &AArch64::GPR32RegClass; 2959 VT = MVT::i32; 2960 } else if (VT == MVT::i64) { 2961 SrcReg = Registers[1][GPRIdx++]; 2962 RC = &AArch64::GPR64RegClass; 2963 } else if (VT == MVT::f16) { 2964 SrcReg = Registers[2][FPRIdx++]; 2965 RC = &AArch64::FPR16RegClass; 2966 } else if (VT == MVT::f32) { 2967 SrcReg = Registers[3][FPRIdx++]; 2968 RC = &AArch64::FPR32RegClass; 2969 } else if ((VT == MVT::f64) || VT.is64BitVector()) { 2970 SrcReg = Registers[4][FPRIdx++]; 2971 RC = &AArch64::FPR64RegClass; 2972 } else if (VT.is128BitVector()) { 2973 SrcReg = Registers[5][FPRIdx++]; 2974 RC = &AArch64::FPR128RegClass; 2975 } else 2976 llvm_unreachable("Unexpected value type."); 2977 2978 Register DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); 2979 // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. 2980 // Without this, EmitLiveInCopies may eliminate the livein if its only 2981 // use is a bitcast (which isn't turned into an instruction). 
2982 Register ResultReg = createResultReg(RC); 2983 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 2984 TII.get(TargetOpcode::COPY), ResultReg) 2985 .addReg(DstReg, getKillRegState(true)); 2986 updateValueMap(&Arg, ResultReg); 2987 } 2988 return true; 2989 } 2990 2991 bool AArch64FastISel::processCallArgs(CallLoweringInfo &CLI, 2992 SmallVectorImpl<MVT> &OutVTs, 2993 unsigned &NumBytes) { 2994 CallingConv::ID CC = CLI.CallConv; 2995 SmallVector<CCValAssign, 16> ArgLocs; 2996 CCState CCInfo(CC, false, *FuncInfo.MF, ArgLocs, *Context); 2997 CCInfo.AnalyzeCallOperands(OutVTs, CLI.OutFlags, CCAssignFnForCall(CC)); 2998 2999 // Get a count of how many bytes are to be pushed on the stack. 3000 NumBytes = CCInfo.getNextStackOffset(); 3001 3002 // Issue CALLSEQ_START 3003 unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); 3004 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) 3005 .addImm(NumBytes).addImm(0); 3006 3007 // Process the args. 3008 for (CCValAssign &VA : ArgLocs) { 3009 const Value *ArgVal = CLI.OutVals[VA.getValNo()]; 3010 MVT ArgVT = OutVTs[VA.getValNo()]; 3011 3012 Register ArgReg = getRegForValue(ArgVal); 3013 if (!ArgReg) 3014 return false; 3015 3016 // Handle arg promotion: SExt, ZExt, AExt. 3017 switch (VA.getLocInfo()) { 3018 case CCValAssign::Full: 3019 break; 3020 case CCValAssign::SExt: { 3021 MVT DestVT = VA.getLocVT(); 3022 MVT SrcVT = ArgVT; 3023 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/false); 3024 if (!ArgReg) 3025 return false; 3026 break; 3027 } 3028 case CCValAssign::AExt: 3029 // Intentional fall-through. 3030 case CCValAssign::ZExt: { 3031 MVT DestVT = VA.getLocVT(); 3032 MVT SrcVT = ArgVT; 3033 ArgReg = emitIntExt(SrcVT, ArgReg, DestVT, /*isZExt=*/true); 3034 if (!ArgReg) 3035 return false; 3036 break; 3037 } 3038 default: 3039 llvm_unreachable("Unknown arg promotion!"); 3040 } 3041 3042 // Now copy/store arg to correct locations. 3043 if (VA.isRegLoc() && !VA.needsCustom()) { 3044 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3045 TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg); 3046 CLI.OutRegs.push_back(VA.getLocReg()); 3047 } else if (VA.needsCustom()) { 3048 // FIXME: Handle custom args. 3049 return false; 3050 } else { 3051 assert(VA.isMemLoc() && "Assuming store on stack."); 3052 3053 // Don't emit stores for undef values. 3054 if (isa<UndefValue>(ArgVal)) 3055 continue; 3056 3057 // Need to store on the stack. 3058 unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; 3059 3060 unsigned BEAlign = 0; 3061 if (ArgSize < 8 && !Subtarget->isLittleEndian()) 3062 BEAlign = 8 - ArgSize; 3063 3064 Address Addr; 3065 Addr.setKind(Address::RegBase); 3066 Addr.setReg(AArch64::SP); 3067 Addr.setOffset(VA.getLocMemOffset() + BEAlign); 3068 3069 Align Alignment = DL.getABITypeAlign(ArgVal->getType()); 3070 MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand( 3071 MachinePointerInfo::getStack(*FuncInfo.MF, Addr.getOffset()), 3072 MachineMemOperand::MOStore, ArgVT.getStoreSize(), Alignment); 3073 3074 if (!emitStore(ArgVT, ArgReg, Addr, MMO)) 3075 return false; 3076 } 3077 } 3078 return true; 3079 } 3080 3081 bool AArch64FastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT, 3082 unsigned NumBytes) { 3083 CallingConv::ID CC = CLI.CallConv; 3084 3085 // Issue CALLSEQ_END 3086 unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); 3087 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) 3088 .addImm(NumBytes).addImm(0); 3089 3090 // Now the return value. 
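  // Only a single register return is handled here; e.g. an i32 result is
  // copied out of w0 into a fresh virtual register, while multi-register or
  // aggregate returns make the whole call fall back to SelectionDAG.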
3091 if (RetVT != MVT::isVoid) { 3092 SmallVector<CCValAssign, 16> RVLocs; 3093 CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context); 3094 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC)); 3095 3096 // Only handle a single return value. 3097 if (RVLocs.size() != 1) 3098 return false; 3099 3100 // Copy all of the result registers out of their specified physreg. 3101 MVT CopyVT = RVLocs[0].getValVT(); 3102 3103 // TODO: Handle big-endian results 3104 if (CopyVT.isVector() && !Subtarget->isLittleEndian()) 3105 return false; 3106 3107 Register ResultReg = createResultReg(TLI.getRegClassFor(CopyVT)); 3108 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3109 TII.get(TargetOpcode::COPY), ResultReg) 3110 .addReg(RVLocs[0].getLocReg()); 3111 CLI.InRegs.push_back(RVLocs[0].getLocReg()); 3112 3113 CLI.ResultReg = ResultReg; 3114 CLI.NumResultRegs = 1; 3115 } 3116 3117 return true; 3118 } 3119 3120 bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { 3121 CallingConv::ID CC = CLI.CallConv; 3122 bool IsTailCall = CLI.IsTailCall; 3123 bool IsVarArg = CLI.IsVarArg; 3124 const Value *Callee = CLI.Callee; 3125 MCSymbol *Symbol = CLI.Symbol; 3126 3127 if (!Callee && !Symbol) 3128 return false; 3129 3130 // Allow SelectionDAG isel to handle calls to functions like setjmp that need 3131 // a bti instruction following the call. 3132 if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) && 3133 !Subtarget->noBTIAtReturnTwice() && 3134 MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement()) 3135 return false; 3136 3137 // Allow SelectionDAG isel to handle tail calls. 3138 if (IsTailCall) 3139 return false; 3140 3141 // FIXME: we could and should support this, but for now correctness at -O0 is 3142 // more important. 3143 if (Subtarget->isTargetILP32()) 3144 return false; 3145 3146 CodeModel::Model CM = TM.getCodeModel(); 3147 // Only support the small-addressing and large code models. 3148 if (CM != CodeModel::Large && !Subtarget->useSmallAddressing()) 3149 return false; 3150 3151 // FIXME: Add large code model support for ELF. 3152 if (CM == CodeModel::Large && !Subtarget->isTargetMachO()) 3153 return false; 3154 3155 // Let SDISel handle vararg functions. 3156 if (IsVarArg) 3157 return false; 3158 3159 // FIXME: Only handle *simple* calls for now. 3160 MVT RetVT; 3161 if (CLI.RetTy->isVoidTy()) 3162 RetVT = MVT::isVoid; 3163 else if (!isTypeLegal(CLI.RetTy, RetVT)) 3164 return false; 3165 3166 for (auto Flag : CLI.OutFlags) 3167 if (Flag.isInReg() || Flag.isSRet() || Flag.isNest() || Flag.isByVal() || 3168 Flag.isSwiftSelf() || Flag.isSwiftAsync() || Flag.isSwiftError()) 3169 return false; 3170 3171 // Set up the argument vectors. 3172 SmallVector<MVT, 16> OutVTs; 3173 OutVTs.reserve(CLI.OutVals.size()); 3174 3175 for (auto *Val : CLI.OutVals) { 3176 MVT VT; 3177 if (!isTypeLegal(Val->getType(), VT) && 3178 !(VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)) 3179 return false; 3180 3181 // We don't handle vector parameters yet. 3182 if (VT.isVector() || VT.getSizeInBits() > 64) 3183 return false; 3184 3185 OutVTs.push_back(VT); 3186 } 3187 3188 Address Addr; 3189 if (Callee && !computeCallAddress(Callee, Addr)) 3190 return false; 3191 3192 // The weak function target may be zero; in that case we must use indirect 3193 // addressing via a stub on windows as it may be out of range for a 3194 // PC-relative jump. 
3195 if (Subtarget->isTargetWindows() && Addr.getGlobalValue() && 3196 Addr.getGlobalValue()->hasExternalWeakLinkage()) 3197 return false; 3198 3199 // Handle the arguments now that we've gotten them. 3200 unsigned NumBytes; 3201 if (!processCallArgs(CLI, OutVTs, NumBytes)) 3202 return false; 3203 3204 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3205 if (RegInfo->isAnyArgRegReserved(*MF)) 3206 RegInfo->emitReservedArgRegCallError(*MF); 3207 3208 // Issue the call. 3209 MachineInstrBuilder MIB; 3210 if (Subtarget->useSmallAddressing()) { 3211 const MCInstrDesc &II = 3212 TII.get(Addr.getReg() ? getBLRCallOpcode(*MF) : (unsigned)AArch64::BL); 3213 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II); 3214 if (Symbol) 3215 MIB.addSym(Symbol, 0); 3216 else if (Addr.getGlobalValue()) 3217 MIB.addGlobalAddress(Addr.getGlobalValue(), 0, 0); 3218 else if (Addr.getReg()) { 3219 Register Reg = constrainOperandRegClass(II, Addr.getReg(), 0); 3220 MIB.addReg(Reg); 3221 } else 3222 return false; 3223 } else { 3224 unsigned CallReg = 0; 3225 if (Symbol) { 3226 Register ADRPReg = createResultReg(&AArch64::GPR64commonRegClass); 3227 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADRP), 3228 ADRPReg) 3229 .addSym(Symbol, AArch64II::MO_GOT | AArch64II::MO_PAGE); 3230 3231 CallReg = createResultReg(&AArch64::GPR64RegClass); 3232 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3233 TII.get(AArch64::LDRXui), CallReg) 3234 .addReg(ADRPReg) 3235 .addSym(Symbol, 3236 AArch64II::MO_GOT | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3237 } else if (Addr.getGlobalValue()) 3238 CallReg = materializeGV(Addr.getGlobalValue()); 3239 else if (Addr.getReg()) 3240 CallReg = Addr.getReg(); 3241 3242 if (!CallReg) 3243 return false; 3244 3245 const MCInstrDesc &II = TII.get(getBLRCallOpcode(*MF)); 3246 CallReg = constrainOperandRegClass(II, CallReg, 0); 3247 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II).addReg(CallReg); 3248 } 3249 3250 // Add implicit physical register uses to the call. 3251 for (auto Reg : CLI.OutRegs) 3252 MIB.addReg(Reg, RegState::Implicit); 3253 3254 // Add a register mask with the call-preserved registers. 3255 // Proper defs for return values will be added by setPhysRegsDeadExcept(). 3256 MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC)); 3257 3258 CLI.Call = MIB; 3259 3260 // Finish off the call including any return values. 3261 return finishCall(CLI, RetVT, NumBytes); 3262 } 3263 3264 bool AArch64FastISel::isMemCpySmall(uint64_t Len, unsigned Alignment) { 3265 if (Alignment) 3266 return Len / Alignment <= 4; 3267 else 3268 return Len < 32; 3269 } 3270 3271 bool AArch64FastISel::tryEmitSmallMemCpy(Address Dest, Address Src, 3272 uint64_t Len, unsigned Alignment) { 3273 // Make sure we don't bloat code by inlining very large memcpy's. 3274 if (!isMemCpySmall(Len, Alignment)) 3275 return false; 3276 3277 int64_t UnscaledOffset = 0; 3278 Address OrigDest = Dest; 3279 Address OrigSrc = Src; 3280 3281 while (Len) { 3282 MVT VT; 3283 if (!Alignment || Alignment >= 8) { 3284 if (Len >= 8) 3285 VT = MVT::i64; 3286 else if (Len >= 4) 3287 VT = MVT::i32; 3288 else if (Len >= 2) 3289 VT = MVT::i16; 3290 else { 3291 VT = MVT::i8; 3292 } 3293 } else { 3294 // Bound based on alignment. 
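      // E.g. an 11-byte copy with 4-byte alignment becomes two 4-byte
      // load/store pairs followed by three single-byte copies, since the
      // 2-byte case below is only taken when the alignment is exactly 2.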
3295 if (Len >= 4 && Alignment == 4) 3296 VT = MVT::i32; 3297 else if (Len >= 2 && Alignment == 2) 3298 VT = MVT::i16; 3299 else { 3300 VT = MVT::i8; 3301 } 3302 } 3303 3304 unsigned ResultReg = emitLoad(VT, VT, Src); 3305 if (!ResultReg) 3306 return false; 3307 3308 if (!emitStore(VT, ResultReg, Dest)) 3309 return false; 3310 3311 int64_t Size = VT.getSizeInBits() / 8; 3312 Len -= Size; 3313 UnscaledOffset += Size; 3314 3315 // We need to recompute the unscaled offset for each iteration. 3316 Dest.setOffset(OrigDest.getOffset() + UnscaledOffset); 3317 Src.setOffset(OrigSrc.getOffset() + UnscaledOffset); 3318 } 3319 3320 return true; 3321 } 3322 3323 /// Check if it is possible to fold the condition from the XALU intrinsic 3324 /// into the user. The condition code will only be updated on success. 3325 bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC, 3326 const Instruction *I, 3327 const Value *Cond) { 3328 if (!isa<ExtractValueInst>(Cond)) 3329 return false; 3330 3331 const auto *EV = cast<ExtractValueInst>(Cond); 3332 if (!isa<IntrinsicInst>(EV->getAggregateOperand())) 3333 return false; 3334 3335 const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); 3336 MVT RetVT; 3337 const Function *Callee = II->getCalledFunction(); 3338 Type *RetTy = 3339 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); 3340 if (!isTypeLegal(RetTy, RetVT)) 3341 return false; 3342 3343 if (RetVT != MVT::i32 && RetVT != MVT::i64) 3344 return false; 3345 3346 const Value *LHS = II->getArgOperand(0); 3347 const Value *RHS = II->getArgOperand(1); 3348 3349 // Canonicalize immediate to the RHS. 3350 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3351 std::swap(LHS, RHS); 3352 3353 // Simplify multiplies. 3354 Intrinsic::ID IID = II->getIntrinsicID(); 3355 switch (IID) { 3356 default: 3357 break; 3358 case Intrinsic::smul_with_overflow: 3359 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3360 if (C->getValue() == 2) 3361 IID = Intrinsic::sadd_with_overflow; 3362 break; 3363 case Intrinsic::umul_with_overflow: 3364 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3365 if (C->getValue() == 2) 3366 IID = Intrinsic::uadd_with_overflow; 3367 break; 3368 } 3369 3370 AArch64CC::CondCode TmpCC; 3371 switch (IID) { 3372 default: 3373 return false; 3374 case Intrinsic::sadd_with_overflow: 3375 case Intrinsic::ssub_with_overflow: 3376 TmpCC = AArch64CC::VS; 3377 break; 3378 case Intrinsic::uadd_with_overflow: 3379 TmpCC = AArch64CC::HS; 3380 break; 3381 case Intrinsic::usub_with_overflow: 3382 TmpCC = AArch64CC::LO; 3383 break; 3384 case Intrinsic::smul_with_overflow: 3385 case Intrinsic::umul_with_overflow: 3386 TmpCC = AArch64CC::NE; 3387 break; 3388 } 3389 3390 // Check if both instructions are in the same basic block. 3391 if (!isValueAvailable(II)) 3392 return false; 3393 3394 // Make sure nothing is in the way 3395 BasicBlock::const_iterator Start(I); 3396 BasicBlock::const_iterator End(II); 3397 for (auto Itr = std::prev(Start); Itr != End; --Itr) { 3398 // We only expect extractvalue instructions between the intrinsic and the 3399 // instruction to be selected. 3400 if (!isa<ExtractValueInst>(Itr)) 3401 return false; 3402 3403 // Check that the extractvalue operand comes from the intrinsic. 
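    // The pattern we expect is, roughly:
    //   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
    //   %val = extractvalue { i32, i1 } %res, 0
    //   %ovf = extractvalue { i32, i1 } %res, 1
    //   br i1 %ovf, ...
    // with nothing but such extractvalues between the intrinsic and the user;
    // any other intervening instruction defeats the flag reuse.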
3404 const auto *EVI = cast<ExtractValueInst>(Itr); 3405 if (EVI->getAggregateOperand() != II) 3406 return false; 3407 } 3408 3409 CC = TmpCC; 3410 return true; 3411 } 3412 3413 bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) { 3414 // FIXME: Handle more intrinsics. 3415 switch (II->getIntrinsicID()) { 3416 default: return false; 3417 case Intrinsic::frameaddress: { 3418 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3419 MFI.setFrameAddressIsTaken(true); 3420 3421 const AArch64RegisterInfo *RegInfo = Subtarget->getRegisterInfo(); 3422 Register FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF)); 3423 Register SrcReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3424 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3425 TII.get(TargetOpcode::COPY), SrcReg).addReg(FramePtr); 3426 // Recursively load frame address 3427 // ldr x0, [fp] 3428 // ldr x0, [x0] 3429 // ldr x0, [x0] 3430 // ... 3431 unsigned DestReg; 3432 unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue(); 3433 while (Depth--) { 3434 DestReg = fastEmitInst_ri(AArch64::LDRXui, &AArch64::GPR64RegClass, 3435 SrcReg, 0); 3436 assert(DestReg && "Unexpected LDR instruction emission failure."); 3437 SrcReg = DestReg; 3438 } 3439 3440 updateValueMap(II, SrcReg); 3441 return true; 3442 } 3443 case Intrinsic::sponentry: { 3444 MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo(); 3445 3446 // SP = FP + Fixed Object + 16 3447 int FI = MFI.CreateFixedObject(4, 0, false); 3448 Register ResultReg = createResultReg(&AArch64::GPR64spRegClass); 3449 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3450 TII.get(AArch64::ADDXri), ResultReg) 3451 .addFrameIndex(FI) 3452 .addImm(0) 3453 .addImm(0); 3454 3455 updateValueMap(II, ResultReg); 3456 return true; 3457 } 3458 case Intrinsic::memcpy: 3459 case Intrinsic::memmove: { 3460 const auto *MTI = cast<MemTransferInst>(II); 3461 // Don't handle volatile. 3462 if (MTI->isVolatile()) 3463 return false; 3464 3465 // Disable inlining for memmove before calls to ComputeAddress. Otherwise, 3466 // we would emit dead code because we don't currently handle memmoves. 3467 bool IsMemCpy = (II->getIntrinsicID() == Intrinsic::memcpy); 3468 if (isa<ConstantInt>(MTI->getLength()) && IsMemCpy) { 3469 // Small memcpy's are common enough that we want to do them without a call 3470 // if possible. 3471 uint64_t Len = cast<ConstantInt>(MTI->getLength())->getZExtValue(); 3472 unsigned Alignment = MinAlign(MTI->getDestAlignment(), 3473 MTI->getSourceAlignment()); 3474 if (isMemCpySmall(Len, Alignment)) { 3475 Address Dest, Src; 3476 if (!computeAddress(MTI->getRawDest(), Dest) || 3477 !computeAddress(MTI->getRawSource(), Src)) 3478 return false; 3479 if (tryEmitSmallMemCpy(Dest, Src, Len, Alignment)) 3480 return true; 3481 } 3482 } 3483 3484 if (!MTI->getLength()->getType()->isIntegerTy(64)) 3485 return false; 3486 3487 if (MTI->getSourceAddressSpace() > 255 || MTI->getDestAddressSpace() > 255) 3488 // Fast instruction selection doesn't support the special 3489 // address spaces. 3490 return false; 3491 3492 const char *IntrMemName = isa<MemCpyInst>(II) ? "memcpy" : "memmove"; 3493 return lowerCallTo(II, IntrMemName, II->arg_size() - 1); 3494 } 3495 case Intrinsic::memset: { 3496 const MemSetInst *MSI = cast<MemSetInst>(II); 3497 // Don't handle volatile. 
3498 if (MSI->isVolatile()) 3499 return false; 3500 3501 if (!MSI->getLength()->getType()->isIntegerTy(64)) 3502 return false; 3503 3504 if (MSI->getDestAddressSpace() > 255) 3505 // Fast instruction selection doesn't support the special 3506 // address spaces. 3507 return false; 3508 3509 return lowerCallTo(II, "memset", II->arg_size() - 1); 3510 } 3511 case Intrinsic::sin: 3512 case Intrinsic::cos: 3513 case Intrinsic::pow: { 3514 MVT RetVT; 3515 if (!isTypeLegal(II->getType(), RetVT)) 3516 return false; 3517 3518 if (RetVT != MVT::f32 && RetVT != MVT::f64) 3519 return false; 3520 3521 static const RTLIB::Libcall LibCallTable[3][2] = { 3522 { RTLIB::SIN_F32, RTLIB::SIN_F64 }, 3523 { RTLIB::COS_F32, RTLIB::COS_F64 }, 3524 { RTLIB::POW_F32, RTLIB::POW_F64 } 3525 }; 3526 RTLIB::Libcall LC; 3527 bool Is64Bit = RetVT == MVT::f64; 3528 switch (II->getIntrinsicID()) { 3529 default: 3530 llvm_unreachable("Unexpected intrinsic."); 3531 case Intrinsic::sin: 3532 LC = LibCallTable[0][Is64Bit]; 3533 break; 3534 case Intrinsic::cos: 3535 LC = LibCallTable[1][Is64Bit]; 3536 break; 3537 case Intrinsic::pow: 3538 LC = LibCallTable[2][Is64Bit]; 3539 break; 3540 } 3541 3542 ArgListTy Args; 3543 Args.reserve(II->arg_size()); 3544 3545 // Populate the argument list. 3546 for (auto &Arg : II->args()) { 3547 ArgListEntry Entry; 3548 Entry.Val = Arg; 3549 Entry.Ty = Arg->getType(); 3550 Args.push_back(Entry); 3551 } 3552 3553 CallLoweringInfo CLI; 3554 MCContext &Ctx = MF->getContext(); 3555 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), II->getType(), 3556 TLI.getLibcallName(LC), std::move(Args)); 3557 if (!lowerCallTo(CLI)) 3558 return false; 3559 updateValueMap(II, CLI.ResultReg); 3560 return true; 3561 } 3562 case Intrinsic::fabs: { 3563 MVT VT; 3564 if (!isTypeLegal(II->getType(), VT)) 3565 return false; 3566 3567 unsigned Opc; 3568 switch (VT.SimpleTy) { 3569 default: 3570 return false; 3571 case MVT::f32: 3572 Opc = AArch64::FABSSr; 3573 break; 3574 case MVT::f64: 3575 Opc = AArch64::FABSDr; 3576 break; 3577 } 3578 Register SrcReg = getRegForValue(II->getOperand(0)); 3579 if (!SrcReg) 3580 return false; 3581 Register ResultReg = createResultReg(TLI.getRegClassFor(VT)); 3582 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) 3583 .addReg(SrcReg); 3584 updateValueMap(II, ResultReg); 3585 return true; 3586 } 3587 case Intrinsic::trap: 3588 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 3589 .addImm(1); 3590 return true; 3591 case Intrinsic::debugtrap: 3592 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::BRK)) 3593 .addImm(0xF000); 3594 return true; 3595 3596 case Intrinsic::sqrt: { 3597 Type *RetTy = II->getCalledFunction()->getReturnType(); 3598 3599 MVT VT; 3600 if (!isTypeLegal(RetTy, VT)) 3601 return false; 3602 3603 Register Op0Reg = getRegForValue(II->getOperand(0)); 3604 if (!Op0Reg) 3605 return false; 3606 3607 unsigned ResultReg = fastEmit_r(VT, VT, ISD::FSQRT, Op0Reg); 3608 if (!ResultReg) 3609 return false; 3610 3611 updateValueMap(II, ResultReg); 3612 return true; 3613 } 3614 case Intrinsic::sadd_with_overflow: 3615 case Intrinsic::uadd_with_overflow: 3616 case Intrinsic::ssub_with_overflow: 3617 case Intrinsic::usub_with_overflow: 3618 case Intrinsic::smul_with_overflow: 3619 case Intrinsic::umul_with_overflow: { 3620 // This implements the basic lowering of the xalu with overflow intrinsics. 
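    // The arithmetic result is produced by a flag-setting add/sub (or, for the
    // multiplies, by an explicit overflow check below), and the overflow bit is
    // then materialized with a CSINC on the inverted condition. updateValueMap
    // is called with NumRegs = 2 so both results occupy consecutive registers.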
3621 const Function *Callee = II->getCalledFunction(); 3622 auto *Ty = cast<StructType>(Callee->getReturnType()); 3623 Type *RetTy = Ty->getTypeAtIndex(0U); 3624 3625 MVT VT; 3626 if (!isTypeLegal(RetTy, VT)) 3627 return false; 3628 3629 if (VT != MVT::i32 && VT != MVT::i64) 3630 return false; 3631 3632 const Value *LHS = II->getArgOperand(0); 3633 const Value *RHS = II->getArgOperand(1); 3634 // Canonicalize immediate to the RHS. 3635 if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && II->isCommutative()) 3636 std::swap(LHS, RHS); 3637 3638 // Simplify multiplies. 3639 Intrinsic::ID IID = II->getIntrinsicID(); 3640 switch (IID) { 3641 default: 3642 break; 3643 case Intrinsic::smul_with_overflow: 3644 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3645 if (C->getValue() == 2) { 3646 IID = Intrinsic::sadd_with_overflow; 3647 RHS = LHS; 3648 } 3649 break; 3650 case Intrinsic::umul_with_overflow: 3651 if (const auto *C = dyn_cast<ConstantInt>(RHS)) 3652 if (C->getValue() == 2) { 3653 IID = Intrinsic::uadd_with_overflow; 3654 RHS = LHS; 3655 } 3656 break; 3657 } 3658 3659 unsigned ResultReg1 = 0, ResultReg2 = 0, MulReg = 0; 3660 AArch64CC::CondCode CC = AArch64CC::Invalid; 3661 switch (IID) { 3662 default: llvm_unreachable("Unexpected intrinsic!"); 3663 case Intrinsic::sadd_with_overflow: 3664 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3665 CC = AArch64CC::VS; 3666 break; 3667 case Intrinsic::uadd_with_overflow: 3668 ResultReg1 = emitAdd(VT, LHS, RHS, /*SetFlags=*/true); 3669 CC = AArch64CC::HS; 3670 break; 3671 case Intrinsic::ssub_with_overflow: 3672 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3673 CC = AArch64CC::VS; 3674 break; 3675 case Intrinsic::usub_with_overflow: 3676 ResultReg1 = emitSub(VT, LHS, RHS, /*SetFlags=*/true); 3677 CC = AArch64CC::LO; 3678 break; 3679 case Intrinsic::smul_with_overflow: { 3680 CC = AArch64CC::NE; 3681 Register LHSReg = getRegForValue(LHS); 3682 if (!LHSReg) 3683 return false; 3684 3685 Register RHSReg = getRegForValue(RHS); 3686 if (!RHSReg) 3687 return false; 3688 3689 if (VT == MVT::i32) { 3690 MulReg = emitSMULL_rr(MVT::i64, LHSReg, RHSReg); 3691 Register MulSubReg = 3692 fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3693 // cmp xreg, wreg, sxtw 3694 emitAddSub_rx(/*UseAdd=*/false, MVT::i64, MulReg, MulSubReg, 3695 AArch64_AM::SXTW, /*ShiftImm=*/0, /*SetFlags=*/true, 3696 /*WantResult=*/false); 3697 MulReg = MulSubReg; 3698 } else { 3699 assert(VT == MVT::i64 && "Unexpected value type."); 3700 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3701 // reused in the next instruction. 
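        // The product overflows iff the high 64 bits (SMULH) differ from the
        // sign-extension of the low 64 bits, i.e. SMULH != (MUL >> 63).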
3702 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3703 unsigned SMULHReg = fastEmit_rr(VT, VT, ISD::MULHS, LHSReg, RHSReg); 3704 emitSubs_rs(VT, SMULHReg, MulReg, AArch64_AM::ASR, 63, 3705 /*WantResult=*/false); 3706 } 3707 break; 3708 } 3709 case Intrinsic::umul_with_overflow: { 3710 CC = AArch64CC::NE; 3711 Register LHSReg = getRegForValue(LHS); 3712 if (!LHSReg) 3713 return false; 3714 3715 Register RHSReg = getRegForValue(RHS); 3716 if (!RHSReg) 3717 return false; 3718 3719 if (VT == MVT::i32) { 3720 MulReg = emitUMULL_rr(MVT::i64, LHSReg, RHSReg); 3721 // tst xreg, #0xffffffff00000000 3722 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3723 TII.get(AArch64::ANDSXri), AArch64::XZR) 3724 .addReg(MulReg) 3725 .addImm(AArch64_AM::encodeLogicalImmediate(0xFFFFFFFF00000000, 64)); 3726 MulReg = fastEmitInst_extractsubreg(VT, MulReg, AArch64::sub_32); 3727 } else { 3728 assert(VT == MVT::i64 && "Unexpected value type."); 3729 // LHSReg and RHSReg cannot be killed by this Mul, since they are 3730 // reused in the next instruction. 3731 MulReg = emitMul_rr(VT, LHSReg, RHSReg); 3732 unsigned UMULHReg = fastEmit_rr(VT, VT, ISD::MULHU, LHSReg, RHSReg); 3733 emitSubs_rr(VT, AArch64::XZR, UMULHReg, /*WantResult=*/false); 3734 } 3735 break; 3736 } 3737 } 3738 3739 if (MulReg) { 3740 ResultReg1 = createResultReg(TLI.getRegClassFor(VT)); 3741 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3742 TII.get(TargetOpcode::COPY), ResultReg1).addReg(MulReg); 3743 } 3744 3745 if (!ResultReg1) 3746 return false; 3747 3748 ResultReg2 = fastEmitInst_rri(AArch64::CSINCWr, &AArch64::GPR32RegClass, 3749 AArch64::WZR, AArch64::WZR, 3750 getInvertedCondCode(CC)); 3751 (void)ResultReg2; 3752 assert((ResultReg1 + 1) == ResultReg2 && 3753 "Nonconsecutive result registers."); 3754 updateValueMap(II, ResultReg1, 2); 3755 return true; 3756 } 3757 } 3758 return false; 3759 } 3760 3761 bool AArch64FastISel::selectRet(const Instruction *I) { 3762 const ReturnInst *Ret = cast<ReturnInst>(I); 3763 const Function &F = *I->getParent()->getParent(); 3764 3765 if (!FuncInfo.CanLowerReturn) 3766 return false; 3767 3768 if (F.isVarArg()) 3769 return false; 3770 3771 if (TLI.supportSwiftError() && 3772 F.getAttributes().hasAttrSomewhere(Attribute::SwiftError)) 3773 return false; 3774 3775 if (TLI.supportSplitCSR(FuncInfo.MF)) 3776 return false; 3777 3778 // Build a list of return value registers. 3779 SmallVector<unsigned, 4> RetRegs; 3780 3781 if (Ret->getNumOperands() > 0) { 3782 CallingConv::ID CC = F.getCallingConv(); 3783 SmallVector<ISD::OutputArg, 4> Outs; 3784 GetReturnInfo(CC, F.getReturnType(), F.getAttributes(), Outs, TLI, DL); 3785 3786 // Analyze operands of the call, assigning locations to each operand. 3787 SmallVector<CCValAssign, 16> ValLocs; 3788 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext()); 3789 CCAssignFn *RetCC = CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS 3790 : RetCC_AArch64_AAPCS; 3791 CCInfo.AnalyzeReturn(Outs, RetCC); 3792 3793 // Only handle a single return value for now. 3794 if (ValLocs.size() != 1) 3795 return false; 3796 3797 CCValAssign &VA = ValLocs[0]; 3798 const Value *RV = Ret->getOperand(0); 3799 3800 // Don't bother handling odd stuff for now. 3801 if ((VA.getLocInfo() != CCValAssign::Full) && 3802 (VA.getLocInfo() != CCValAssign::BCvt)) 3803 return false; 3804 3805 // Only handle register returns for now. 
3806 if (!VA.isRegLoc()) 3807 return false; 3808 3809 Register Reg = getRegForValue(RV); 3810 if (Reg == 0) 3811 return false; 3812 3813 unsigned SrcReg = Reg + VA.getValNo(); 3814 Register DestReg = VA.getLocReg(); 3815 // Avoid a cross-class copy. This is very unlikely. 3816 if (!MRI.getRegClass(SrcReg)->contains(DestReg)) 3817 return false; 3818 3819 EVT RVEVT = TLI.getValueType(DL, RV->getType()); 3820 if (!RVEVT.isSimple()) 3821 return false; 3822 3823 // Vectors (of > 1 lane) in big endian need tricky handling. 3824 if (RVEVT.isVector() && RVEVT.getVectorElementCount().isVector() && 3825 !Subtarget->isLittleEndian()) 3826 return false; 3827 3828 MVT RVVT = RVEVT.getSimpleVT(); 3829 if (RVVT == MVT::f128) 3830 return false; 3831 3832 MVT DestVT = VA.getValVT(); 3833 // Special handling for extended integers. 3834 if (RVVT != DestVT) { 3835 if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) 3836 return false; 3837 3838 if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) 3839 return false; 3840 3841 bool IsZExt = Outs[0].Flags.isZExt(); 3842 SrcReg = emitIntExt(RVVT, SrcReg, DestVT, IsZExt); 3843 if (SrcReg == 0) 3844 return false; 3845 } 3846 3847 // "Callee" (i.e. value producer) zero extends pointers at function 3848 // boundary. 3849 if (Subtarget->isTargetILP32() && RV->getType()->isPointerTy()) 3850 SrcReg = emitAnd_ri(MVT::i64, SrcReg, 0xffffffff); 3851 3852 // Make the copy. 3853 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3854 TII.get(TargetOpcode::COPY), DestReg).addReg(SrcReg); 3855 3856 // Add register to return instruction. 3857 RetRegs.push_back(VA.getLocReg()); 3858 } 3859 3860 MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3861 TII.get(AArch64::RET_ReallyLR)); 3862 for (unsigned RetReg : RetRegs) 3863 MIB.addReg(RetReg, RegState::Implicit); 3864 return true; 3865 } 3866 3867 bool AArch64FastISel::selectTrunc(const Instruction *I) { 3868 Type *DestTy = I->getType(); 3869 Value *Op = I->getOperand(0); 3870 Type *SrcTy = Op->getType(); 3871 3872 EVT SrcEVT = TLI.getValueType(DL, SrcTy, true); 3873 EVT DestEVT = TLI.getValueType(DL, DestTy, true); 3874 if (!SrcEVT.isSimple()) 3875 return false; 3876 if (!DestEVT.isSimple()) 3877 return false; 3878 3879 MVT SrcVT = SrcEVT.getSimpleVT(); 3880 MVT DestVT = DestEVT.getSimpleVT(); 3881 3882 if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16 && 3883 SrcVT != MVT::i8) 3884 return false; 3885 if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8 && 3886 DestVT != MVT::i1) 3887 return false; 3888 3889 Register SrcReg = getRegForValue(Op); 3890 if (!SrcReg) 3891 return false; 3892 3893 // If we're truncating from i64 to a smaller non-legal type then generate an 3894 // AND. Otherwise, we know the high bits are undefined and a truncate only 3895 // generate a COPY. We cannot mark the source register also as result 3896 // register, because this can incorrectly transfer the kill flag onto the 3897 // source register. 3898 unsigned ResultReg; 3899 if (SrcVT == MVT::i64) { 3900 uint64_t Mask = 0; 3901 switch (DestVT.SimpleTy) { 3902 default: 3903 // Trunc i64 to i32 is handled by the target-independent fast-isel. 3904 return false; 3905 case MVT::i1: 3906 Mask = 0x1; 3907 break; 3908 case MVT::i8: 3909 Mask = 0xff; 3910 break; 3911 case MVT::i16: 3912 Mask = 0xffff; 3913 break; 3914 } 3915 // Issue an extract_subreg to get the lower 32-bits. 
3916 Register Reg32 = fastEmitInst_extractsubreg(MVT::i32, SrcReg, 3917 AArch64::sub_32); 3918 // Create the AND instruction which performs the actual truncation. 3919 ResultReg = emitAnd_ri(MVT::i32, Reg32, Mask); 3920 assert(ResultReg && "Unexpected AND instruction emission failure."); 3921 } else { 3922 ResultReg = createResultReg(&AArch64::GPR32RegClass); 3923 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3924 TII.get(TargetOpcode::COPY), ResultReg) 3925 .addReg(SrcReg); 3926 } 3927 3928 updateValueMap(I, ResultReg); 3929 return true; 3930 } 3931 3932 unsigned AArch64FastISel::emiti1Ext(unsigned SrcReg, MVT DestVT, bool IsZExt) { 3933 assert((DestVT == MVT::i8 || DestVT == MVT::i16 || DestVT == MVT::i32 || 3934 DestVT == MVT::i64) && 3935 "Unexpected value type."); 3936 // Handle i8 and i16 as i32. 3937 if (DestVT == MVT::i8 || DestVT == MVT::i16) 3938 DestVT = MVT::i32; 3939 3940 if (IsZExt) { 3941 unsigned ResultReg = emitAnd_ri(MVT::i32, SrcReg, 1); 3942 assert(ResultReg && "Unexpected AND instruction emission failure."); 3943 if (DestVT == MVT::i64) { 3944 // We're ZExt i1 to i64. The ANDWri Wd, Ws, #1 implicitly clears the 3945 // upper 32 bits. Emit a SUBREG_TO_REG to extend from Wd to Xd. 3946 Register Reg64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3947 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 3948 TII.get(AArch64::SUBREG_TO_REG), Reg64) 3949 .addImm(0) 3950 .addReg(ResultReg) 3951 .addImm(AArch64::sub_32); 3952 ResultReg = Reg64; 3953 } 3954 return ResultReg; 3955 } else { 3956 if (DestVT == MVT::i64) { 3957 // FIXME: We're SExt i1 to i64. 3958 return 0; 3959 } 3960 return fastEmitInst_rii(AArch64::SBFMWri, &AArch64::GPR32RegClass, SrcReg, 3961 0, 0); 3962 } 3963 } 3964 3965 unsigned AArch64FastISel::emitMul_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 3966 unsigned Opc, ZReg; 3967 switch (RetVT.SimpleTy) { 3968 default: return 0; 3969 case MVT::i8: 3970 case MVT::i16: 3971 case MVT::i32: 3972 RetVT = MVT::i32; 3973 Opc = AArch64::MADDWrrr; ZReg = AArch64::WZR; break; 3974 case MVT::i64: 3975 Opc = AArch64::MADDXrrr; ZReg = AArch64::XZR; break; 3976 } 3977 3978 const TargetRegisterClass *RC = 3979 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 3980 return fastEmitInst_rrr(Opc, RC, Op0, Op1, ZReg); 3981 } 3982 3983 unsigned AArch64FastISel::emitSMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 3984 if (RetVT != MVT::i64) 3985 return 0; 3986 3987 return fastEmitInst_rrr(AArch64::SMADDLrrr, &AArch64::GPR64RegClass, 3988 Op0, Op1, AArch64::XZR); 3989 } 3990 3991 unsigned AArch64FastISel::emitUMULL_rr(MVT RetVT, unsigned Op0, unsigned Op1) { 3992 if (RetVT != MVT::i64) 3993 return 0; 3994 3995 return fastEmitInst_rrr(AArch64::UMADDLrrr, &AArch64::GPR64RegClass, 3996 Op0, Op1, AArch64::XZR); 3997 } 3998 3999 unsigned AArch64FastISel::emitLSL_rr(MVT RetVT, unsigned Op0Reg, 4000 unsigned Op1Reg) { 4001 unsigned Opc = 0; 4002 bool NeedTrunc = false; 4003 uint64_t Mask = 0; 4004 switch (RetVT.SimpleTy) { 4005 default: return 0; 4006 case MVT::i8: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xff; break; 4007 case MVT::i16: Opc = AArch64::LSLVWr; NeedTrunc = true; Mask = 0xffff; break; 4008 case MVT::i32: Opc = AArch64::LSLVWr; break; 4009 case MVT::i64: Opc = AArch64::LSLVXr; break; 4010 } 4011 4012 const TargetRegisterClass *RC = 4013 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4014 if (NeedTrunc) 4015 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4016 4017 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4018 if (NeedTrunc) 4019 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4020 return ResultReg; 4021 } 4022 4023 unsigned AArch64FastISel::emitLSL_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4024 uint64_t Shift, bool IsZExt) { 4025 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4026 "Unexpected source/return type pair."); 4027 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4028 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4029 "Unexpected source value type."); 4030 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4031 RetVT == MVT::i64) && "Unexpected return value type."); 4032 4033 bool Is64Bit = (RetVT == MVT::i64); 4034 unsigned RegSize = Is64Bit ? 64 : 32; 4035 unsigned DstBits = RetVT.getSizeInBits(); 4036 unsigned SrcBits = SrcVT.getSizeInBits(); 4037 const TargetRegisterClass *RC = 4038 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4039 4040 // Just emit a copy for "zero" shifts. 4041 if (Shift == 0) { 4042 if (RetVT == SrcVT) { 4043 Register ResultReg = createResultReg(RC); 4044 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4045 TII.get(TargetOpcode::COPY), ResultReg) 4046 .addReg(Op0); 4047 return ResultReg; 4048 } else 4049 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4050 } 4051 4052 // Don't deal with undefined shifts. 4053 if (Shift >= DstBits) 4054 return 0; 4055 4056 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4057 // {S|U}BFM Wd, Wn, #r, #s 4058 // Wd<32+s-r,32-r> = Wn<s:0> when r > s 4059 4060 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4061 // %2 = shl i16 %1, 4 4062 // Wd<32+7-28,32-28> = Wn<7:0> <- clamp s to 7 4063 // 0b1111_1111_1111_1111__1111_1010_1010_0000 sext 4064 // 0b0000_0000_0000_0000__0000_0101_0101_0000 sext | zext 4065 // 0b0000_0000_0000_0000__0000_1010_1010_0000 zext 4066 4067 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4068 // %2 = shl i16 %1, 8 4069 // Wd<32+7-24,32-24> = Wn<7:0> 4070 // 0b1111_1111_1111_1111__1010_1010_0000_0000 sext 4071 // 0b0000_0000_0000_0000__0101_0101_0000_0000 sext | zext 4072 // 0b0000_0000_0000_0000__1010_1010_0000_0000 zext 4073 4074 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4075 // %2 = shl i16 %1, 12 4076 // Wd<32+3-20,32-20> = Wn<3:0> 4077 // 0b1111_1111_1111_1111__1010_0000_0000_0000 sext 4078 // 0b0000_0000_0000_0000__0101_0000_0000_0000 sext | zext 4079 // 0b0000_0000_0000_0000__1010_0000_0000_0000 zext 4080 4081 unsigned ImmR = RegSize - Shift; 4082 // Limit the width to the length of the source type. 
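  // (ImmR/ImmS chosen this way form the {S|U}BFIZ alias: the low source bits
  // are inserted at bit position Shift and zero-/sign-extended above.)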
4083 unsigned ImmS = std::min<unsigned>(SrcBits - 1, DstBits - 1 - Shift); 4084 static const unsigned OpcTable[2][2] = { 4085 {AArch64::SBFMWri, AArch64::SBFMXri}, 4086 {AArch64::UBFMWri, AArch64::UBFMXri} 4087 }; 4088 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4089 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4090 Register TmpReg = MRI.createVirtualRegister(RC); 4091 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4092 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4093 .addImm(0) 4094 .addReg(Op0) 4095 .addImm(AArch64::sub_32); 4096 Op0 = TmpReg; 4097 } 4098 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4099 } 4100 4101 unsigned AArch64FastISel::emitLSR_rr(MVT RetVT, unsigned Op0Reg, 4102 unsigned Op1Reg) { 4103 unsigned Opc = 0; 4104 bool NeedTrunc = false; 4105 uint64_t Mask = 0; 4106 switch (RetVT.SimpleTy) { 4107 default: return 0; 4108 case MVT::i8: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xff; break; 4109 case MVT::i16: Opc = AArch64::LSRVWr; NeedTrunc = true; Mask = 0xffff; break; 4110 case MVT::i32: Opc = AArch64::LSRVWr; break; 4111 case MVT::i64: Opc = AArch64::LSRVXr; break; 4112 } 4113 4114 const TargetRegisterClass *RC = 4115 (RetVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4116 if (NeedTrunc) { 4117 Op0Reg = emitAnd_ri(MVT::i32, Op0Reg, Mask); 4118 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4119 } 4120 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4121 if (NeedTrunc) 4122 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4123 return ResultReg; 4124 } 4125 4126 unsigned AArch64FastISel::emitLSR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4127 uint64_t Shift, bool IsZExt) { 4128 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4129 "Unexpected source/return type pair."); 4130 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4131 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4132 "Unexpected source value type."); 4133 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4134 RetVT == MVT::i64) && "Unexpected return value type."); 4135 4136 bool Is64Bit = (RetVT == MVT::i64); 4137 unsigned RegSize = Is64Bit ? 64 : 32; 4138 unsigned DstBits = RetVT.getSizeInBits(); 4139 unsigned SrcBits = SrcVT.getSizeInBits(); 4140 const TargetRegisterClass *RC = 4141 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4142 4143 // Just emit a copy for "zero" shifts. 4144 if (Shift == 0) { 4145 if (RetVT == SrcVT) { 4146 Register ResultReg = createResultReg(RC); 4147 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4148 TII.get(TargetOpcode::COPY), ResultReg) 4149 .addReg(Op0); 4150 return ResultReg; 4151 } else 4152 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4153 } 4154 4155 // Don't deal with undefined shifts. 4156 if (Shift >= DstBits) 4157 return 0; 4158 4159 // For immediate shifts we can fold the zero-/sign-extension into the shift. 
4160 // {S|U}BFM Wd, Wn, #r, #s 4161 // Wd<s-r:0> = Wn<s:r> when r <= s 4162 4163 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4164 // %2 = lshr i16 %1, 4 4165 // Wd<7-4:0> = Wn<7:4> 4166 // 0b0000_0000_0000_0000__0000_1111_1111_1010 sext 4167 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4168 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4169 4170 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4171 // %2 = lshr i16 %1, 8 4172 // Wd<7-7,0> = Wn<7:7> 4173 // 0b0000_0000_0000_0000__0000_0000_1111_1111 sext 4174 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4175 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4176 4177 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4178 // %2 = lshr i16 %1, 12 4179 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4180 // 0b0000_0000_0000_0000__0000_0000_0000_1111 sext 4181 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4182 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4183 4184 if (Shift >= SrcBits && IsZExt) 4185 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4186 4187 // It is not possible to fold a sign-extend into the LShr instruction. In this 4188 // case emit a sign-extend. 4189 if (!IsZExt) { 4190 Op0 = emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4191 if (!Op0) 4192 return 0; 4193 SrcVT = RetVT; 4194 SrcBits = SrcVT.getSizeInBits(); 4195 IsZExt = true; 4196 } 4197 4198 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4199 unsigned ImmS = SrcBits - 1; 4200 static const unsigned OpcTable[2][2] = { 4201 {AArch64::SBFMWri, AArch64::SBFMXri}, 4202 {AArch64::UBFMWri, AArch64::UBFMXri} 4203 }; 4204 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4205 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4206 Register TmpReg = MRI.createVirtualRegister(RC); 4207 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4208 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4209 .addImm(0) 4210 .addReg(Op0) 4211 .addImm(AArch64::sub_32); 4212 Op0 = TmpReg; 4213 } 4214 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4215 } 4216 4217 unsigned AArch64FastISel::emitASR_rr(MVT RetVT, unsigned Op0Reg, 4218 unsigned Op1Reg) { 4219 unsigned Opc = 0; 4220 bool NeedTrunc = false; 4221 uint64_t Mask = 0; 4222 switch (RetVT.SimpleTy) { 4223 default: return 0; 4224 case MVT::i8: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xff; break; 4225 case MVT::i16: Opc = AArch64::ASRVWr; NeedTrunc = true; Mask = 0xffff; break; 4226 case MVT::i32: Opc = AArch64::ASRVWr; break; 4227 case MVT::i64: Opc = AArch64::ASRVXr; break; 4228 } 4229 4230 const TargetRegisterClass *RC = 4231 (RetVT == MVT::i64) ? 
&AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4232 if (NeedTrunc) { 4233 Op0Reg = emitIntExt(RetVT, Op0Reg, MVT::i32, /*isZExt=*/false); 4234 Op1Reg = emitAnd_ri(MVT::i32, Op1Reg, Mask); 4235 } 4236 Register ResultReg = fastEmitInst_rr(Opc, RC, Op0Reg, Op1Reg); 4237 if (NeedTrunc) 4238 ResultReg = emitAnd_ri(MVT::i32, ResultReg, Mask); 4239 return ResultReg; 4240 } 4241 4242 unsigned AArch64FastISel::emitASR_ri(MVT RetVT, MVT SrcVT, unsigned Op0, 4243 uint64_t Shift, bool IsZExt) { 4244 assert(RetVT.SimpleTy >= SrcVT.SimpleTy && 4245 "Unexpected source/return type pair."); 4246 assert((SrcVT == MVT::i1 || SrcVT == MVT::i8 || SrcVT == MVT::i16 || 4247 SrcVT == MVT::i32 || SrcVT == MVT::i64) && 4248 "Unexpected source value type."); 4249 assert((RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32 || 4250 RetVT == MVT::i64) && "Unexpected return value type."); 4251 4252 bool Is64Bit = (RetVT == MVT::i64); 4253 unsigned RegSize = Is64Bit ? 64 : 32; 4254 unsigned DstBits = RetVT.getSizeInBits(); 4255 unsigned SrcBits = SrcVT.getSizeInBits(); 4256 const TargetRegisterClass *RC = 4257 Is64Bit ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4258 4259 // Just emit a copy for "zero" shifts. 4260 if (Shift == 0) { 4261 if (RetVT == SrcVT) { 4262 Register ResultReg = createResultReg(RC); 4263 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4264 TII.get(TargetOpcode::COPY), ResultReg) 4265 .addReg(Op0); 4266 return ResultReg; 4267 } else 4268 return emitIntExt(SrcVT, Op0, RetVT, IsZExt); 4269 } 4270 4271 // Don't deal with undefined shifts. 4272 if (Shift >= DstBits) 4273 return 0; 4274 4275 // For immediate shifts we can fold the zero-/sign-extension into the shift. 4276 // {S|U}BFM Wd, Wn, #r, #s 4277 // Wd<s-r:0> = Wn<s:r> when r <= s 4278 4279 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4280 // %2 = ashr i16 %1, 4 4281 // Wd<7-4:0> = Wn<7:4> 4282 // 0b1111_1111_1111_1111__1111_1111_1111_1010 sext 4283 // 0b0000_0000_0000_0000__0000_0000_0000_0101 sext | zext 4284 // 0b0000_0000_0000_0000__0000_0000_0000_1010 zext 4285 4286 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4287 // %2 = ashr i16 %1, 8 4288 // Wd<7-7,0> = Wn<7:7> 4289 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4290 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4291 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4292 4293 // %1 = {s|z}ext i8 {0b1010_1010|0b0101_0101} to i16 4294 // %2 = ashr i16 %1, 12 4295 // Wd<7-7,0> = Wn<7:7> <- clamp r to 7 4296 // 0b1111_1111_1111_1111__1111_1111_1111_1111 sext 4297 // 0b0000_0000_0000_0000__0000_0000_0000_0000 sext 4298 // 0b0000_0000_0000_0000__0000_0000_0000_0000 zext 4299 4300 if (Shift >= SrcBits && IsZExt) 4301 return materializeInt(ConstantInt::get(*Context, APInt(RegSize, 0)), RetVT); 4302 4303 unsigned ImmR = std::min<unsigned>(SrcBits - 1, Shift); 4304 unsigned ImmS = SrcBits - 1; 4305 static const unsigned OpcTable[2][2] = { 4306 {AArch64::SBFMWri, AArch64::SBFMXri}, 4307 {AArch64::UBFMWri, AArch64::UBFMXri} 4308 }; 4309 unsigned Opc = OpcTable[IsZExt][Is64Bit]; 4310 if (SrcVT.SimpleTy <= MVT::i32 && RetVT == MVT::i64) { 4311 Register TmpReg = MRI.createVirtualRegister(RC); 4312 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4313 TII.get(AArch64::SUBREG_TO_REG), TmpReg) 4314 .addImm(0) 4315 .addReg(Op0) 4316 .addImm(AArch64::sub_32); 4317 Op0 = TmpReg; 4318 } 4319 return fastEmitInst_rii(Opc, RC, Op0, ImmR, ImmS); 4320 } 4321 4322 unsigned AArch64FastISel::emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, 4323 bool IsZExt) { 4324 
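  // Emit a zero-/sign-extension of SrcReg from SrcVT to DestVT with a single
  // UBFM/SBFM (i1 sources are handled by emiti1Ext). Returns 0 for unsupported
  // type combinations so callers can fall back to SelectionDAG.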
assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); 4325 4326 // FastISel does not have plumbing to deal with extensions where the SrcVT or 4327 // DestVT are odd things, so test to make sure that they are both types we can 4328 // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise 4329 // bail out to SelectionDAG. 4330 if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && 4331 (DestVT != MVT::i32) && (DestVT != MVT::i64)) || 4332 ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && 4333 (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) 4334 return 0; 4335 4336 unsigned Opc; 4337 unsigned Imm = 0; 4338 4339 switch (SrcVT.SimpleTy) { 4340 default: 4341 return 0; 4342 case MVT::i1: 4343 return emiti1Ext(SrcReg, DestVT, IsZExt); 4344 case MVT::i8: 4345 if (DestVT == MVT::i64) 4346 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4347 else 4348 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4349 Imm = 7; 4350 break; 4351 case MVT::i16: 4352 if (DestVT == MVT::i64) 4353 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4354 else 4355 Opc = IsZExt ? AArch64::UBFMWri : AArch64::SBFMWri; 4356 Imm = 15; 4357 break; 4358 case MVT::i32: 4359 assert(DestVT == MVT::i64 && "IntExt i32 to i32?!?"); 4360 Opc = IsZExt ? AArch64::UBFMXri : AArch64::SBFMXri; 4361 Imm = 31; 4362 break; 4363 } 4364 4365 // Handle i8 and i16 as i32. 4366 if (DestVT == MVT::i8 || DestVT == MVT::i16) 4367 DestVT = MVT::i32; 4368 else if (DestVT == MVT::i64) { 4369 Register Src64 = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 4370 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4371 TII.get(AArch64::SUBREG_TO_REG), Src64) 4372 .addImm(0) 4373 .addReg(SrcReg) 4374 .addImm(AArch64::sub_32); 4375 SrcReg = Src64; 4376 } 4377 4378 const TargetRegisterClass *RC = 4379 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4380 return fastEmitInst_rii(Opc, RC, SrcReg, 0, Imm); 4381 } 4382 4383 static bool isZExtLoad(const MachineInstr *LI) { 4384 switch (LI->getOpcode()) { 4385 default: 4386 return false; 4387 case AArch64::LDURBBi: 4388 case AArch64::LDURHHi: 4389 case AArch64::LDURWi: 4390 case AArch64::LDRBBui: 4391 case AArch64::LDRHHui: 4392 case AArch64::LDRWui: 4393 case AArch64::LDRBBroX: 4394 case AArch64::LDRHHroX: 4395 case AArch64::LDRWroX: 4396 case AArch64::LDRBBroW: 4397 case AArch64::LDRHHroW: 4398 case AArch64::LDRWroW: 4399 return true; 4400 } 4401 } 4402 4403 static bool isSExtLoad(const MachineInstr *LI) { 4404 switch (LI->getOpcode()) { 4405 default: 4406 return false; 4407 case AArch64::LDURSBWi: 4408 case AArch64::LDURSHWi: 4409 case AArch64::LDURSBXi: 4410 case AArch64::LDURSHXi: 4411 case AArch64::LDURSWi: 4412 case AArch64::LDRSBWui: 4413 case AArch64::LDRSHWui: 4414 case AArch64::LDRSBXui: 4415 case AArch64::LDRSHXui: 4416 case AArch64::LDRSWui: 4417 case AArch64::LDRSBWroX: 4418 case AArch64::LDRSHWroX: 4419 case AArch64::LDRSBXroX: 4420 case AArch64::LDRSHXroX: 4421 case AArch64::LDRSWroX: 4422 case AArch64::LDRSBWroW: 4423 case AArch64::LDRSHWroW: 4424 case AArch64::LDRSBXroW: 4425 case AArch64::LDRSHXroW: 4426 case AArch64::LDRSWroW: 4427 return true; 4428 } 4429 } 4430 4431 bool AArch64FastISel::optimizeIntExtLoad(const Instruction *I, MVT RetVT, 4432 MVT SrcVT) { 4433 const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)); 4434 if (!LI || !LI->hasOneUse()) 4435 return false; 4436 4437 // Check if the load instruction has already been selected. 
4438 Register Reg = lookUpRegForValue(LI); 4439 if (!Reg) 4440 return false; 4441 4442 MachineInstr *MI = MRI.getUniqueVRegDef(Reg); 4443 if (!MI) 4444 return false; 4445 4446 // Check if the correct load instruction has been emitted - SelectionDAG might 4447 // have emitted a zero-extending load, but we need a sign-extending load. 4448 bool IsZExt = isa<ZExtInst>(I); 4449 const auto *LoadMI = MI; 4450 if (LoadMI->getOpcode() == TargetOpcode::COPY && 4451 LoadMI->getOperand(1).getSubReg() == AArch64::sub_32) { 4452 Register LoadReg = MI->getOperand(1).getReg(); 4453 LoadMI = MRI.getUniqueVRegDef(LoadReg); 4454 assert(LoadMI && "Expected valid instruction"); 4455 } 4456 if (!(IsZExt && isZExtLoad(LoadMI)) && !(!IsZExt && isSExtLoad(LoadMI))) 4457 return false; 4458 4459 // Nothing to be done. 4460 if (RetVT != MVT::i64 || SrcVT > MVT::i32) { 4461 updateValueMap(I, Reg); 4462 return true; 4463 } 4464 4465 if (IsZExt) { 4466 Register Reg64 = createResultReg(&AArch64::GPR64RegClass); 4467 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4468 TII.get(AArch64::SUBREG_TO_REG), Reg64) 4469 .addImm(0) 4470 .addReg(Reg, getKillRegState(true)) 4471 .addImm(AArch64::sub_32); 4472 Reg = Reg64; 4473 } else { 4474 assert((MI->getOpcode() == TargetOpcode::COPY && 4475 MI->getOperand(1).getSubReg() == AArch64::sub_32) && 4476 "Expected copy instruction"); 4477 Reg = MI->getOperand(1).getReg(); 4478 MachineBasicBlock::iterator I(MI); 4479 removeDeadCode(I, std::next(I)); 4480 } 4481 updateValueMap(I, Reg); 4482 return true; 4483 } 4484 4485 bool AArch64FastISel::selectIntExt(const Instruction *I) { 4486 assert((isa<ZExtInst>(I) || isa<SExtInst>(I)) && 4487 "Unexpected integer extend instruction."); 4488 MVT RetVT; 4489 MVT SrcVT; 4490 if (!isTypeSupported(I->getType(), RetVT)) 4491 return false; 4492 4493 if (!isTypeSupported(I->getOperand(0)->getType(), SrcVT)) 4494 return false; 4495 4496 // Try to optimize already sign-/zero-extended values from load instructions. 4497 if (optimizeIntExtLoad(I, RetVT, SrcVT)) 4498 return true; 4499 4500 Register SrcReg = getRegForValue(I->getOperand(0)); 4501 if (!SrcReg) 4502 return false; 4503 4504 // Try to optimize already sign-/zero-extended values from function arguments. 4505 bool IsZExt = isa<ZExtInst>(I); 4506 if (const auto *Arg = dyn_cast<Argument>(I->getOperand(0))) { 4507 if ((IsZExt && Arg->hasZExtAttr()) || (!IsZExt && Arg->hasSExtAttr())) { 4508 if (RetVT == MVT::i64 && SrcVT != MVT::i64) { 4509 Register ResultReg = createResultReg(&AArch64::GPR64RegClass); 4510 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, 4511 TII.get(AArch64::SUBREG_TO_REG), ResultReg) 4512 .addImm(0) 4513 .addReg(SrcReg) 4514 .addImm(AArch64::sub_32); 4515 SrcReg = ResultReg; 4516 } 4517 4518 updateValueMap(I, SrcReg); 4519 return true; 4520 } 4521 } 4522 4523 unsigned ResultReg = emitIntExt(SrcVT, SrcReg, RetVT, IsZExt); 4524 if (!ResultReg) 4525 return false; 4526 4527 updateValueMap(I, ResultReg); 4528 return true; 4529 } 4530 4531 bool AArch64FastISel::selectRem(const Instruction *I, unsigned ISDOpcode) { 4532 EVT DestEVT = TLI.getValueType(DL, I->getType(), true); 4533 if (!DestEVT.isSimple()) 4534 return false; 4535 4536 MVT DestVT = DestEVT.getSimpleVT(); 4537 if (DestVT != MVT::i64 && DestVT != MVT::i32) 4538 return false; 4539 4540 unsigned DivOpc; 4541 bool Is64bit = (DestVT == MVT::i64); 4542 switch (ISDOpcode) { 4543 default: 4544 return false; 4545 case ISD::SREM: 4546 DivOpc = Is64bit ? 
AArch64::SDIVXr : AArch64::SDIVWr; 4547 break; 4548 case ISD::UREM: 4549 DivOpc = Is64bit ? AArch64::UDIVXr : AArch64::UDIVWr; 4550 break; 4551 } 4552 unsigned MSubOpc = Is64bit ? AArch64::MSUBXrrr : AArch64::MSUBWrrr; 4553 Register Src0Reg = getRegForValue(I->getOperand(0)); 4554 if (!Src0Reg) 4555 return false; 4556 4557 Register Src1Reg = getRegForValue(I->getOperand(1)); 4558 if (!Src1Reg) 4559 return false; 4560 4561 const TargetRegisterClass *RC = 4562 (DestVT == MVT::i64) ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass; 4563 Register QuotReg = fastEmitInst_rr(DivOpc, RC, Src0Reg, Src1Reg); 4564 assert(QuotReg && "Unexpected DIV instruction emission failure."); 4565 // The remainder is computed as numerator - (quotient * denominator) using the 4566 // MSUB instruction. 4567 Register ResultReg = fastEmitInst_rrr(MSubOpc, RC, QuotReg, Src1Reg, Src0Reg); 4568 updateValueMap(I, ResultReg); 4569 return true; 4570 } 4571 4572 bool AArch64FastISel::selectMul(const Instruction *I) { 4573 MVT VT; 4574 if (!isTypeSupported(I->getType(), VT, /*IsVectorAllowed=*/true)) 4575 return false; 4576 4577 if (VT.isVector()) 4578 return selectBinaryOp(I, ISD::MUL); 4579 4580 const Value *Src0 = I->getOperand(0); 4581 const Value *Src1 = I->getOperand(1); 4582 if (const auto *C = dyn_cast<ConstantInt>(Src0)) 4583 if (C->getValue().isPowerOf2()) 4584 std::swap(Src0, Src1); 4585 4586 // Try to simplify to a shift instruction. 4587 if (const auto *C = dyn_cast<ConstantInt>(Src1)) 4588 if (C->getValue().isPowerOf2()) { 4589 uint64_t ShiftVal = C->getValue().logBase2(); 4590 MVT SrcVT = VT; 4591 bool IsZExt = true; 4592 if (const auto *ZExt = dyn_cast<ZExtInst>(Src0)) { 4593 if (!isIntExtFree(ZExt)) { 4594 MVT VT; 4595 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), VT)) { 4596 SrcVT = VT; 4597 IsZExt = true; 4598 Src0 = ZExt->getOperand(0); 4599 } 4600 } 4601 } else if (const auto *SExt = dyn_cast<SExtInst>(Src0)) { 4602 if (!isIntExtFree(SExt)) { 4603 MVT VT; 4604 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), VT)) { 4605 SrcVT = VT; 4606 IsZExt = false; 4607 Src0 = SExt->getOperand(0); 4608 } 4609 } 4610 } 4611 4612 Register Src0Reg = getRegForValue(Src0); 4613 if (!Src0Reg) 4614 return false; 4615 4616 unsigned ResultReg = 4617 emitLSL_ri(VT, SrcVT, Src0Reg, ShiftVal, IsZExt); 4618 4619 if (ResultReg) { 4620 updateValueMap(I, ResultReg); 4621 return true; 4622 } 4623 } 4624 4625 Register Src0Reg = getRegForValue(I->getOperand(0)); 4626 if (!Src0Reg) 4627 return false; 4628 4629 Register Src1Reg = getRegForValue(I->getOperand(1)); 4630 if (!Src1Reg) 4631 return false; 4632 4633 unsigned ResultReg = emitMul_rr(VT, Src0Reg, Src1Reg); 4634 4635 if (!ResultReg) 4636 return false; 4637 4638 updateValueMap(I, ResultReg); 4639 return true; 4640 } 4641 4642 bool AArch64FastISel::selectShift(const Instruction *I) { 4643 MVT RetVT; 4644 if (!isTypeSupported(I->getType(), RetVT, /*IsVectorAllowed=*/true)) 4645 return false; 4646 4647 if (RetVT.isVector()) 4648 return selectOperator(I, I->getOpcode()); 4649 4650 if (const auto *C = dyn_cast<ConstantInt>(I->getOperand(1))) { 4651 unsigned ResultReg = 0; 4652 uint64_t ShiftVal = C->getZExtValue(); 4653 MVT SrcVT = RetVT; 4654 bool IsZExt = I->getOpcode() != Instruction::AShr; 4655 const Value *Op0 = I->getOperand(0); 4656 if (const auto *ZExt = dyn_cast<ZExtInst>(Op0)) { 4657 if (!isIntExtFree(ZExt)) { 4658 MVT TmpVT; 4659 if (isValueAvailable(ZExt) && isTypeSupported(ZExt->getSrcTy(), TmpVT)) { 4660 SrcVT = TmpVT; 4661 IsZExt = true; 
4662 Op0 = ZExt->getOperand(0); 4663 } 4664 } 4665 } else if (const auto *SExt = dyn_cast<SExtInst>(Op0)) { 4666 if (!isIntExtFree(SExt)) { 4667 MVT TmpVT; 4668 if (isValueAvailable(SExt) && isTypeSupported(SExt->getSrcTy(), TmpVT)) { 4669 SrcVT = TmpVT; 4670 IsZExt = false; 4671 Op0 = SExt->getOperand(0); 4672 } 4673 } 4674 } 4675 4676 Register Op0Reg = getRegForValue(Op0); 4677 if (!Op0Reg) 4678 return false; 4679 4680 switch (I->getOpcode()) { 4681 default: llvm_unreachable("Unexpected instruction."); 4682 case Instruction::Shl: 4683 ResultReg = emitLSL_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4684 break; 4685 case Instruction::AShr: 4686 ResultReg = emitASR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4687 break; 4688 case Instruction::LShr: 4689 ResultReg = emitLSR_ri(RetVT, SrcVT, Op0Reg, ShiftVal, IsZExt); 4690 break; 4691 } 4692 if (!ResultReg) 4693 return false; 4694 4695 updateValueMap(I, ResultReg); 4696 return true; 4697 } 4698 4699 Register Op0Reg = getRegForValue(I->getOperand(0)); 4700 if (!Op0Reg) 4701 return false; 4702 4703 Register Op1Reg = getRegForValue(I->getOperand(1)); 4704 if (!Op1Reg) 4705 return false; 4706 4707 unsigned ResultReg = 0; 4708 switch (I->getOpcode()) { 4709 default: llvm_unreachable("Unexpected instruction."); 4710 case Instruction::Shl: 4711 ResultReg = emitLSL_rr(RetVT, Op0Reg, Op1Reg); 4712 break; 4713 case Instruction::AShr: 4714 ResultReg = emitASR_rr(RetVT, Op0Reg, Op1Reg); 4715 break; 4716 case Instruction::LShr: 4717 ResultReg = emitLSR_rr(RetVT, Op0Reg, Op1Reg); 4718 break; 4719 } 4720 4721 if (!ResultReg) 4722 return false; 4723 4724 updateValueMap(I, ResultReg); 4725 return true; 4726 } 4727 4728 bool AArch64FastISel::selectBitCast(const Instruction *I) { 4729 MVT RetVT, SrcVT; 4730 4731 if (!isTypeLegal(I->getOperand(0)->getType(), SrcVT)) 4732 return false; 4733 if (!isTypeLegal(I->getType(), RetVT)) 4734 return false; 4735 4736 unsigned Opc; 4737 if (RetVT == MVT::f32 && SrcVT == MVT::i32) 4738 Opc = AArch64::FMOVWSr; 4739 else if (RetVT == MVT::f64 && SrcVT == MVT::i64) 4740 Opc = AArch64::FMOVXDr; 4741 else if (RetVT == MVT::i32 && SrcVT == MVT::f32) 4742 Opc = AArch64::FMOVSWr; 4743 else if (RetVT == MVT::i64 && SrcVT == MVT::f64) 4744 Opc = AArch64::FMOVDXr; 4745 else 4746 return false; 4747 4748 const TargetRegisterClass *RC = nullptr; 4749 switch (RetVT.SimpleTy) { 4750 default: llvm_unreachable("Unexpected value type."); 4751 case MVT::i32: RC = &AArch64::GPR32RegClass; break; 4752 case MVT::i64: RC = &AArch64::GPR64RegClass; break; 4753 case MVT::f32: RC = &AArch64::FPR32RegClass; break; 4754 case MVT::f64: RC = &AArch64::FPR64RegClass; break; 4755 } 4756 Register Op0Reg = getRegForValue(I->getOperand(0)); 4757 if (!Op0Reg) 4758 return false; 4759 4760 Register ResultReg = fastEmitInst_r(Opc, RC, Op0Reg); 4761 if (!ResultReg) 4762 return false; 4763 4764 updateValueMap(I, ResultReg); 4765 return true; 4766 } 4767 4768 bool AArch64FastISel::selectFRem(const Instruction *I) { 4769 MVT RetVT; 4770 if (!isTypeLegal(I->getType(), RetVT)) 4771 return false; 4772 4773 RTLIB::Libcall LC; 4774 switch (RetVT.SimpleTy) { 4775 default: 4776 return false; 4777 case MVT::f32: 4778 LC = RTLIB::REM_F32; 4779 break; 4780 case MVT::f64: 4781 LC = RTLIB::REM_F64; 4782 break; 4783 } 4784 4785 ArgListTy Args; 4786 Args.reserve(I->getNumOperands()); 4787 4788 // Populate the argument list. 
4789 for (auto &Arg : I->operands()) { 4790 ArgListEntry Entry; 4791 Entry.Val = Arg; 4792 Entry.Ty = Arg->getType(); 4793 Args.push_back(Entry); 4794 } 4795 4796 CallLoweringInfo CLI; 4797 MCContext &Ctx = MF->getContext(); 4798 CLI.setCallee(DL, Ctx, TLI.getLibcallCallingConv(LC), I->getType(), 4799 TLI.getLibcallName(LC), std::move(Args)); 4800 if (!lowerCallTo(CLI)) 4801 return false; 4802 updateValueMap(I, CLI.ResultReg); 4803 return true; 4804 } 4805 4806 bool AArch64FastISel::selectSDiv(const Instruction *I) { 4807 MVT VT; 4808 if (!isTypeLegal(I->getType(), VT)) 4809 return false; 4810 4811 if (!isa<ConstantInt>(I->getOperand(1))) 4812 return selectBinaryOp(I, ISD::SDIV); 4813 4814 const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue(); 4815 if ((VT != MVT::i32 && VT != MVT::i64) || !C || 4816 !(C.isPowerOf2() || C.isNegatedPowerOf2())) 4817 return selectBinaryOp(I, ISD::SDIV); 4818 4819 unsigned Lg2 = C.countTrailingZeros(); 4820 Register Src0Reg = getRegForValue(I->getOperand(0)); 4821 if (!Src0Reg) 4822 return false; 4823 4824 if (cast<BinaryOperator>(I)->isExact()) { 4825 unsigned ResultReg = emitASR_ri(VT, VT, Src0Reg, Lg2); 4826 if (!ResultReg) 4827 return false; 4828 updateValueMap(I, ResultReg); 4829 return true; 4830 } 4831 4832 int64_t Pow2MinusOne = (1ULL << Lg2) - 1; 4833 unsigned AddReg = emitAdd_ri_(VT, Src0Reg, Pow2MinusOne); 4834 if (!AddReg) 4835 return false; 4836 4837 // (Src0 < 0) ? Pow2 - 1 : 0; 4838 if (!emitICmp_ri(VT, Src0Reg, 0)) 4839 return false; 4840 4841 unsigned SelectOpc; 4842 const TargetRegisterClass *RC; 4843 if (VT == MVT::i64) { 4844 SelectOpc = AArch64::CSELXr; 4845 RC = &AArch64::GPR64RegClass; 4846 } else { 4847 SelectOpc = AArch64::CSELWr; 4848 RC = &AArch64::GPR32RegClass; 4849 } 4850 Register SelectReg = fastEmitInst_rri(SelectOpc, RC, AddReg, Src0Reg, 4851 AArch64CC::LT); 4852 if (!SelectReg) 4853 return false; 4854 4855 // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also 4856 // negate the result. 4857 unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR; 4858 unsigned ResultReg; 4859 if (C.isNegative()) 4860 ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, SelectReg, 4861 AArch64_AM::ASR, Lg2); 4862 else 4863 ResultReg = emitASR_ri(VT, VT, SelectReg, Lg2); 4864 4865 if (!ResultReg) 4866 return false; 4867 4868 updateValueMap(I, ResultReg); 4869 return true; 4870 } 4871 4872 /// This is mostly a copy of the existing FastISel getRegForGEPIndex code. We 4873 /// have to duplicate it for AArch64, because otherwise we would fail during the 4874 /// sign-extend emission. 4875 unsigned AArch64FastISel::getRegForGEPIndex(const Value *Idx) { 4876 Register IdxN = getRegForValue(Idx); 4877 if (IdxN == 0) 4878 // Unhandled operand. Halt "fast" selection and bail. 4879 return 0; 4880 4881 // If the index is smaller or larger than intptr_t, truncate or extend it. 4882 MVT PtrVT = TLI.getPointerTy(DL); 4883 EVT IdxVT = EVT::getEVT(Idx->getType(), /*HandleUnknown=*/false); 4884 if (IdxVT.bitsLT(PtrVT)) { 4885 IdxN = emitIntExt(IdxVT.getSimpleVT(), IdxN, PtrVT, /*isZExt=*/false); 4886 } else if (IdxVT.bitsGT(PtrVT)) 4887 llvm_unreachable("AArch64 FastISel doesn't support types larger than i64"); 4888 return IdxN; 4889 } 4890 4891 /// This is mostly a copy of the existing FastISel GEP code, but we have to 4892 /// duplicate it for AArch64, because otherwise we would bail out even for 4893 /// simple cases. 
/// This is because the standard fastEmit functions don't cover
/// MUL at all and ADD is lowered very inefficiently.
bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
  if (Subtarget->isTargetILP32())
    return false;

  Register N = getRegForValue(I->getOperand(0));
  if (!N)
    return false;

  // Keep a running tab of the total offset to coalesce multiple N = N + Offset
  // into a single N = N + TotalOffset.
  uint64_t TotalOffs = 0;
  MVT VT = TLI.getPointerTy(DL);
  for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
       GTI != E; ++GTI) {
    const Value *Idx = GTI.getOperand();
    if (auto *StTy = GTI.getStructTypeOrNull()) {
      unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
      // N = N + Offset
      if (Field)
        TotalOffs += DL.getStructLayout(StTy)->getElementOffset(Field);
    } else {
      Type *Ty = GTI.getIndexedType();

      // If this is a constant subscript, handle it quickly.
      if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
        if (CI->isZero())
          continue;
        // N = N + Offset
        TotalOffs +=
            DL.getTypeAllocSize(Ty) * cast<ConstantInt>(CI)->getSExtValue();
        continue;
      }
      if (TotalOffs) {
        N = emitAdd_ri_(VT, N, TotalOffs);
        if (!N)
          return false;
        TotalOffs = 0;
      }

      // N = N + Idx * ElementSize;
      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
      unsigned IdxN = getRegForGEPIndex(Idx);
      if (!IdxN)
        return false;

      if (ElementSize != 1) {
        unsigned C = fastEmit_i(VT, VT, ISD::Constant, ElementSize);
        if (!C)
          return false;
        IdxN = emitMul_rr(VT, IdxN, C);
        if (!IdxN)
          return false;
      }
      N = fastEmit_rr(VT, VT, ISD::ADD, N, IdxN);
      if (!N)
        return false;
    }
  }
  if (TotalOffs) {
    N = emitAdd_ri_(VT, N, TotalOffs);
    if (!N)
      return false;
  }
  updateValueMap(I, N);
  return true;
}

bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
  assert(TM.getOptLevel() == CodeGenOpt::None &&
         "cmpxchg survived AtomicExpand at optlevel > -O0");

  auto *RetPairTy = cast<StructType>(I->getType());
  Type *RetTy = RetPairTy->getTypeAtIndex(0U);
  assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
         "cmpxchg has a non-i1 status result");

  MVT VT;
  if (!isTypeLegal(RetTy, VT))
    return false;

  const TargetRegisterClass *ResRC;
  unsigned Opc, CmpOpc;
  // This only supports i32/i64, because i8/i16 aren't legal, and the generic
  // extractvalue selection doesn't support that.
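  // CMP_SWAP_32/64 are pseudo instructions; they are expanded later by the
  // AArch64 pseudo-instruction expansion pass into an exclusive load/store
  // (LDAXR/STLXR) retry loop.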
4979 if (VT == MVT::i32) { 4980 Opc = AArch64::CMP_SWAP_32; 4981 CmpOpc = AArch64::SUBSWrs; 4982 ResRC = &AArch64::GPR32RegClass; 4983 } else if (VT == MVT::i64) { 4984 Opc = AArch64::CMP_SWAP_64; 4985 CmpOpc = AArch64::SUBSXrs; 4986 ResRC = &AArch64::GPR64RegClass; 4987 } else { 4988 return false; 4989 } 4990 4991 const MCInstrDesc &II = TII.get(Opc); 4992 4993 const Register AddrReg = constrainOperandRegClass( 4994 II, getRegForValue(I->getPointerOperand()), II.getNumDefs()); 4995 const Register DesiredReg = constrainOperandRegClass( 4996 II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1); 4997 const Register NewReg = constrainOperandRegClass( 4998 II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2); 4999 5000 const Register ResultReg1 = createResultReg(ResRC); 5001 const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass); 5002 const Register ScratchReg = createResultReg(&AArch64::GPR32RegClass); 5003 5004 // FIXME: MachineMemOperand doesn't support cmpxchg yet. 5005 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) 5006 .addDef(ResultReg1) 5007 .addDef(ScratchReg) 5008 .addUse(AddrReg) 5009 .addUse(DesiredReg) 5010 .addUse(NewReg); 5011 5012 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc)) 5013 .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR) 5014 .addUse(ResultReg1) 5015 .addUse(DesiredReg) 5016 .addImm(0); 5017 5018 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr)) 5019 .addDef(ResultReg2) 5020 .addUse(AArch64::WZR) 5021 .addUse(AArch64::WZR) 5022 .addImm(AArch64CC::NE); 5023 5024 assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers."); 5025 updateValueMap(I, ResultReg1, 2); 5026 return true; 5027 } 5028 5029 bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { 5030 switch (I->getOpcode()) { 5031 default: 5032 break; 5033 case Instruction::Add: 5034 case Instruction::Sub: 5035 return selectAddSub(I); 5036 case Instruction::Mul: 5037 return selectMul(I); 5038 case Instruction::SDiv: 5039 return selectSDiv(I); 5040 case Instruction::SRem: 5041 if (!selectBinaryOp(I, ISD::SREM)) 5042 return selectRem(I, ISD::SREM); 5043 return true; 5044 case Instruction::URem: 5045 if (!selectBinaryOp(I, ISD::UREM)) 5046 return selectRem(I, ISD::UREM); 5047 return true; 5048 case Instruction::Shl: 5049 case Instruction::LShr: 5050 case Instruction::AShr: 5051 return selectShift(I); 5052 case Instruction::And: 5053 case Instruction::Or: 5054 case Instruction::Xor: 5055 return selectLogicalOp(I); 5056 case Instruction::Br: 5057 return selectBranch(I); 5058 case Instruction::IndirectBr: 5059 return selectIndirectBr(I); 5060 case Instruction::BitCast: 5061 if (!FastISel::selectBitCast(I)) 5062 return selectBitCast(I); 5063 return true; 5064 case Instruction::FPToSI: 5065 if (!selectCast(I, ISD::FP_TO_SINT)) 5066 return selectFPToInt(I, /*Signed=*/true); 5067 return true; 5068 case Instruction::FPToUI: 5069 return selectFPToInt(I, /*Signed=*/false); 5070 case Instruction::ZExt: 5071 case Instruction::SExt: 5072 return selectIntExt(I); 5073 case Instruction::Trunc: 5074 if (!selectCast(I, ISD::TRUNCATE)) 5075 return selectTrunc(I); 5076 return true; 5077 case Instruction::FPExt: 5078 return selectFPExt(I); 5079 case Instruction::FPTrunc: 5080 return selectFPTrunc(I); 5081 case Instruction::SIToFP: 5082 if (!selectCast(I, ISD::SINT_TO_FP)) 5083 return selectIntToFP(I, /*Signed=*/true); 5084 return true; 5085 case Instruction::UIToFP: 5086 return selectIntToFP(I, /*Signed=*/false); 
  case Instruction::Load:
    return selectLoad(I);
  case Instruction::Store:
    return selectStore(I);
  case Instruction::FCmp:
  case Instruction::ICmp:
    return selectCmp(I);
  case Instruction::Select:
    return selectSelect(I);
  case Instruction::Ret:
    return selectRet(I);
  case Instruction::FRem:
    return selectFRem(I);
  case Instruction::GetElementPtr:
    return selectGetElementPtr(I);
  case Instruction::AtomicCmpXchg:
    return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
  }

  // Fall back to target-independent instruction selection.
  return selectOperator(I, I->getOpcode());
}

FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) {
  return new AArch64FastISel(FuncInfo, LibInfo);
}