//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

  void setupMF(MachineFunction &MF, GISelKnownBits &KB,
               CodeGenCoverage &CoverageInfo) override {
    InstructionSelector::setupMF(MF, KB, CoverageInfo);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    MFReturnAddr = Register();

    processPHIs(MF);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  // Returns true if the instruction was modified.
  bool preISelLower(MachineInstr &I);

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I) const;

  // Do some preprocessing of G_PHIs before we begin selection.
  void processPHIs(MachineFunction &MF);

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  bool contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI);

  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
                                  int64_t CmpConstant,
                                  const CmpInst::Predicate &Pred,
                                  MachineBasicBlock *DstMBB,
                                  MachineIRBuilder &MIB) const;
  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;
  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                              MachineRegisterInfo &MRI) const;
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectSplitVectorUnmerge(MachineInstr &I,
                                MachineRegisterInfo &MRI) const;
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI) const;
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;

  unsigned emitConstantPoolEntry(const Constant *CPVal,
                                 MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;

  // Emit an integer compare between LHS and RHS, which checks for Predicate.
  //
  // This returns the produced compare instruction, and the predicate which
  // was ultimately used in the compare. The predicate may differ from what
  // is passed in \p Predicate due to optimization.
  std::pair<MachineInstr *, CmpInst::Predicate>
  emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                     MachineOperand &Predicate,
                     MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(const Register &LHS, const Register &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be
  /// materialized using a FMOV instruction, then update MI and return it.
  /// Otherwise, do nothing and return a nullptr.
  MachineInstr *emitFMovForFConstant(MachineInstr &MI,
                                     MachineRegisterInfo &MRI) const;

  /// Emit a CSet for a compare.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder) const;

  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
  /// \p IsNegative is true if the test should be "not zero".
  /// This will also optimize the test bit instruction when possible.
  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
                            MachineBasicBlock *DstMBB,
                            MachineIRBuilder &MIB) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used
  /// from complex pattern matchers like selectAddrModeIndexed().
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
                                          MachineRegisterInfo &MRI) const;

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;

  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
  /// or not a shift + extend should be folded into an addressing mode. Returns
  /// None when this is not profitable or possible.
  ComplexRendererFns
  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
                    MachineOperand &Offset, unsigned SizeInBytes,
                    bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    // TODO: selectShiftedRegister should allow for rotates on logical shifts.
    // For now, make them the same. The only difference between the two is that
    // logical shifts are allowed to fold in rotates. Otherwise, these are
    // functionally the same.
    return selectShiftedRegister(Root);
  }

  /// Given an extend instruction, determine the correct shift-extend type for
  /// that instruction.
  ///
  /// If the instruction is going to be used in a load or store, pass
  /// \p IsLoadStore = true.
  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
  ///
  /// \returns Either \p Reg if no change was necessary, or the new register
  /// created by moving \p Reg.
  ///
  /// Note: This uses emitCopy right now.
  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
                              MachineIRBuilder &MIB) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V,
                             unsigned OpFlags) const;

  // Optimization methods.
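  // These attempt to fold compares, selects, and immediates into more compact
  // flag-setting sequences; each returns nullptr (or false) when the fold does
  // not apply, so callers can fall back to the generic selection paths.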
  bool tryOptSelect(MachineInstr &MI) const;
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;
  MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS,
                                               MachineOperand &RHS,
                                               CmpInst::Predicate &Predicate,
                                               MachineIRBuilder &MIB) const;
  MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS,
                                          MachineOperand &RHS,
                                          MachineIRBuilder &MIB) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  /// register zeroed out. In other words, the result of MI has been explicitly
  /// zero extended.
  bool isDef32(const MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

  bool ProduceNonFlagSettingCondBr = false;

  // Some cached values used during selection.
  // We use LR as a live-in register, and we keep track of it here as it can be
  // clobbered by calls.
  Register MFReturnAddr;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

  // We declare the temporaries used by selectImpl() in the class to minimize the
  // cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC != &AArch64::FPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs() << "Couldn't find appropriate subregister for register class.");
    return false;
  }

  return true;
}

/// Returns the minimum size the given register bank can hold.
static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
    return 32;
  case AArch64::FPRRegBankID:
    return 8;
  default:
    llvm_unreachable("Tried to get minimum size for unknown register bank.");
  }
}

static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
  auto &MI = *Root.getParent();
  auto &MBB = *MI.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();
  uint64_t Immed;
  if (Root.isImm())
    Immed = Root.getImm();
  else if (Root.isCImm())
    Immed = Root.getCImm()->getZExtValue();
  else if (Root.isReg()) {
    auto ValAndVReg =
        getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
    if (!ValAndVReg)
      return None;
    Immed = ValAndVReg->Value;
  } else
    return None;
  return Immed;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - not all operands are in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!Register::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types; the number of
       // bits may not exactly match.
       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, and as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
/// E.g "To = COPY SrcReg:SubReg"
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
                       const RegisterBankInfo &RBI, Register SrcReg,
                       const TargetRegisterClass *To, unsigned SubReg) {
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  assert(SubReg && "Expected a valid subregister");

  MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert((I.isCopy() ||
            (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
             !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
           "No phys reg on generic operator!");
    bool ValidCopy = true;
#ifndef NDEBUG
    ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
    assert(ValidCopy && "Invalid copy.");
#endif
    return ValidCopy;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      MachineOperand &RegOp = I.getOperand(1);
      RegOp.setReg(PromoteReg);

      // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
      KnownValid = true;
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (Register::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}

static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI,
                                const RegisterBankInfo &RBI) {
  const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
  bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() !=
               AArch64::GPRRegBankID);
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (Ty == LLT::scalar(32))
    return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr;
  else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64))
    return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr;
  return 0;
}

/// Helper function to select the opcode for a G_FCMP.
static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) {
  // If this is a compare against +0.0, then we don't have to explicitly
  // materialize a constant.
  const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI);
  bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
  unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
  if (OpSize != 32 && OpSize != 64)
    return 0;
  unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
                              {AArch64::FCMPSri, AArch64::FCMPDri}};
  return CmpOpcTbl[ShouldUseImm][OpSize == 64];
}

/// Returns true if \p P is an unsigned integer comparison predicate.
static bool isUnsignedICMPPred(const CmpInst::Predicate P) {
  switch (P) {
  default:
    return false;
  case CmpInst::ICMP_UGT:
  case CmpInst::ICMP_UGE:
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_ULE:
    return true;
  }
}

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
                                      AArch64CC::CondCode &CondCode,
                                      AArch64CC::CondCode &CondCode2) {
  CondCode2 = AArch64CC::AL;
  switch (P) {
  default:
    llvm_unreachable("Unknown FP condition!");
  case CmpInst::FCMP_OEQ:
    CondCode = AArch64CC::EQ;
    break;
  case CmpInst::FCMP_OGT:
    CondCode = AArch64CC::GT;
    break;
  case CmpInst::FCMP_OGE:
    CondCode = AArch64CC::GE;
    break;
  case CmpInst::FCMP_OLT:
    CondCode = AArch64CC::MI;
    break;
  case CmpInst::FCMP_OLE:
    CondCode = AArch64CC::LS;
    break;
  case CmpInst::FCMP_ONE:
    CondCode = AArch64CC::MI;
    CondCode2 = AArch64CC::GT;
    break;
  case CmpInst::FCMP_ORD:
    CondCode = AArch64CC::VC;
    break;
  case CmpInst::FCMP_UNO:
    CondCode = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UEQ:
    CondCode = AArch64CC::EQ;
    CondCode2 = AArch64CC::VS;
    break;
  case CmpInst::FCMP_UGT:
    CondCode = AArch64CC::HI;
    break;
  case CmpInst::FCMP_UGE:
    CondCode = AArch64CC::PL;
    break;
  case CmpInst::FCMP_ULT:
    CondCode = AArch64CC::LT;
    break;
  case CmpInst::FCMP_ULE:
    CondCode = AArch64CC::LE;
    break;
  case CmpInst::FCMP_UNE:
    CondCode = AArch64CC::NE;
    break;
  }
}

/// Return a register which can be used as a bit to test in a TB(N)Z.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
                              MachineRegisterInfo &MRI) {
  assert(Reg.isValid() && "Expected valid register!");
  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
    unsigned Opc = MI->getOpcode();

    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      break;

    // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
    //
    // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
    // on the truncated x is the same as the bit number on x.
    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      Register NextReg = MI->getOperand(1).getReg();
      // Did we find something worth folding?
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
        break;

      // NextReg is worth folding. Keep looking.
      Reg = NextReg;
      continue;
    }

    // Attempt to find a suitable operation with a constant on one side.
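    // Illustrative example: for %y = G_AND %x, 8 (mask 0b1000), testing bit 3
    // of %y is the same as testing bit 3 of %x, so the walk below can step
    // through the G_AND (see the per-opcode rules further down).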
    Optional<uint64_t> C;
    Register TestReg;
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
      auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
      if (!VRegAndVal) {
        // AND commutes, check the other side for a constant.
        // FIXME: Can we canonicalize the constant so that it's always on the
        // same side at some point earlier?
        std::swap(ConstantReg, TestReg);
        VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
      }
      if (VRegAndVal)
        C = VRegAndVal->Value;
      break;
    }
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
      auto VRegAndVal =
          getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
      if (VRegAndVal)
        C = VRegAndVal->Value;
      break;
    }
    }

    // Didn't find a constant or viable register. Bail out of the loop.
    if (!C || !TestReg.isValid())
      break;

    // We found a suitable instruction with a constant. Check to see if we can
    // walk through the instruction.
    Register NextReg;
    unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
      // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
      if ((*C >> Bit) & 1)
        NextReg = TestReg;
      break;
    case TargetOpcode::G_SHL:
      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
      // the type of the register.
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit - *C;
      }
      break;
    case TargetOpcode::G_ASHR:
      // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
      // in x
      NextReg = TestReg;
      Bit = Bit + *C;
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
      break;
    case TargetOpcode::G_LSHR:
      // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
      if ((Bit + *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit + *C;
      }
      break;
    case TargetOpcode::G_XOR:
      // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
      // appropriate.
      //
      // e.g. If x' = xor x, c, and the b-th bit is set in c then
      //
      // tbz x', b -> tbnz x, b
      //
      // Because x' only has the b-th bit set if x does not.
      if ((*C >> Bit) & 1)
        Invert = !Invert;
      NextReg = TestReg;
      break;
    }

    // Check if we found anything worth folding.
    if (!NextReg.isValid())
      return Reg;
    Reg = NextReg;
  }

  return Reg;
}

MachineInstr *AArch64InstructionSelector::emitTestBit(
    Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
    MachineIRBuilder &MIB) const {
  assert(TestReg.isValid());
  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!");
  MachineRegisterInfo &MRI = *MIB.getMRI();

  // Attempt to optimize the test bit by walking over instructions.
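  // For example, if getTestBitReg sees that TestReg is defined by a G_SHL by
  // 2, a request to test bit 3 here becomes a test of bit 1 on the shift's
  // input instead (see the folding rules in getTestBitReg).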
  TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
  LLT Ty = MRI.getType(TestReg);
  unsigned Size = Ty.getSizeInBits();
  assert(!Ty.isVector() && "Expected a scalar!");
  assert(Bit < 64 && "Bit is too large!");

  // When the test register is a 64-bit register, we have to narrow to make
  // TBNZW work.
  bool UseWReg = Bit < 32;
  unsigned NecessarySize = UseWReg ? 32 : 64;
  if (Size != NecessarySize)
    TestReg = moveScalarRegClass(
        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
        MIB);

  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                          {AArch64::TBZW, AArch64::TBNZW}};
  unsigned Opc = OpcTable[UseWReg][IsNegative];
  auto TestBitMI =
      MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
  constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
  return &*TestBitMI;
}

bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
    MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
    MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
  // Given something like this:
  //
  // %x = ...Something...
  // %one = G_CONSTANT i64 1
  // %zero = G_CONSTANT i64 0
  // %and = G_AND %x, %one
  // %cmp = G_ICMP intpred(ne), %and, %zero
  // %cmp_trunc = G_TRUNC %cmp
  // G_BRCOND %cmp_trunc, %bb.3
  //
  // We want to try and fold the AND into the G_BRCOND and produce either a
  // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
  //
  // In this case, we'd get
  //
  // TBNZ %x %bb.3
  //
  if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
    return false;

  // Need to be comparing against 0 to fold.
  if (CmpConstant != 0)
    return false;

  MachineRegisterInfo &MRI = *MIB.getMRI();

  // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
  // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
  // so folding would be redundant.
  if (Pred != CmpInst::Predicate::ICMP_EQ &&
      Pred != CmpInst::Predicate::ICMP_NE)
    return false;

  // Check if the AND has a constant on its RHS which we can use as a mask.
  // If it's a power of 2, then it's the same as checking a specific bit.
  // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
  auto MaybeBit =
      getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
  if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
    return false;

  uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
  Register TestReg = AndInst->getOperand(1).getReg();
  bool Invert = Pred == CmpInst::Predicate::ICMP_NE;

  // Emit a TB(N)Z.
  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
  return true;
}

bool AArch64InstructionSelector::selectCompareBranch(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {

  const Register CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MachineInstr *CCMI = MRI.getVRegDef(CondReg);
  if (CCMI->getOpcode() == TargetOpcode::G_TRUNC)
    CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg());
  if (CCMI->getOpcode() != TargetOpcode::G_ICMP)
    return false;

  Register LHS = CCMI->getOperand(2).getReg();
  Register RHS = CCMI->getOperand(3).getReg();
  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  MachineIRBuilder MIB(I);
  CmpInst::Predicate Pred =
      (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
  MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);

  // When we can emit a TB(N)Z, prefer that.
  //
  // Handle non-commutative condition codes first.
  // Note that we don't want to do this when we have a G_AND because it can
  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
  if (VRegAndVal && LHSMI->getOpcode() != TargetOpcode::G_AND) {
    int64_t C = VRegAndVal->Value;

    // When we have a greater-than comparison, we can just test if the msb is
    // zero.
    if (C == -1 && Pred == CmpInst::ICMP_SGT) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }

    // When we have a less than comparison, we can just test if the msb is not
    // zero.
    if (C == 0 && Pred == CmpInst::ICMP_SLT) {
      uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
      emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
      I.eraseFromParent();
      return true;
    }
  }

  if (!VRegAndVal) {
    std::swap(RHS, LHS);
    VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
    LHSMI = getDefIgnoringCopies(LHS, MRI);
  }

  if (!VRegAndVal || VRegAndVal->Value != 0) {
    // If we can't select a CBZ then emit a cmp + Bcc.
    MachineInstr *Cmp;
    std::tie(Cmp, Pred) = emitIntegerCompare(
        CCMI->getOperand(2), CCMI->getOperand(3), CCMI->getOperand(1), MIB);
    if (!Cmp)
      return false;
    const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
    I.eraseFromParent();
    return true;
  }

  // Try to emit a TB(N)Z for an eq or ne condition.
  if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
                                 MIB)) {
    I.eraseFromParent();
    return true;
  }

  const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
  if (RB.getID() != AArch64::GPRRegBankID)
    return false;
  if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
    return false;

  const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits();
  unsigned CBOpc = 0;
  if (CmpWidth <= 32)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW);
  else if (CmpWidth == 64)
    CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZX : AArch64::CBNZX);
  else
    return false;

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc))
      .addUse(LHS)
      .addMBB(DestMBB)
      .constrainAllUses(TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

/// Returns the element immediate value of a vector shift operand if found.
/// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR.
static Optional<int64_t> getVectorShiftImm(Register Reg,
                                           MachineRegisterInfo &MRI) {
  assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand");
  MachineInstr *OpMI = MRI.getVRegDef(Reg);
  assert(OpMI && "Expected to find a vreg def for vector shift operand");
  if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR)
    return None;

  // Check all operands are identical immediates.
  int64_t ImmVal = 0;
  for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) {
    auto VRegAndVal =
        getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI);
    if (!VRegAndVal)
      return None;

    if (Idx == 1)
      ImmVal = VRegAndVal->Value;
    if (ImmVal != VRegAndVal->Value)
      return None;
  }

  return ImmVal;
}

/// Matches and returns the shift immediate value for a SHL instruction given
/// a shift operand.
static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg,
                                         MachineRegisterInfo &MRI) {
  Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI);
  if (!ShiftImm)
    return None;
  // Check the immediate is in range for a SHL.
  int64_t Imm = *ShiftImm;
  if (Imm < 0)
    return None;
  switch (SrcTy.getElementType().getSizeInBits()) {
  default:
    LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift");
    return None;
  case 8:
    if (Imm > 7)
      return None;
    break;
  case 16:
    if (Imm > 15)
      return None;
    break;
  case 32:
    if (Imm > 31)
      return None;
    break;
  case 64:
    if (Imm > 63)
      return None;
    break;
  }
  return Imm;
}

bool AArch64InstructionSelector::selectVectorSHL(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_SHL);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // Check if we have a vector of constants on RHS that we can select as the
  // immediate form.
  Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI);

  unsigned Opc = 0;
  if (Ty == LLT::vector(2, 64)) {
    Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64;
  } else if (Ty == LLT::vector(4, 32)) {
    Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_SHL type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg});
  if (ImmVal)
    Shl.addImm(*ImmVal);
  else
    Shl.addUse(Src2Reg);
  constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVectorASHR(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_ASHR);
  Register DstReg = I.getOperand(0).getReg();
  const LLT Ty = MRI.getType(DstReg);
  Register Src1Reg = I.getOperand(1).getReg();
  Register Src2Reg = I.getOperand(2).getReg();

  if (!Ty.isVector())
    return false;

  // There is no shift-right-by-register instruction, but the shift-left-by-
  // register instruction takes a signed shift amount, where negative values
  // specify a right shift.

  unsigned Opc = 0;
  unsigned NegOpc = 0;
  const TargetRegisterClass *RC = nullptr;
  if (Ty == LLT::vector(2, 64)) {
    Opc = AArch64::SSHLv2i64;
    NegOpc = AArch64::NEGv2i64;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(4, 32)) {
    Opc = AArch64::SSHLv4i32;
    NegOpc = AArch64::NEGv4i32;
    RC = &AArch64::FPR128RegClass;
  } else if (Ty == LLT::vector(2, 32)) {
    Opc = AArch64::SSHLv2i32;
    NegOpc = AArch64::NEGv2i32;
    RC = &AArch64::FPR64RegClass;
  } else {
    LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type");
    return false;
  }

  MachineIRBuilder MIB(I);
  auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg});
  constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI);
  auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg});
  constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectVaStartAAPCS(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  return false;
}

bool AArch64InstructionSelector::selectVaStartDarwin(
    MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
  Register ListReg = I.getOperand(0).getReg();

  Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass);

  auto MIB =
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri))
          .addDef(ArgsAddrReg)
          .addFrameIndex(FuncInfo->getVarArgsStackIndex())
          .addImm(0)
          .addImm(0);

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);

  MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui))
            .addUse(ArgsAddrReg)
            .addUse(ListReg)
            .addImm(0)
            .addMemOperand(*I.memoperands_begin());

  constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

void AArch64InstructionSelector::materializeLargeCMVal(
    MachineInstr &I, const Value *V, unsigned OpFlags) const {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineIRBuilder MIB(I);

  auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {});
  MovZ->addOperand(MF, I.getOperand(1));
  MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 |
                                     AArch64II::MO_NC);
  MovZ->addOperand(MF, MachineOperand::CreateImm(0));
  constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI);

  auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset,
                       Register ForceDstReg) {
    Register DstReg = ForceDstReg
                          ? ForceDstReg
                          : MRI.createVirtualRegister(&AArch64::GPR64RegClass);
    auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg);
    if (auto *GV = dyn_cast<GlobalValue>(V)) {
      MovI->addOperand(MF, MachineOperand::CreateGA(
                               GV, MovZ->getOperand(1).getOffset(), Flags));
    } else {
      MovI->addOperand(
          MF, MachineOperand::CreateBA(cast<BlockAddress>(V),
                                       MovZ->getOperand(1).getOffset(), Flags));
    }
    MovI->addOperand(MF, MachineOperand::CreateImm(Offset));
    constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
    return DstReg;
  };
  Register DstReg = BuildMovK(MovZ.getReg(0),
                              AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
  DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
  BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
  return;
}

bool AArch64InstructionSelector::preISelLower(MachineInstr &I) {
  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  switch (I.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR: {
    // These shifts are legalized to have 64 bit shift amounts because we want
    // to take advantage of the existing imported selection patterns that assume
    // the immediates are s64s. However, if the shifted type is 32 bits and for
    // some reason we receive input GMIR that has an s64 shift amount that's not
    // a G_CONSTANT, insert a truncate so that we can still select the s32
    // register-register variant.
    Register SrcReg = I.getOperand(1).getReg();
    Register ShiftReg = I.getOperand(2).getReg();
    const LLT ShiftTy = MRI.getType(ShiftReg);
    const LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return false;
    assert(!ShiftTy.isVector() && "unexpected vector shift ty");
    if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64)
      return false;
    auto *AmtMI = MRI.getVRegDef(ShiftReg);
    assert(AmtMI && "could not find a vreg definition for shift amount");
    if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) {
      // Insert a subregister copy to implement a 64->32 trunc
      MachineIRBuilder MIB(I);
      auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {})
                       .addReg(ShiftReg, 0, AArch64::sub_32);
      MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID));
      I.getOperand(2).setReg(Trunc.getReg(0));
    }
    return true;
  }
  case TargetOpcode::G_STORE:
    return contractCrossBankCopyIntoStore(I, MRI);
  case TargetOpcode::G_PTR_ADD:
    return convertPtrAddToAdd(I, MRI);
  case TargetOpcode::G_LOAD: {
    // For scalar loads of pointers, we try to convert the dest type from p0
    // to s64 so that our imported patterns can match. Like with the G_PTR_ADD
    // conversion, this should be ok because all users should have been
    // selected already, so the type doesn't matter for them.
1620 Register DstReg = I.getOperand(0).getReg(); 1621 const LLT DstTy = MRI.getType(DstReg); 1622 if (!DstTy.isPointer()) 1623 return false; 1624 MRI.setType(DstReg, LLT::scalar(64)); 1625 return true; 1626 } 1627 default: 1628 return false; 1629 } 1630 } 1631 1632 /// This lowering tries to look for G_PTR_ADD instructions and then converts 1633 /// them to a standard G_ADD with a COPY on the source. 1634 /// 1635 /// The motivation behind this is to expose the add semantics to the imported 1636 /// tablegen patterns. We shouldn't need to check for uses being loads/stores, 1637 /// because the selector works bottom up, uses before defs. By the time we 1638 /// end up trying to select a G_PTR_ADD, we should have already attempted to 1639 /// fold this into addressing modes and were therefore unsuccessful. 1640 bool AArch64InstructionSelector::convertPtrAddToAdd( 1641 MachineInstr &I, MachineRegisterInfo &MRI) { 1642 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); 1643 Register DstReg = I.getOperand(0).getReg(); 1644 Register AddOp1Reg = I.getOperand(1).getReg(); 1645 const LLT PtrTy = MRI.getType(DstReg); 1646 if (PtrTy.getAddressSpace() != 0) 1647 return false; 1648 1649 MachineIRBuilder MIB(I); 1650 const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64); 1651 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg); 1652 // Set regbanks on the registers. 1653 if (PtrTy.isVector()) 1654 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID)); 1655 else 1656 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); 1657 1658 // Now turn the %dst(p0) = G_PTR_ADD %base, off into: 1659 // %dst(intty) = G_ADD %intbase, off 1660 I.setDesc(TII.get(TargetOpcode::G_ADD)); 1661 MRI.setType(DstReg, CastPtrTy); 1662 I.getOperand(1).setReg(PtrToInt.getReg(0)); 1663 if (!select(*PtrToInt)) { 1664 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"); 1665 return false; 1666 } 1667 return true; 1668 } 1669 1670 bool AArch64InstructionSelector::earlySelectSHL( 1671 MachineInstr &I, MachineRegisterInfo &MRI) const { 1672 // We try to match the immediate variant of LSL, which is actually an alias 1673 // for a special case of UBFM. Otherwise, we fall back to the imported 1674 // selector which will match the register variant. 1675 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op"); 1676 const auto &MO = I.getOperand(2); 1677 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI); 1678 if (!VRegAndVal) 1679 return false; 1680 1681 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 1682 if (DstTy.isVector()) 1683 return false; 1684 bool Is64Bit = DstTy.getSizeInBits() == 64; 1685 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO); 1686 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO); 1687 MachineIRBuilder MIB(I); 1688 1689 if (!Imm1Fn || !Imm2Fn) 1690 return false; 1691 1692 auto NewI = 1693 MIB.buildInstr(Is64Bit ? 
AArch64::UBFMXri : AArch64::UBFMWri, 1694 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()}); 1695 1696 for (auto &RenderFn : *Imm1Fn) 1697 RenderFn(NewI); 1698 for (auto &RenderFn : *Imm2Fn) 1699 RenderFn(NewI); 1700 1701 I.eraseFromParent(); 1702 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); 1703 } 1704 1705 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( 1706 MachineInstr &I, MachineRegisterInfo &MRI) { 1707 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE"); 1708 // If we're storing a scalar, it doesn't matter what register bank that 1709 // scalar is on. All that matters is the size. 1710 // 1711 // So, if we see something like this (with a 32-bit scalar as an example): 1712 // 1713 // %x:gpr(s32) = ... something ... 1714 // %y:fpr(s32) = COPY %x:gpr(s32) 1715 // G_STORE %y:fpr(s32) 1716 // 1717 // We can fix this up into something like this: 1718 // 1719 // G_STORE %x:gpr(s32) 1720 // 1721 // And then continue the selection process normally. 1722 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI); 1723 if (!DefDstReg.isValid()) 1724 return false; 1725 LLT DefDstTy = MRI.getType(DefDstReg); 1726 Register StoreSrcReg = I.getOperand(0).getReg(); 1727 LLT StoreSrcTy = MRI.getType(StoreSrcReg); 1728 1729 // If we get something strange like a physical register, then we shouldn't 1730 // go any further. 1731 if (!DefDstTy.isValid()) 1732 return false; 1733 1734 // Are the source and dst types the same size? 1735 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) 1736 return false; 1737 1738 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == 1739 RBI.getRegBank(DefDstReg, MRI, TRI)) 1740 return false; 1741 1742 // We have a cross-bank copy, which is entering a store. Let's fold it. 
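  // Rewriting the stored register is safe here: the memory operand and the
  // size of the stored value are unchanged; only the register bank of the
  // source differs.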
1743 I.getOperand(0).setReg(DefDstReg); 1744 return true; 1745 } 1746 1747 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const { 1748 assert(I.getParent() && "Instruction should be in a basic block!"); 1749 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 1750 1751 MachineBasicBlock &MBB = *I.getParent(); 1752 MachineFunction &MF = *MBB.getParent(); 1753 MachineRegisterInfo &MRI = MF.getRegInfo(); 1754 1755 switch (I.getOpcode()) { 1756 case TargetOpcode::G_SHL: 1757 return earlySelectSHL(I, MRI); 1758 case TargetOpcode::G_CONSTANT: { 1759 bool IsZero = false; 1760 if (I.getOperand(1).isCImm()) 1761 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0; 1762 else if (I.getOperand(1).isImm()) 1763 IsZero = I.getOperand(1).getImm() == 0; 1764 1765 if (!IsZero) 1766 return false; 1767 1768 Register DefReg = I.getOperand(0).getReg(); 1769 LLT Ty = MRI.getType(DefReg); 1770 if (Ty.getSizeInBits() == 64) { 1771 I.getOperand(1).ChangeToRegister(AArch64::XZR, false); 1772 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); 1773 } else if (Ty.getSizeInBits() == 32) { 1774 I.getOperand(1).ChangeToRegister(AArch64::WZR, false); 1775 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI); 1776 } else 1777 return false; 1778 1779 I.setDesc(TII.get(TargetOpcode::COPY)); 1780 return true; 1781 } 1782 default: 1783 return false; 1784 } 1785 } 1786 1787 bool AArch64InstructionSelector::select(MachineInstr &I) { 1788 assert(I.getParent() && "Instruction should be in a basic block!"); 1789 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 1790 1791 MachineBasicBlock &MBB = *I.getParent(); 1792 MachineFunction &MF = *MBB.getParent(); 1793 MachineRegisterInfo &MRI = MF.getRegInfo(); 1794 1795 const AArch64Subtarget *Subtarget = 1796 &static_cast<const AArch64Subtarget &>(MF.getSubtarget()); 1797 if (Subtarget->requiresStrictAlign()) { 1798 // We don't support this feature yet. 1799 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n"); 1800 return false; 1801 } 1802 1803 unsigned Opcode = I.getOpcode(); 1804 // G_PHI requires same handling as PHI 1805 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { 1806 // Certain non-generic instructions also need some special handling. 
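    // Specifically: LOAD_STACK_GUARD only needs its operands constrained,
    // PHI/G_PHI need a register class chosen for their def, and plain COPYs
    // go through selectCopy. Anything else non-generic is reported as
    // selected without modification.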
1807 1808 if (Opcode == TargetOpcode::LOAD_STACK_GUARD) 1809 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1810 1811 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { 1812 const Register DefReg = I.getOperand(0).getReg(); 1813 const LLT DefTy = MRI.getType(DefReg); 1814 1815 const RegClassOrRegBank &RegClassOrBank = 1816 MRI.getRegClassOrRegBank(DefReg); 1817 1818 const TargetRegisterClass *DefRC 1819 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); 1820 if (!DefRC) { 1821 if (!DefTy.isValid()) { 1822 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); 1823 return false; 1824 } 1825 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); 1826 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI); 1827 if (!DefRC) { 1828 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); 1829 return false; 1830 } 1831 } 1832 1833 I.setDesc(TII.get(TargetOpcode::PHI)); 1834 1835 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); 1836 } 1837 1838 if (I.isCopy()) 1839 return selectCopy(I, TII, MRI, TRI, RBI); 1840 1841 return true; 1842 } 1843 1844 1845 if (I.getNumOperands() != I.getNumExplicitOperands()) { 1846 LLVM_DEBUG( 1847 dbgs() << "Generic instruction has unexpected implicit operands\n"); 1848 return false; 1849 } 1850 1851 // Try to do some lowering before we start instruction selecting. These 1852 // lowerings are purely transformations on the input G_MIR and so selection 1853 // must continue after any modification of the instruction. 1854 if (preISelLower(I)) { 1855 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it. 1856 } 1857 1858 // There may be patterns where the importer can't deal with them optimally, 1859 // but does select it to a suboptimal sequence so our custom C++ selection 1860 // code later never has a chance to work on it. Therefore, we have an early 1861 // selection attempt here to give priority to certain selection routines 1862 // over the imported ones. 1863 if (earlySelect(I)) 1864 return true; 1865 1866 if (selectImpl(I, *CoverageInfo)) 1867 return true; 1868 1869 LLT Ty = 1870 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{}; 1871 1872 MachineIRBuilder MIB(I); 1873 1874 switch (Opcode) { 1875 case TargetOpcode::G_BRCOND: { 1876 if (Ty.getSizeInBits() > 32) { 1877 // We shouldn't need this on AArch64, but it would be implemented as an 1878 // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the 1879 // bit being tested is < 32. 1880 LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty 1881 << ", expected at most 32-bits"); 1882 return false; 1883 } 1884 1885 const Register CondReg = I.getOperand(0).getReg(); 1886 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1887 1888 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z 1889 // instructions will not be produced, as they are conditional branch 1890 // instructions that do not set flags. 
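    // With that in mind, the code below first tries to fold the condition
    // into a CB(N)Z/TB(N)Z via selectCompareBranch when that is allowed, and
    // otherwise falls back to a single TBNZ on bit 0 of the condition. When
    // those optimized forms must be avoided, it instead emits a flag-setting
    // ANDS on the condition register followed by a conditional Bcc.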
1891     if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI))
1892       return true;
1893 
1894     if (ProduceNonFlagSettingCondBr) {
1895       auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
1896                      .addUse(CondReg)
1897                      .addImm(/*bit offset=*/0)
1898                      .addMBB(DestMBB);
1899 
1900       I.eraseFromParent();
1901       return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
1902     } else {
1903       auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
1904                      .addDef(AArch64::WZR)
1905                      .addUse(CondReg)
1906                      .addImm(1);
1907       constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI);
1908       auto Bcc =
1909           BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc))
1910               .addImm(AArch64CC::EQ)
1911               .addMBB(DestMBB);
1912 
1913       I.eraseFromParent();
1914       return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI);
1915     }
1916   }
1917 
1918   case TargetOpcode::G_BRINDIRECT: {
1919     I.setDesc(TII.get(AArch64::BR));
1920     return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1921   }
1922 
1923   case TargetOpcode::G_BRJT:
1924     return selectBrJT(I, MRI);
1925 
1926   case AArch64::G_ADD_LOW: {
1927     // This op may have been separated from its ADRP companion by the localizer
1928     // or some other code motion pass. Given that many CPUs will try to
1929     // macro fuse these operations anyway, select this into a MOVaddr pseudo
1930     // which will later be expanded into an ADRP+ADD pair after scheduling.
1931     MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
1932     if (BaseMI->getOpcode() != AArch64::ADRP) {
1933       I.setDesc(TII.get(AArch64::ADDXri));
1934       I.addOperand(MachineOperand::CreateImm(0));
1935       return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1936     }
1937     assert(TM.getCodeModel() == CodeModel::Small &&
1938            "Expected small code model");
1939     MachineIRBuilder MIB(I);
1940     auto Op1 = BaseMI->getOperand(1);
1941     auto Op2 = I.getOperand(2);
1942     auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
1943                        .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
1944                                          Op1.getTargetFlags())
1945                        .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
1946                                          Op2.getTargetFlags());
1947     I.eraseFromParent();
1948     return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
1949   }
1950 
1951   case TargetOpcode::G_BSWAP: {
1952     // Handle vector types for G_BSWAP directly.
1953     Register DstReg = I.getOperand(0).getReg();
1954     LLT DstTy = MRI.getType(DstReg);
1955 
1956     // We should only get vector types here; everything else is handled by the
1957     // importer right now.
1958     if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
1959       LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
1960       return false;
1961     }
1962 
1963     // Only handle 4 and 2 element vectors for now.
1964     // TODO: 16-bit elements.
1965     unsigned NumElts = DstTy.getNumElements();
1966     if (NumElts != 4 && NumElts != 2) {
1967       LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
1968       return false;
1969     }
1970 
1971     // Choose the correct opcode for the supported types. Right now, that's
1972     // v2s32, v4s32, and v2s64.
1973     unsigned Opc = 0;
1974     unsigned EltSize = DstTy.getElementType().getSizeInBits();
1975     if (EltSize == 32)
1976       Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8
1977                                           : AArch64::REV32v16i8;
1978     else if (EltSize == 64)
1979       Opc = AArch64::REV64v16i8;
1980 
1981     // We should always get something by the time we get here...
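    // (For reference: v2s32 and v4s32 map to REV32v8i8/REV32v16i8
    // respectively, and v2s64 maps to REV64v16i8, per the selection above.)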
1982 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?"); 1983 1984 I.setDesc(TII.get(Opc)); 1985 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1986 } 1987 1988 case TargetOpcode::G_FCONSTANT: 1989 case TargetOpcode::G_CONSTANT: { 1990 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; 1991 1992 const LLT s8 = LLT::scalar(8); 1993 const LLT s16 = LLT::scalar(16); 1994 const LLT s32 = LLT::scalar(32); 1995 const LLT s64 = LLT::scalar(64); 1996 const LLT p0 = LLT::pointer(0, 64); 1997 1998 const Register DefReg = I.getOperand(0).getReg(); 1999 const LLT DefTy = MRI.getType(DefReg); 2000 const unsigned DefSize = DefTy.getSizeInBits(); 2001 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2002 2003 // FIXME: Redundant check, but even less readable when factored out. 2004 if (isFP) { 2005 if (Ty != s32 && Ty != s64) { 2006 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty 2007 << " constant, expected: " << s32 << " or " << s64 2008 << '\n'); 2009 return false; 2010 } 2011 2012 if (RB.getID() != AArch64::FPRRegBankID) { 2013 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty 2014 << " constant on bank: " << RB 2015 << ", expected: FPR\n"); 2016 return false; 2017 } 2018 2019 // The case when we have 0.0 is covered by tablegen. Reject it here so we 2020 // can be sure tablegen works correctly and isn't rescued by this code. 2021 if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0)) 2022 return false; 2023 } else { 2024 // s32 and s64 are covered by tablegen. 2025 if (Ty != p0 && Ty != s8 && Ty != s16) { 2026 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty 2027 << " constant, expected: " << s32 << ", " << s64 2028 << ", or " << p0 << '\n'); 2029 return false; 2030 } 2031 2032 if (RB.getID() != AArch64::GPRRegBankID) { 2033 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty 2034 << " constant on bank: " << RB 2035 << ", expected: GPR\n"); 2036 return false; 2037 } 2038 } 2039 2040 // We allow G_CONSTANT of types < 32b. 2041 const unsigned MovOpc = 2042 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm; 2043 2044 if (isFP) { 2045 // Either emit a FMOV, or emit a copy to emit a normal mov. 2046 const TargetRegisterClass &GPRRC = 2047 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass; 2048 const TargetRegisterClass &FPRRC = 2049 DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass; 2050 2051 // Can we use a FMOV instruction to represent the immediate? 2052 if (emitFMovForFConstant(I, MRI)) 2053 return true; 2054 2055 // For 64b values, emit a constant pool load instead. 2056 if (DefSize == 64) { 2057 auto *FPImm = I.getOperand(1).getFPImm(); 2058 MachineIRBuilder MIB(I); 2059 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB); 2060 if (!LoadMI) { 2061 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n"); 2062 return false; 2063 } 2064 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()}); 2065 I.eraseFromParent(); 2066 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI); 2067 } 2068 2069 // Nope. Emit a copy and use a normal mov instead. 
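      // That is, materialize the bits into a fresh GPR with MOVi32imm and
      // copy the result over to the FPR def, roughly (illustrative MIR):
      //   %tmp:gpr32 = MOVi32imm <bits>
      //   %def:fpr32 = COPY %tmp:gpr32
      // (64-bit FP constants were already handled above via the constant
      // pool.)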
2070 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC); 2071 MachineOperand &RegOp = I.getOperand(0); 2072 RegOp.setReg(DefGPRReg); 2073 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 2074 MIB.buildCopy({DefReg}, {DefGPRReg}); 2075 2076 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) { 2077 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n"); 2078 return false; 2079 } 2080 2081 MachineOperand &ImmOp = I.getOperand(1); 2082 // FIXME: Is going through int64_t always correct? 2083 ImmOp.ChangeToImmediate( 2084 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); 2085 } else if (I.getOperand(1).isCImm()) { 2086 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue(); 2087 I.getOperand(1).ChangeToImmediate(Val); 2088 } else if (I.getOperand(1).isImm()) { 2089 uint64_t Val = I.getOperand(1).getImm(); 2090 I.getOperand(1).ChangeToImmediate(Val); 2091 } 2092 2093 I.setDesc(TII.get(MovOpc)); 2094 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2095 return true; 2096 } 2097 case TargetOpcode::G_EXTRACT: { 2098 Register DstReg = I.getOperand(0).getReg(); 2099 Register SrcReg = I.getOperand(1).getReg(); 2100 LLT SrcTy = MRI.getType(SrcReg); 2101 LLT DstTy = MRI.getType(DstReg); 2102 (void)DstTy; 2103 unsigned SrcSize = SrcTy.getSizeInBits(); 2104 2105 if (SrcTy.getSizeInBits() > 64) { 2106 // This should be an extract of an s128, which is like a vector extract. 2107 if (SrcTy.getSizeInBits() != 128) 2108 return false; 2109 // Only support extracting 64 bits from an s128 at the moment. 2110 if (DstTy.getSizeInBits() != 64) 2111 return false; 2112 2113 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 2114 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2115 // Check we have the right regbank always. 2116 assert(SrcRB.getID() == AArch64::FPRRegBankID && 2117 DstRB.getID() == AArch64::FPRRegBankID && 2118 "Wrong extract regbank!"); 2119 (void)SrcRB; 2120 2121 // Emit the same code as a vector extract. 2122 // Offset must be a multiple of 64. 2123 unsigned Offset = I.getOperand(2).getImm(); 2124 if (Offset % 64 != 0) 2125 return false; 2126 unsigned LaneIdx = Offset / 64; 2127 MachineIRBuilder MIB(I); 2128 MachineInstr *Extract = emitExtractVectorElt( 2129 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); 2130 if (!Extract) 2131 return false; 2132 I.eraseFromParent(); 2133 return true; 2134 } 2135 2136 I.setDesc(TII.get(SrcSize == 64 ? 
AArch64::UBFMXri : AArch64::UBFMWri)); 2137 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + 2138 Ty.getSizeInBits() - 1); 2139 2140 if (SrcSize < 64) { 2141 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && 2142 "unexpected G_EXTRACT types"); 2143 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2144 } 2145 2146 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 2147 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 2148 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 2149 .addReg(DstReg, 0, AArch64::sub_32); 2150 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 2151 AArch64::GPR32RegClass, MRI); 2152 I.getOperand(0).setReg(DstReg); 2153 2154 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2155 } 2156 2157 case TargetOpcode::G_INSERT: { 2158 LLT SrcTy = MRI.getType(I.getOperand(2).getReg()); 2159 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2160 unsigned DstSize = DstTy.getSizeInBits(); 2161 // Larger inserts are vectors, same-size ones should be something else by 2162 // now (split up or turned into COPYs). 2163 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) 2164 return false; 2165 2166 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri)); 2167 unsigned LSB = I.getOperand(3).getImm(); 2168 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); 2169 I.getOperand(3).setImm((DstSize - LSB) % DstSize); 2170 MachineInstrBuilder(MF, I).addImm(Width - 1); 2171 2172 if (DstSize < 64) { 2173 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && 2174 "unexpected G_INSERT types"); 2175 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2176 } 2177 2178 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 2179 BuildMI(MBB, I.getIterator(), I.getDebugLoc(), 2180 TII.get(AArch64::SUBREG_TO_REG)) 2181 .addDef(SrcReg) 2182 .addImm(0) 2183 .addUse(I.getOperand(2).getReg()) 2184 .addImm(AArch64::sub_32); 2185 RBI.constrainGenericRegister(I.getOperand(2).getReg(), 2186 AArch64::GPR32RegClass, MRI); 2187 I.getOperand(2).setReg(SrcReg); 2188 2189 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2190 } 2191 case TargetOpcode::G_FRAME_INDEX: { 2192 // allocas and G_FRAME_INDEX are only supported in addrspace(0). 2193 if (Ty != LLT::pointer(0, 64)) { 2194 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty 2195 << ", expected: " << LLT::pointer(0, 64) << '\n'); 2196 return false; 2197 } 2198 I.setDesc(TII.get(AArch64::ADDXri)); 2199 2200 // MOs for a #0 shifted immediate. 2201 I.addOperand(MachineOperand::CreateImm(0)); 2202 I.addOperand(MachineOperand::CreateImm(0)); 2203 2204 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2205 } 2206 2207 case TargetOpcode::G_GLOBAL_VALUE: { 2208 auto GV = I.getOperand(1).getGlobal(); 2209 if (GV->isThreadLocal()) 2210 return selectTLSGlobalValue(I, MRI); 2211 2212 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); 2213 if (OpFlags & AArch64II::MO_GOT) { 2214 I.setDesc(TII.get(AArch64::LOADgot)); 2215 I.getOperand(1).setTargetFlags(OpFlags); 2216 } else if (TM.getCodeModel() == CodeModel::Large) { 2217 // Materialize the global using movz/movk instructions. 
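      // materializeLargeCMVal builds a MOVZ for the low 16 bits of the
      // address followed by three MOVKs for the remaining 16-bit chunks
      // (the G0..G3 target flags), i.e. a full 64-bit absolute address.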
2218 materializeLargeCMVal(I, GV, OpFlags); 2219 I.eraseFromParent(); 2220 return true; 2221 } else if (TM.getCodeModel() == CodeModel::Tiny) { 2222 I.setDesc(TII.get(AArch64::ADR)); 2223 I.getOperand(1).setTargetFlags(OpFlags); 2224 } else { 2225 I.setDesc(TII.get(AArch64::MOVaddr)); 2226 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); 2227 MachineInstrBuilder MIB(MF, I); 2228 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(), 2229 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 2230 } 2231 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2232 } 2233 2234 case TargetOpcode::G_ZEXTLOAD: 2235 case TargetOpcode::G_LOAD: 2236 case TargetOpcode::G_STORE: { 2237 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; 2238 MachineIRBuilder MIB(I); 2239 2240 LLT PtrTy = MRI.getType(I.getOperand(1).getReg()); 2241 2242 if (PtrTy != LLT::pointer(0, 64)) { 2243 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy 2244 << ", expected: " << LLT::pointer(0, 64) << '\n'); 2245 return false; 2246 } 2247 2248 auto &MemOp = **I.memoperands_begin(); 2249 if (MemOp.isAtomic()) { 2250 // For now we just support s8 acquire loads to be able to compile stack 2251 // protector code. 2252 if (MemOp.getOrdering() == AtomicOrdering::Acquire && 2253 MemOp.getSize() == 1) { 2254 I.setDesc(TII.get(AArch64::LDARB)); 2255 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2256 } 2257 LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n"); 2258 return false; 2259 } 2260 unsigned MemSizeInBits = MemOp.getSize() * 8; 2261 2262 const Register PtrReg = I.getOperand(1).getReg(); 2263 #ifndef NDEBUG 2264 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); 2265 // Sanity-check the pointer register. 2266 assert(PtrRB.getID() == AArch64::GPRRegBankID && 2267 "Load/Store pointer operand isn't a GPR"); 2268 assert(MRI.getType(PtrReg).isPointer() && 2269 "Load/Store pointer operand isn't a pointer"); 2270 #endif 2271 2272 const Register ValReg = I.getOperand(0).getReg(); 2273 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); 2274 2275 const unsigned NewOpc = 2276 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); 2277 if (NewOpc == I.getOpcode()) 2278 return false; 2279 2280 I.setDesc(TII.get(NewOpc)); 2281 2282 uint64_t Offset = 0; 2283 auto *PtrMI = MRI.getVRegDef(PtrReg); 2284 2285 // Try to fold a GEP into our unsigned immediate addressing mode. 2286 if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) { 2287 if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) { 2288 int64_t Imm = *COff; 2289 const unsigned Size = MemSizeInBits / 8; 2290 const unsigned Scale = Log2_32(Size); 2291 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) { 2292 Register Ptr2Reg = PtrMI->getOperand(1).getReg(); 2293 I.getOperand(1).setReg(Ptr2Reg); 2294 PtrMI = MRI.getVRegDef(Ptr2Reg); 2295 Offset = Imm / Size; 2296 } 2297 } 2298 } 2299 2300 // If we haven't folded anything into our addressing mode yet, try to fold 2301 // a frame index into the base+offset. 2302 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX) 2303 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex()); 2304 2305 I.addOperand(MachineOperand::CreateImm(Offset)); 2306 2307 // If we're storing a 0, use WZR/XZR. 
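    // e.g. a G_STORE of a constant 0 that was selected to STRWui/STRXui can
    // store WZR/XZR directly instead of first materializing the zero.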
2308 if (auto CVal = getConstantVRegVal(ValReg, MRI)) { 2309 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) { 2310 if (I.getOpcode() == AArch64::STRWui) 2311 I.getOperand(0).setReg(AArch64::WZR); 2312 else if (I.getOpcode() == AArch64::STRXui) 2313 I.getOperand(0).setReg(AArch64::XZR); 2314 } 2315 } 2316 2317 if (IsZExtLoad) { 2318 // The zextload from a smaller type to i32 should be handled by the importer. 2319 if (MRI.getType(ValReg).getSizeInBits() != 64) 2320 return false; 2321 // If we have a ZEXTLOAD then change the load's type to be a narrower reg 2322 //and zero_extend with SUBREG_TO_REG. 2323 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2324 Register DstReg = I.getOperand(0).getReg(); 2325 I.getOperand(0).setReg(LdReg); 2326 2327 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 2328 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) 2329 .addImm(0) 2330 .addUse(LdReg) 2331 .addImm(AArch64::sub_32); 2332 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2333 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, 2334 MRI); 2335 } 2336 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2337 } 2338 2339 case TargetOpcode::G_SMULH: 2340 case TargetOpcode::G_UMULH: { 2341 // Reject the various things we don't support yet. 2342 if (unsupportedBinOp(I, RBI, MRI, TRI)) 2343 return false; 2344 2345 const Register DefReg = I.getOperand(0).getReg(); 2346 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2347 2348 if (RB.getID() != AArch64::GPRRegBankID) { 2349 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n"); 2350 return false; 2351 } 2352 2353 if (Ty != LLT::scalar(64)) { 2354 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty 2355 << ", expected: " << LLT::scalar(64) << '\n'); 2356 return false; 2357 } 2358 2359 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr 2360 : AArch64::UMULHrr; 2361 I.setDesc(TII.get(NewOpc)); 2362 2363 // Now that we selected an opcode, we need to constrain the register 2364 // operands to use appropriate classes. 2365 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2366 } 2367 case TargetOpcode::G_FADD: 2368 case TargetOpcode::G_FSUB: 2369 case TargetOpcode::G_FMUL: 2370 case TargetOpcode::G_FDIV: 2371 2372 case TargetOpcode::G_ASHR: 2373 if (MRI.getType(I.getOperand(0).getReg()).isVector()) 2374 return selectVectorASHR(I, MRI); 2375 LLVM_FALLTHROUGH; 2376 case TargetOpcode::G_SHL: 2377 if (Opcode == TargetOpcode::G_SHL && 2378 MRI.getType(I.getOperand(0).getReg()).isVector()) 2379 return selectVectorSHL(I, MRI); 2380 LLVM_FALLTHROUGH; 2381 case TargetOpcode::G_OR: 2382 case TargetOpcode::G_LSHR: { 2383 // Reject the various things we don't support yet. 2384 if (unsupportedBinOp(I, RBI, MRI, TRI)) 2385 return false; 2386 2387 const unsigned OpSize = Ty.getSizeInBits(); 2388 2389 const Register DefReg = I.getOperand(0).getReg(); 2390 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2391 2392 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize); 2393 if (NewOpc == I.getOpcode()) 2394 return false; 2395 2396 I.setDesc(TII.get(NewOpc)); 2397 // FIXME: Should the type be always reset in setDesc? 2398 2399 // Now that we selected an opcode, we need to constrain the register 2400 // operands to use appropriate classes. 
2401 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2402 } 2403 2404 case TargetOpcode::G_PTR_ADD: { 2405 MachineIRBuilder MIRBuilder(I); 2406 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), 2407 MIRBuilder); 2408 I.eraseFromParent(); 2409 return true; 2410 } 2411 case TargetOpcode::G_UADDO: { 2412 // TODO: Support other types. 2413 unsigned OpSize = Ty.getSizeInBits(); 2414 if (OpSize != 32 && OpSize != 64) { 2415 LLVM_DEBUG( 2416 dbgs() 2417 << "G_UADDO currently only supported for 32 and 64 b types.\n"); 2418 return false; 2419 } 2420 2421 // TODO: Support vectors. 2422 if (Ty.isVector()) { 2423 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n"); 2424 return false; 2425 } 2426 2427 // Add and set the set condition flag. 2428 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr; 2429 MachineIRBuilder MIRBuilder(I); 2430 auto AddsMI = MIRBuilder.buildInstr(AddsOpc, {I.getOperand(0)}, 2431 {I.getOperand(2), I.getOperand(3)}); 2432 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI); 2433 2434 // Now, put the overflow result in the register given by the first operand 2435 // to the G_UADDO. CSINC increments the result when the predicate is false, 2436 // so to get the increment when it's true, we need to use the inverse. In 2437 // this case, we want to increment when carry is set. 2438 auto CsetMI = MIRBuilder 2439 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, 2440 {Register(AArch64::WZR), Register(AArch64::WZR)}) 2441 .addImm(getInvertedCondCode(AArch64CC::HS)); 2442 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI); 2443 I.eraseFromParent(); 2444 return true; 2445 } 2446 2447 case TargetOpcode::G_PTRMASK: { 2448 Register MaskReg = I.getOperand(2).getReg(); 2449 Optional<int64_t> MaskVal = getConstantVRegVal(MaskReg, MRI); 2450 // TODO: Implement arbitrary cases 2451 if (!MaskVal || !isShiftedMask_64(*MaskVal)) 2452 return false; 2453 2454 uint64_t Mask = *MaskVal; 2455 I.setDesc(TII.get(AArch64::ANDXri)); 2456 I.getOperand(2).ChangeToImmediate( 2457 AArch64_AM::encodeLogicalImmediate(Mask, 64)); 2458 2459 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2460 } 2461 case TargetOpcode::G_PTRTOINT: 2462 case TargetOpcode::G_TRUNC: { 2463 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2464 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 2465 2466 const Register DstReg = I.getOperand(0).getReg(); 2467 const Register SrcReg = I.getOperand(1).getReg(); 2468 2469 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2470 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 2471 2472 if (DstRB.getID() != SrcRB.getID()) { 2473 LLVM_DEBUG( 2474 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"); 2475 return false; 2476 } 2477 2478 if (DstRB.getID() == AArch64::GPRRegBankID) { 2479 const TargetRegisterClass *DstRC = 2480 getRegClassForTypeOnBank(DstTy, DstRB, RBI); 2481 if (!DstRC) 2482 return false; 2483 2484 const TargetRegisterClass *SrcRC = 2485 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI); 2486 if (!SrcRC) 2487 return false; 2488 2489 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 2490 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 2491 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"); 2492 return false; 2493 } 2494 2495 if (DstRC == SrcRC) { 2496 // Nothing to be done 2497 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) && 2498 SrcTy == LLT::scalar(64)) { 2499 
llvm_unreachable("TableGen can import this case"); 2500 return false; 2501 } else if (DstRC == &AArch64::GPR32RegClass && 2502 SrcRC == &AArch64::GPR64RegClass) { 2503 I.getOperand(1).setSubReg(AArch64::sub_32); 2504 } else { 2505 LLVM_DEBUG( 2506 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"); 2507 return false; 2508 } 2509 2510 I.setDesc(TII.get(TargetOpcode::COPY)); 2511 return true; 2512 } else if (DstRB.getID() == AArch64::FPRRegBankID) { 2513 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) { 2514 I.setDesc(TII.get(AArch64::XTNv4i16)); 2515 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2516 return true; 2517 } 2518 2519 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { 2520 MachineIRBuilder MIB(I); 2521 MachineInstr *Extract = emitExtractVectorElt( 2522 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB); 2523 if (!Extract) 2524 return false; 2525 I.eraseFromParent(); 2526 return true; 2527 } 2528 2529 // We might have a vector G_PTRTOINT, in which case just emit a COPY. 2530 if (Opcode == TargetOpcode::G_PTRTOINT) { 2531 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector"); 2532 I.setDesc(TII.get(TargetOpcode::COPY)); 2533 return true; 2534 } 2535 } 2536 2537 return false; 2538 } 2539 2540 case TargetOpcode::G_ANYEXT: { 2541 const Register DstReg = I.getOperand(0).getReg(); 2542 const Register SrcReg = I.getOperand(1).getReg(); 2543 2544 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI); 2545 if (RBDst.getID() != AArch64::GPRRegBankID) { 2546 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst 2547 << ", expected: GPR\n"); 2548 return false; 2549 } 2550 2551 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI); 2552 if (RBSrc.getID() != AArch64::GPRRegBankID) { 2553 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc 2554 << ", expected: GPR\n"); 2555 return false; 2556 } 2557 2558 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); 2559 2560 if (DstSize == 0) { 2561 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n"); 2562 return false; 2563 } 2564 2565 if (DstSize != 64 && DstSize > 32) { 2566 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize 2567 << ", expected: 32 or 64\n"); 2568 return false; 2569 } 2570 // At this point G_ANYEXT is just like a plain COPY, but we need 2571 // to explicitly form the 64-bit value if any. 2572 if (DstSize > 32) { 2573 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass); 2574 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) 2575 .addDef(ExtSrc) 2576 .addImm(0) 2577 .addUse(SrcReg) 2578 .addImm(AArch64::sub_32); 2579 I.getOperand(1).setReg(ExtSrc); 2580 } 2581 return selectCopy(I, TII, MRI, TRI, RBI); 2582 } 2583 2584 case TargetOpcode::G_ZEXT: 2585 case TargetOpcode::G_SEXT_INREG: 2586 case TargetOpcode::G_SEXT: { 2587 unsigned Opcode = I.getOpcode(); 2588 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT; 2589 const Register DefReg = I.getOperand(0).getReg(); 2590 Register SrcReg = I.getOperand(1).getReg(); 2591 const LLT DstTy = MRI.getType(DefReg); 2592 const LLT SrcTy = MRI.getType(SrcReg); 2593 unsigned DstSize = DstTy.getSizeInBits(); 2594 unsigned SrcSize = SrcTy.getSizeInBits(); 2595 2596 // SEXT_INREG has the same src reg size as dst, the size of the value to be 2597 // extended is encoded in the imm. 
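    // For example (illustrative MIR),
    //   %d:gpr(s32) = G_SEXT_INREG %s:gpr(s32), 8
    // sign-extends the low 8 bits of %s, so below it is treated as an
    // s8 -> s32 extend and selected to an SBFM of the appropriate width.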
2598 if (Opcode == TargetOpcode::G_SEXT_INREG) 2599 SrcSize = I.getOperand(2).getImm(); 2600 2601 if (DstTy.isVector()) 2602 return false; // Should be handled by imported patterns. 2603 2604 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == 2605 AArch64::GPRRegBankID && 2606 "Unexpected ext regbank"); 2607 2608 MachineIRBuilder MIB(I); 2609 MachineInstr *ExtI; 2610 2611 // First check if we're extending the result of a load which has a dest type 2612 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest 2613 // GPR register on AArch64 and all loads which are smaller automatically 2614 // zero-extend the upper bits. E.g. 2615 // %v(s8) = G_LOAD %p, :: (load 1) 2616 // %v2(s32) = G_ZEXT %v(s8) 2617 if (!IsSigned) { 2618 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); 2619 bool IsGPR = 2620 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID; 2621 if (LoadMI && IsGPR) { 2622 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); 2623 unsigned BytesLoaded = MemOp->getSize(); 2624 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) 2625 return selectCopy(I, TII, MRI, TRI, RBI); 2626 } 2627 2628 // If we are zero extending from 32 bits to 64 bits, it's possible that 2629 // the instruction implicitly does the zero extend for us. In that case, 2630 // we can just emit a SUBREG_TO_REG. 2631 if (IsGPR && SrcSize == 32 && DstSize == 64) { 2632 // Unlike with the G_LOAD case, we don't want to look through copies 2633 // here. 2634 MachineInstr *Def = MRI.getVRegDef(SrcReg); 2635 if (Def && isDef32(*Def)) { 2636 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) 2637 .addImm(0) 2638 .addUse(SrcReg) 2639 .addImm(AArch64::sub_32); 2640 2641 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, 2642 MRI)) { 2643 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n"); 2644 return false; 2645 } 2646 2647 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, 2648 MRI)) { 2649 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n"); 2650 return false; 2651 } 2652 2653 I.eraseFromParent(); 2654 return true; 2655 } 2656 } 2657 } 2658 2659 if (DstSize == 64) { 2660 if (Opcode != TargetOpcode::G_SEXT_INREG) { 2661 // FIXME: Can we avoid manually doing this? 2662 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, 2663 MRI)) { 2664 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) 2665 << " operand\n"); 2666 return false; 2667 } 2668 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, 2669 {&AArch64::GPR64RegClass}, {}) 2670 .addImm(0) 2671 .addUse(SrcReg) 2672 .addImm(AArch64::sub_32) 2673 .getReg(0); 2674 } 2675 2676 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, 2677 {DefReg}, {SrcReg}) 2678 .addImm(0) 2679 .addImm(SrcSize - 1); 2680 } else if (DstSize <= 32) { 2681 ExtI = MIB.buildInstr(IsSigned ? 
AArch64::SBFMWri : AArch64::UBFMWri, 2682 {DefReg}, {SrcReg}) 2683 .addImm(0) 2684 .addImm(SrcSize - 1); 2685 } else { 2686 return false; 2687 } 2688 2689 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 2690 I.eraseFromParent(); 2691 return true; 2692 } 2693 2694 case TargetOpcode::G_SITOFP: 2695 case TargetOpcode::G_UITOFP: 2696 case TargetOpcode::G_FPTOSI: 2697 case TargetOpcode::G_FPTOUI: { 2698 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), 2699 SrcTy = MRI.getType(I.getOperand(1).getReg()); 2700 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy); 2701 if (NewOpc == Opcode) 2702 return false; 2703 2704 I.setDesc(TII.get(NewOpc)); 2705 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2706 2707 return true; 2708 } 2709 2710 case TargetOpcode::G_FREEZE: 2711 return selectCopy(I, TII, MRI, TRI, RBI); 2712 2713 case TargetOpcode::G_INTTOPTR: 2714 // The importer is currently unable to import pointer types since they 2715 // didn't exist in SelectionDAG. 2716 return selectCopy(I, TII, MRI, TRI, RBI); 2717 2718 case TargetOpcode::G_BITCAST: 2719 // Imported SelectionDAG rules can handle every bitcast except those that 2720 // bitcast from a type to the same type. Ideally, these shouldn't occur 2721 // but we might not run an optimizer that deletes them. The other exception 2722 // is bitcasts involving pointer types, as SelectionDAG has no knowledge 2723 // of them. 2724 return selectCopy(I, TII, MRI, TRI, RBI); 2725 2726 case TargetOpcode::G_SELECT: { 2727 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) { 2728 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty 2729 << ", expected: " << LLT::scalar(1) << '\n'); 2730 return false; 2731 } 2732 2733 const Register CondReg = I.getOperand(1).getReg(); 2734 const Register TReg = I.getOperand(2).getReg(); 2735 const Register FReg = I.getOperand(3).getReg(); 2736 2737 if (tryOptSelect(I)) 2738 return true; 2739 2740 Register CSelOpc = selectSelectOpc(I, MRI, RBI); 2741 MachineInstr &TstMI = 2742 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri)) 2743 .addDef(AArch64::WZR) 2744 .addUse(CondReg) 2745 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2746 2747 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc)) 2748 .addDef(I.getOperand(0).getReg()) 2749 .addUse(TReg) 2750 .addUse(FReg) 2751 .addImm(AArch64CC::NE); 2752 2753 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI); 2754 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI); 2755 2756 I.eraseFromParent(); 2757 return true; 2758 } 2759 case TargetOpcode::G_ICMP: { 2760 if (Ty.isVector()) 2761 return selectVectorICmp(I, MRI); 2762 2763 if (Ty != LLT::scalar(32)) { 2764 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty 2765 << ", expected: " << LLT::scalar(32) << '\n'); 2766 return false; 2767 } 2768 2769 MachineIRBuilder MIRBuilder(I); 2770 MachineInstr *Cmp; 2771 CmpInst::Predicate Pred; 2772 std::tie(Cmp, Pred) = emitIntegerCompare(I.getOperand(2), I.getOperand(3), 2773 I.getOperand(1), MIRBuilder); 2774 if (!Cmp) 2775 return false; 2776 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder); 2777 I.eraseFromParent(); 2778 return true; 2779 } 2780 2781 case TargetOpcode::G_FCMP: { 2782 if (Ty != LLT::scalar(32)) { 2783 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty 2784 << ", expected: " << LLT::scalar(32) << '\n'); 2785 return false; 2786 } 2787 2788 unsigned CmpOpc = selectFCMPOpc(I, MRI); 2789 if (!CmpOpc) 2790 return false; 2791 2792 // FIXME: regbank 2793 2794 AArch64CC::CondCode CC1, 
CC2; 2795 changeFCMPPredToAArch64CC( 2796 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2); 2797 2798 // Partially build the compare. Decide if we need to add a use for the 2799 // third operand based off whether or not we're comparing against 0.0. 2800 auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc)) 2801 .addUse(I.getOperand(2).getReg()); 2802 2803 // If we don't have an immediate compare, then we need to add a use of the 2804 // register which wasn't used for the immediate. 2805 // Note that the immediate will always be the last operand. 2806 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri) 2807 CmpMI = CmpMI.addUse(I.getOperand(3).getReg()); 2808 2809 const Register DefReg = I.getOperand(0).getReg(); 2810 Register Def1Reg = DefReg; 2811 if (CC2 != AArch64CC::AL) 2812 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2813 2814 MachineInstr &CSetMI = 2815 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) 2816 .addDef(Def1Reg) 2817 .addUse(AArch64::WZR) 2818 .addUse(AArch64::WZR) 2819 .addImm(getInvertedCondCode(CC1)); 2820 2821 if (CC2 != AArch64CC::AL) { 2822 Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2823 MachineInstr &CSet2MI = 2824 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) 2825 .addDef(Def2Reg) 2826 .addUse(AArch64::WZR) 2827 .addUse(AArch64::WZR) 2828 .addImm(getInvertedCondCode(CC2)); 2829 MachineInstr &OrMI = 2830 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr)) 2831 .addDef(DefReg) 2832 .addUse(Def1Reg) 2833 .addUse(Def2Reg); 2834 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI); 2835 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI); 2836 } 2837 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 2838 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI); 2839 2840 I.eraseFromParent(); 2841 return true; 2842 } 2843 case TargetOpcode::G_VASTART: 2844 return STI.isTargetDarwin() ? 
selectVaStartDarwin(I, MF, MRI) 2845 : selectVaStartAAPCS(I, MF, MRI); 2846 case TargetOpcode::G_INTRINSIC: 2847 return selectIntrinsic(I, MRI); 2848 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 2849 return selectIntrinsicWithSideEffects(I, MRI); 2850 case TargetOpcode::G_IMPLICIT_DEF: { 2851 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); 2852 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2853 const Register DstReg = I.getOperand(0).getReg(); 2854 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2855 const TargetRegisterClass *DstRC = 2856 getRegClassForTypeOnBank(DstTy, DstRB, RBI); 2857 RBI.constrainGenericRegister(DstReg, *DstRC, MRI); 2858 return true; 2859 } 2860 case TargetOpcode::G_BLOCK_ADDR: { 2861 if (TM.getCodeModel() == CodeModel::Large) { 2862 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0); 2863 I.eraseFromParent(); 2864 return true; 2865 } else { 2866 I.setDesc(TII.get(AArch64::MOVaddrBA)); 2867 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA), 2868 I.getOperand(0).getReg()) 2869 .addBlockAddress(I.getOperand(1).getBlockAddress(), 2870 /* Offset */ 0, AArch64II::MO_PAGE) 2871 .addBlockAddress( 2872 I.getOperand(1).getBlockAddress(), /* Offset */ 0, 2873 AArch64II::MO_NC | AArch64II::MO_PAGEOFF); 2874 I.eraseFromParent(); 2875 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); 2876 } 2877 } 2878 case TargetOpcode::G_INTRINSIC_TRUNC: 2879 return selectIntrinsicTrunc(I, MRI); 2880 case TargetOpcode::G_INTRINSIC_ROUND: 2881 return selectIntrinsicRound(I, MRI); 2882 case TargetOpcode::G_BUILD_VECTOR: 2883 return selectBuildVector(I, MRI); 2884 case TargetOpcode::G_MERGE_VALUES: 2885 return selectMergeValues(I, MRI); 2886 case TargetOpcode::G_UNMERGE_VALUES: 2887 return selectUnmergeValues(I, MRI); 2888 case TargetOpcode::G_SHUFFLE_VECTOR: 2889 return selectShuffleVector(I, MRI); 2890 case TargetOpcode::G_EXTRACT_VECTOR_ELT: 2891 return selectExtractElt(I, MRI); 2892 case TargetOpcode::G_INSERT_VECTOR_ELT: 2893 return selectInsertElt(I, MRI); 2894 case TargetOpcode::G_CONCAT_VECTORS: 2895 return selectConcatVectors(I, MRI); 2896 case TargetOpcode::G_JUMP_TABLE: 2897 return selectJumpTable(I, MRI); 2898 } 2899 2900 return false; 2901 } 2902 2903 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, 2904 MachineRegisterInfo &MRI) const { 2905 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT"); 2906 Register JTAddr = I.getOperand(0).getReg(); 2907 unsigned JTI = I.getOperand(1).getIndex(); 2908 Register Index = I.getOperand(2).getReg(); 2909 MachineIRBuilder MIB(I); 2910 2911 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 2912 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); 2913 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32, 2914 {TargetReg, ScratchReg}, {JTAddr, Index}) 2915 .addJumpTableIndex(JTI); 2916 // Build the indirect branch. 
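  // (The JumpTableDest32 pseudo above computes the absolute destination from
  // the jump table address and the index; the BR below just jumps to that
  // computed target.)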
2917 MIB.buildInstr(AArch64::BR, {}, {TargetReg}); 2918 I.eraseFromParent(); 2919 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI); 2920 } 2921 2922 bool AArch64InstructionSelector::selectJumpTable( 2923 MachineInstr &I, MachineRegisterInfo &MRI) const { 2924 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table"); 2925 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!"); 2926 2927 Register DstReg = I.getOperand(0).getReg(); 2928 unsigned JTI = I.getOperand(1).getIndex(); 2929 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later. 2930 MachineIRBuilder MIB(I); 2931 auto MovMI = 2932 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {}) 2933 .addJumpTableIndex(JTI, AArch64II::MO_PAGE) 2934 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF); 2935 I.eraseFromParent(); 2936 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); 2937 } 2938 2939 bool AArch64InstructionSelector::selectTLSGlobalValue( 2940 MachineInstr &I, MachineRegisterInfo &MRI) const { 2941 if (!STI.isTargetMachO()) 2942 return false; 2943 MachineFunction &MF = *I.getParent()->getParent(); 2944 MF.getFrameInfo().setAdjustsStack(true); 2945 2946 const GlobalValue &GV = *I.getOperand(1).getGlobal(); 2947 MachineIRBuilder MIB(I); 2948 2949 MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {}) 2950 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); 2951 2952 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, 2953 {Register(AArch64::X0)}) 2954 .addImm(0); 2955 2956 // TLS calls preserve all registers except those that absolutely must be 2957 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be 2958 // silly). 2959 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load}) 2960 .addDef(AArch64::X0, RegState::Implicit) 2961 .addRegMask(TRI.getTLSCallPreservedMask()); 2962 2963 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0)); 2964 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass, 2965 MRI); 2966 I.eraseFromParent(); 2967 return true; 2968 } 2969 2970 bool AArch64InstructionSelector::selectIntrinsicTrunc( 2971 MachineInstr &I, MachineRegisterInfo &MRI) const { 2972 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); 2973 2974 // Select the correct opcode. 2975 unsigned Opc = 0; 2976 if (!SrcTy.isVector()) { 2977 switch (SrcTy.getSizeInBits()) { 2978 default: 2979 case 16: 2980 Opc = AArch64::FRINTZHr; 2981 break; 2982 case 32: 2983 Opc = AArch64::FRINTZSr; 2984 break; 2985 case 64: 2986 Opc = AArch64::FRINTZDr; 2987 break; 2988 } 2989 } else { 2990 unsigned NumElts = SrcTy.getNumElements(); 2991 switch (SrcTy.getElementType().getSizeInBits()) { 2992 default: 2993 break; 2994 case 16: 2995 if (NumElts == 4) 2996 Opc = AArch64::FRINTZv4f16; 2997 else if (NumElts == 8) 2998 Opc = AArch64::FRINTZv8f16; 2999 break; 3000 case 32: 3001 if (NumElts == 2) 3002 Opc = AArch64::FRINTZv2f32; 3003 else if (NumElts == 4) 3004 Opc = AArch64::FRINTZv4f32; 3005 break; 3006 case 64: 3007 if (NumElts == 2) 3008 Opc = AArch64::FRINTZv2f64; 3009 break; 3010 } 3011 } 3012 3013 if (!Opc) { 3014 // Didn't get an opcode above, bail. 3015 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n"); 3016 return false; 3017 } 3018 3019 // Legalization would have set us up perfectly for this; we just need to 3020 // set the opcode and move on. 
3021 I.setDesc(TII.get(Opc)); 3022 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3023 } 3024 3025 bool AArch64InstructionSelector::selectIntrinsicRound( 3026 MachineInstr &I, MachineRegisterInfo &MRI) const { 3027 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); 3028 3029 // Select the correct opcode. 3030 unsigned Opc = 0; 3031 if (!SrcTy.isVector()) { 3032 switch (SrcTy.getSizeInBits()) { 3033 default: 3034 case 16: 3035 Opc = AArch64::FRINTAHr; 3036 break; 3037 case 32: 3038 Opc = AArch64::FRINTASr; 3039 break; 3040 case 64: 3041 Opc = AArch64::FRINTADr; 3042 break; 3043 } 3044 } else { 3045 unsigned NumElts = SrcTy.getNumElements(); 3046 switch (SrcTy.getElementType().getSizeInBits()) { 3047 default: 3048 break; 3049 case 16: 3050 if (NumElts == 4) 3051 Opc = AArch64::FRINTAv4f16; 3052 else if (NumElts == 8) 3053 Opc = AArch64::FRINTAv8f16; 3054 break; 3055 case 32: 3056 if (NumElts == 2) 3057 Opc = AArch64::FRINTAv2f32; 3058 else if (NumElts == 4) 3059 Opc = AArch64::FRINTAv4f32; 3060 break; 3061 case 64: 3062 if (NumElts == 2) 3063 Opc = AArch64::FRINTAv2f64; 3064 break; 3065 } 3066 } 3067 3068 if (!Opc) { 3069 // Didn't get an opcode above, bail. 3070 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n"); 3071 return false; 3072 } 3073 3074 // Legalization would have set us up perfectly for this; we just need to 3075 // set the opcode and move on. 3076 I.setDesc(TII.get(Opc)); 3077 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3078 } 3079 3080 bool AArch64InstructionSelector::selectVectorICmp( 3081 MachineInstr &I, MachineRegisterInfo &MRI) const { 3082 Register DstReg = I.getOperand(0).getReg(); 3083 LLT DstTy = MRI.getType(DstReg); 3084 Register SrcReg = I.getOperand(2).getReg(); 3085 Register Src2Reg = I.getOperand(3).getReg(); 3086 LLT SrcTy = MRI.getType(SrcReg); 3087 3088 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); 3089 unsigned NumElts = DstTy.getNumElements(); 3090 3091 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b 3092 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16 3093 // Third index is cc opcode: 3094 // 0 == eq 3095 // 1 == ugt 3096 // 2 == uge 3097 // 3 == ult 3098 // 4 == ule 3099 // 5 == sgt 3100 // 6 == sge 3101 // 7 == slt 3102 // 8 == sle 3103 // ne is done by negating 'eq' result. 3104 3105 // This table below assumes that for some comparisons the operands will be 3106 // commuted. 
3107 // ult op == commute + ugt op 3108 // ule op == commute + uge op 3109 // slt op == commute + sgt op 3110 // sle op == commute + sge op 3111 unsigned PredIdx = 0; 3112 bool SwapOperands = false; 3113 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); 3114 switch (Pred) { 3115 case CmpInst::ICMP_NE: 3116 case CmpInst::ICMP_EQ: 3117 PredIdx = 0; 3118 break; 3119 case CmpInst::ICMP_UGT: 3120 PredIdx = 1; 3121 break; 3122 case CmpInst::ICMP_UGE: 3123 PredIdx = 2; 3124 break; 3125 case CmpInst::ICMP_ULT: 3126 PredIdx = 3; 3127 SwapOperands = true; 3128 break; 3129 case CmpInst::ICMP_ULE: 3130 PredIdx = 4; 3131 SwapOperands = true; 3132 break; 3133 case CmpInst::ICMP_SGT: 3134 PredIdx = 5; 3135 break; 3136 case CmpInst::ICMP_SGE: 3137 PredIdx = 6; 3138 break; 3139 case CmpInst::ICMP_SLT: 3140 PredIdx = 7; 3141 SwapOperands = true; 3142 break; 3143 case CmpInst::ICMP_SLE: 3144 PredIdx = 8; 3145 SwapOperands = true; 3146 break; 3147 default: 3148 llvm_unreachable("Unhandled icmp predicate"); 3149 return false; 3150 } 3151 3152 // This table obviously should be tablegen'd when we have our GISel native 3153 // tablegen selector. 3154 3155 static const unsigned OpcTable[4][4][9] = { 3156 { 3157 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3158 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3159 0 /* invalid */}, 3160 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3161 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3162 0 /* invalid */}, 3163 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, 3164 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, 3165 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, 3166 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, 3167 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, 3168 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8} 3169 }, 3170 { 3171 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3172 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3173 0 /* invalid */}, 3174 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, 3175 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, 3176 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, 3177 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, 3178 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, 3179 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, 3180 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3181 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3182 0 /* invalid */} 3183 }, 3184 { 3185 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, 3186 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, 3187 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, 3188 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, 3189 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, 3190 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, 3191 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3192 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3193 0 /* invalid */}, 3194 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3195 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3196 0 /* invalid */} 3197 }, 3198 { 3199 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, 3200 AArch64::CMHIv2i64, 
AArch64::CMHSv2i64, AArch64::CMGTv2i64, 3201 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, 3202 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3203 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3204 0 /* invalid */}, 3205 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3206 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3207 0 /* invalid */}, 3208 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3209 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3210 0 /* invalid */} 3211 }, 3212 }; 3213 unsigned EltIdx = Log2_32(SrcEltSize / 8); 3214 unsigned NumEltsIdx = Log2_32(NumElts / 2); 3215 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx]; 3216 if (!Opc) { 3217 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode"); 3218 return false; 3219 } 3220 3221 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI); 3222 const TargetRegisterClass *SrcRC = 3223 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true); 3224 if (!SrcRC) { 3225 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); 3226 return false; 3227 } 3228 3229 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0; 3230 if (SrcTy.getSizeInBits() == 128) 3231 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0; 3232 3233 if (SwapOperands) 3234 std::swap(SrcReg, Src2Reg); 3235 3236 MachineIRBuilder MIB(I); 3237 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg}); 3238 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 3239 3240 // Invert if we had a 'ne' cc. 3241 if (NotOpc) { 3242 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp}); 3243 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 3244 } else { 3245 MIB.buildCopy(DstReg, Cmp.getReg(0)); 3246 } 3247 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI); 3248 I.eraseFromParent(); 3249 return true; 3250 } 3251 3252 MachineInstr *AArch64InstructionSelector::emitScalarToVector( 3253 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar, 3254 MachineIRBuilder &MIRBuilder) const { 3255 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {}); 3256 3257 auto BuildFn = [&](unsigned SubregIndex) { 3258 auto Ins = 3259 MIRBuilder 3260 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar}) 3261 .addImm(SubregIndex); 3262 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI); 3263 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI); 3264 return &*Ins; 3265 }; 3266 3267 switch (EltSize) { 3268 case 16: 3269 return BuildFn(AArch64::hsub); 3270 case 32: 3271 return BuildFn(AArch64::ssub); 3272 case 64: 3273 return BuildFn(AArch64::dsub); 3274 default: 3275 return nullptr; 3276 } 3277 } 3278 3279 bool AArch64InstructionSelector::selectMergeValues( 3280 MachineInstr &I, MachineRegisterInfo &MRI) const { 3281 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode"); 3282 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 3283 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 3284 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation"); 3285 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); 3286 3287 if (I.getNumOperands() != 3) 3288 return false; 3289 3290 // Merging 2 s64s into an s128. 
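  // This is done as two lane inserts into an IMPLICIT_DEF'd 128-bit register,
  // roughly (illustrative):
  //   %undef:fpr(s128) = IMPLICIT_DEF
  //   %t   = <insert %src1 into lane 0 of %undef>
  //   %dst = <insert %src2 into lane 1 of %t>
  // emitLaneInsert picks the concrete INS opcode based on the source's
  // register bank.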
3291 if (DstTy == LLT::scalar(128)) { 3292 if (SrcTy.getSizeInBits() != 64) 3293 return false; 3294 MachineIRBuilder MIB(I); 3295 Register DstReg = I.getOperand(0).getReg(); 3296 Register Src1Reg = I.getOperand(1).getReg(); 3297 Register Src2Reg = I.getOperand(2).getReg(); 3298 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {}); 3299 MachineInstr *InsMI = 3300 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB); 3301 if (!InsMI) 3302 return false; 3303 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(), 3304 Src2Reg, /* LaneIdx */ 1, RB, MIB); 3305 if (!Ins2MI) 3306 return false; 3307 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); 3308 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI); 3309 I.eraseFromParent(); 3310 return true; 3311 } 3312 3313 if (RB.getID() != AArch64::GPRRegBankID) 3314 return false; 3315 3316 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) 3317 return false; 3318 3319 auto *DstRC = &AArch64::GPR64RegClass; 3320 Register SubToRegDef = MRI.createVirtualRegister(DstRC); 3321 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 3322 TII.get(TargetOpcode::SUBREG_TO_REG)) 3323 .addDef(SubToRegDef) 3324 .addImm(0) 3325 .addUse(I.getOperand(1).getReg()) 3326 .addImm(AArch64::sub_32); 3327 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC); 3328 // Need to anyext the second scalar before we can use bfm 3329 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 3330 TII.get(TargetOpcode::SUBREG_TO_REG)) 3331 .addDef(SubToRegDef2) 3332 .addImm(0) 3333 .addUse(I.getOperand(2).getReg()) 3334 .addImm(AArch64::sub_32); 3335 MachineInstr &BFM = 3336 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri)) 3337 .addDef(I.getOperand(0).getReg()) 3338 .addUse(SubToRegDef) 3339 .addUse(SubToRegDef2) 3340 .addImm(32) 3341 .addImm(31); 3342 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI); 3343 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI); 3344 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI); 3345 I.eraseFromParent(); 3346 return true; 3347 } 3348 3349 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, 3350 const unsigned EltSize) { 3351 // Choose a lane copy opcode and subregister based off of the size of the 3352 // vector's elements. 3353 switch (EltSize) { 3354 case 16: 3355 CopyOpc = AArch64::CPYi16; 3356 ExtractSubReg = AArch64::hsub; 3357 break; 3358 case 32: 3359 CopyOpc = AArch64::CPYi32; 3360 ExtractSubReg = AArch64::ssub; 3361 break; 3362 case 64: 3363 CopyOpc = AArch64::CPYi64; 3364 ExtractSubReg = AArch64::dsub; 3365 break; 3366 default: 3367 // Unknown size, bail out. 
3368 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n"); 3369 return false; 3370 } 3371 return true; 3372 } 3373 3374 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt( 3375 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, 3376 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const { 3377 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 3378 unsigned CopyOpc = 0; 3379 unsigned ExtractSubReg = 0; 3380 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) { 3381 LLVM_DEBUG( 3382 dbgs() << "Couldn't determine lane copy opcode for instruction.\n"); 3383 return nullptr; 3384 } 3385 3386 const TargetRegisterClass *DstRC = 3387 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true); 3388 if (!DstRC) { 3389 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n"); 3390 return nullptr; 3391 } 3392 3393 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI); 3394 const LLT &VecTy = MRI.getType(VecReg); 3395 const TargetRegisterClass *VecRC = 3396 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true); 3397 if (!VecRC) { 3398 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); 3399 return nullptr; 3400 } 3401 3402 // The register that we're going to copy into. 3403 Register InsertReg = VecReg; 3404 if (!DstReg) 3405 DstReg = MRI.createVirtualRegister(DstRC); 3406 // If the lane index is 0, we just use a subregister COPY. 3407 if (LaneIdx == 0) { 3408 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {}) 3409 .addReg(VecReg, 0, ExtractSubReg); 3410 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 3411 return &*Copy; 3412 } 3413 3414 // Lane copies require 128-bit wide registers. If we're dealing with an 3415 // unpacked vector, then we need to move up to that width. Insert an implicit 3416 // def and a subregister insert to get us there. 3417 if (VecTy.getSizeInBits() != 128) { 3418 MachineInstr *ScalarToVector = emitScalarToVector( 3419 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder); 3420 if (!ScalarToVector) 3421 return nullptr; 3422 InsertReg = ScalarToVector->getOperand(0).getReg(); 3423 } 3424 3425 MachineInstr *LaneCopyMI = 3426 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx); 3427 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); 3428 3429 // Make sure that we actually constrain the initial copy. 3430 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 3431 return LaneCopyMI; 3432 } 3433 3434 bool AArch64InstructionSelector::selectExtractElt( 3435 MachineInstr &I, MachineRegisterInfo &MRI) const { 3436 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && 3437 "unexpected opcode!"); 3438 Register DstReg = I.getOperand(0).getReg(); 3439 const LLT NarrowTy = MRI.getType(DstReg); 3440 const Register SrcReg = I.getOperand(1).getReg(); 3441 const LLT WideTy = MRI.getType(SrcReg); 3442 (void)WideTy; 3443 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && 3444 "source register size too small!"); 3445 assert(NarrowTy.isScalar() && "cannot extract vector into vector!"); 3446 3447 // Need the lane index to determine the correct copy opcode. 3448 MachineOperand &LaneIdxOp = I.getOperand(2); 3449 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?"); 3450 3451 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { 3452 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n"); 3453 return false; 3454 } 3455 3456 // Find the index to extract from. 
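  // The lane index must be a compile-time constant (a G_CONSTANT, possibly
  // behind copies); selection fails for variable indices.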
3457 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI); 3458 if (!VRegAndVal) 3459 return false; 3460 unsigned LaneIdx = VRegAndVal->Value; 3461 3462 MachineIRBuilder MIRBuilder(I); 3463 3464 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 3465 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg, 3466 LaneIdx, MIRBuilder); 3467 if (!Extract) 3468 return false; 3469 3470 I.eraseFromParent(); 3471 return true; 3472 } 3473 3474 bool AArch64InstructionSelector::selectSplitVectorUnmerge( 3475 MachineInstr &I, MachineRegisterInfo &MRI) const { 3476 unsigned NumElts = I.getNumOperands() - 1; 3477 Register SrcReg = I.getOperand(NumElts).getReg(); 3478 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 3479 const LLT SrcTy = MRI.getType(SrcReg); 3480 3481 assert(NarrowTy.isVector() && "Expected an unmerge into vectors"); 3482 if (SrcTy.getSizeInBits() > 128) { 3483 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge"); 3484 return false; 3485 } 3486 3487 MachineIRBuilder MIB(I); 3488 3489 // We implement a split vector operation by treating the sub-vectors as 3490 // scalars and extracting them. 3491 const RegisterBank &DstRB = 3492 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI); 3493 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) { 3494 Register Dst = I.getOperand(OpIdx).getReg(); 3495 MachineInstr *Extract = 3496 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB); 3497 if (!Extract) 3498 return false; 3499 } 3500 I.eraseFromParent(); 3501 return true; 3502 } 3503 3504 bool AArch64InstructionSelector::selectUnmergeValues( 3505 MachineInstr &I, MachineRegisterInfo &MRI) const { 3506 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 3507 "unexpected opcode"); 3508 3509 // TODO: Handle unmerging into GPRs and from scalars to scalars. 3510 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() != 3511 AArch64::FPRRegBankID || 3512 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != 3513 AArch64::FPRRegBankID) { 3514 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " 3515 "currently unsupported.\n"); 3516 return false; 3517 } 3518 3519 // The last operand is the vector source register, and every other operand is 3520 // a register to unpack into. 3521 unsigned NumElts = I.getNumOperands() - 1; 3522 Register SrcReg = I.getOperand(NumElts).getReg(); 3523 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 3524 const LLT WideTy = MRI.getType(SrcReg); 3525 (void)WideTy; 3526 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) && 3527 "can only unmerge from vector or s128 types!"); 3528 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && 3529 "source register size too small!"); 3530 3531 if (!NarrowTy.isScalar()) 3532 return selectSplitVectorUnmerge(I, MRI); 3533 3534 MachineIRBuilder MIB(I); 3535 3536 // Choose a lane copy opcode and subregister based off of the size of the 3537 // vector's elements. 3538 unsigned CopyOpc = 0; 3539 unsigned ExtractSubReg = 0; 3540 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) 3541 return false; 3542 3543 // Set up for the lane copies. 3544 MachineBasicBlock &MBB = *I.getParent(); 3545 3546 // Stores the registers we'll be copying from. 3547 SmallVector<Register, 4> InsertRegs; 3548 3549 // We'll use the first register twice, so we only need NumElts-1 registers. 
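  // For example, unmerging a <4 x s32> uses the 128-bit source directly:
  // lane 0 is taken with a plain ssub subregister copy, and lanes 1-3 with
  // CPYi32 lane copies.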
3550 unsigned NumInsertRegs = NumElts - 1; 3551 3552 // If our elements fit into exactly 128 bits, then we can copy from the source 3553 // directly. Otherwise, we need to do a bit of setup with some subregister 3554 // inserts. 3555 if (NarrowTy.getSizeInBits() * NumElts == 128) { 3556 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg); 3557 } else { 3558 // No. We have to perform subregister inserts. For each insert, create an 3559 // implicit def and a subregister insert, and save the register we create. 3560 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) { 3561 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); 3562 MachineInstr &ImpDefMI = 3563 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF), 3564 ImpDefReg); 3565 3566 // Now, create the subregister insert from SrcReg. 3567 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); 3568 MachineInstr &InsMI = 3569 *BuildMI(MBB, I, I.getDebugLoc(), 3570 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg) 3571 .addUse(ImpDefReg) 3572 .addUse(SrcReg) 3573 .addImm(AArch64::dsub); 3574 3575 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI); 3576 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI); 3577 3578 // Save the register so that we can copy from it after. 3579 InsertRegs.push_back(InsertReg); 3580 } 3581 } 3582 3583 // Now that we've created any necessary subregister inserts, we can 3584 // create the copies. 3585 // 3586 // Perform the first copy separately as a subregister copy. 3587 Register CopyTo = I.getOperand(0).getReg(); 3588 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {}) 3589 .addReg(InsertRegs[0], 0, ExtractSubReg); 3590 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI); 3591 3592 // Now, perform the remaining copies as vector lane copies. 3593 unsigned LaneIdx = 1; 3594 for (Register InsReg : InsertRegs) { 3595 Register CopyTo = I.getOperand(LaneIdx).getReg(); 3596 MachineInstr &CopyInst = 3597 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo) 3598 .addUse(InsReg) 3599 .addImm(LaneIdx); 3600 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI); 3601 ++LaneIdx; 3602 } 3603 3604 // Separately constrain the first copy's destination. Because of the 3605 // limitation in constrainOperandRegClass, we can't guarantee that this will 3606 // actually be constrained. So, do it ourselves using the second operand. 
3607 const TargetRegisterClass *RC = 3608 MRI.getRegClassOrNull(I.getOperand(1).getReg()); 3609 if (!RC) { 3610 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n"); 3611 return false; 3612 } 3613 3614 RBI.constrainGenericRegister(CopyTo, *RC, MRI); 3615 I.eraseFromParent(); 3616 return true; 3617 } 3618 3619 bool AArch64InstructionSelector::selectConcatVectors( 3620 MachineInstr &I, MachineRegisterInfo &MRI) const { 3621 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && 3622 "Unexpected opcode"); 3623 Register Dst = I.getOperand(0).getReg(); 3624 Register Op1 = I.getOperand(1).getReg(); 3625 Register Op2 = I.getOperand(2).getReg(); 3626 MachineIRBuilder MIRBuilder(I); 3627 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder); 3628 if (!ConcatMI) 3629 return false; 3630 I.eraseFromParent(); 3631 return true; 3632 } 3633 3634 unsigned 3635 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal, 3636 MachineFunction &MF) const { 3637 Type *CPTy = CPVal->getType(); 3638 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy); 3639 3640 MachineConstantPool *MCP = MF.getConstantPool(); 3641 return MCP->getConstantPoolIndex(CPVal, Alignment); 3642 } 3643 3644 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool( 3645 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const { 3646 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF()); 3647 3648 auto Adrp = 3649 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {}) 3650 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE); 3651 3652 MachineInstr *LoadMI = nullptr; 3653 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) { 3654 case 16: 3655 LoadMI = 3656 &*MIRBuilder 3657 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp}) 3658 .addConstantPoolIndex(CPIdx, 0, 3659 AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3660 break; 3661 case 8: 3662 LoadMI = &*MIRBuilder 3663 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp}) 3664 .addConstantPoolIndex( 3665 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3666 break; 3667 default: 3668 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type " 3669 << *CPVal->getType()); 3670 return nullptr; 3671 } 3672 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI); 3673 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI); 3674 return LoadMI; 3675 } 3676 3677 /// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given 3678 /// size and RB. 
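/// For example, a 32-bit element on the GPR bank maps to <INSvi32gpr, ssub>,
/// while a 16-bit element on the FPR bank maps to <INSvi16lane, hsub>.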
3679 static std::pair<unsigned, unsigned> 3680 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { 3681 unsigned Opc, SubregIdx; 3682 if (RB.getID() == AArch64::GPRRegBankID) { 3683 if (EltSize == 32) { 3684 Opc = AArch64::INSvi32gpr; 3685 SubregIdx = AArch64::ssub; 3686 } else if (EltSize == 64) { 3687 Opc = AArch64::INSvi64gpr; 3688 SubregIdx = AArch64::dsub; 3689 } else { 3690 llvm_unreachable("invalid elt size!"); 3691 } 3692 } else { 3693 if (EltSize == 8) { 3694 Opc = AArch64::INSvi8lane; 3695 SubregIdx = AArch64::bsub; 3696 } else if (EltSize == 16) { 3697 Opc = AArch64::INSvi16lane; 3698 SubregIdx = AArch64::hsub; 3699 } else if (EltSize == 32) { 3700 Opc = AArch64::INSvi32lane; 3701 SubregIdx = AArch64::ssub; 3702 } else if (EltSize == 64) { 3703 Opc = AArch64::INSvi64lane; 3704 SubregIdx = AArch64::dsub; 3705 } else { 3706 llvm_unreachable("invalid elt size!"); 3707 } 3708 } 3709 return std::make_pair(Opc, SubregIdx); 3710 } 3711 3712 MachineInstr * 3713 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, 3714 MachineOperand &RHS, 3715 MachineIRBuilder &MIRBuilder) const { 3716 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 3717 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3718 static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri}, 3719 {AArch64::ADDWrr, AArch64::ADDWri}}; 3720 bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32; 3721 auto ImmFns = selectArithImmed(RHS); 3722 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()]; 3723 auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS}); 3724 3725 // If we matched a valid constant immediate, add those operands. 3726 if (ImmFns) { 3727 for (auto &RenderFn : *ImmFns) 3728 RenderFn(AddMI); 3729 } else { 3730 AddMI.addUse(RHS.getReg()); 3731 } 3732 3733 constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI); 3734 return &*AddMI; 3735 } 3736 3737 MachineInstr * 3738 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, 3739 MachineIRBuilder &MIRBuilder) const { 3740 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 3741 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3742 static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri}, 3743 {AArch64::ADDSWrr, AArch64::ADDSWri}}; 3744 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); 3745 auto ImmFns = selectArithImmed(RHS); 3746 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()]; 3747 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; 3748 3749 auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS}); 3750 3751 // If we matched a valid constant immediate, add those operands. 3752 if (ImmFns) { 3753 for (auto &RenderFn : *ImmFns) 3754 RenderFn(CmpMI); 3755 } else { 3756 CmpMI.addUse(RHS.getReg()); 3757 } 3758 3759 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 3760 return &*CmpMI; 3761 } 3762 3763 MachineInstr * 3764 AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS, 3765 MachineIRBuilder &MIRBuilder) const { 3766 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3767 unsigned RegSize = MRI.getType(LHS).getSizeInBits(); 3768 bool Is32Bit = (RegSize == 32); 3769 static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri}, 3770 {AArch64::ANDSWrr, AArch64::ANDSWri}}; 3771 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; 3772 3773 // We might be able to fold in an immediate into the TST. 
We need to make sure 3774 // it's a logical immediate though, since ANDS requires that. 3775 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI); 3776 bool IsImmForm = ValAndVReg.hasValue() && 3777 AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize); 3778 unsigned Opc = OpcTable[Is32Bit][IsImmForm]; 3779 auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS}); 3780 3781 if (IsImmForm) 3782 TstMI.addImm( 3783 AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize)); 3784 else 3785 TstMI.addUse(RHS); 3786 3787 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 3788 return &*TstMI; 3789 } 3790 3791 std::pair<MachineInstr *, CmpInst::Predicate> 3792 AArch64InstructionSelector::emitIntegerCompare( 3793 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, 3794 MachineIRBuilder &MIRBuilder) const { 3795 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 3796 assert(Predicate.isPredicate() && "Expected predicate?"); 3797 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3798 3799 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate(); 3800 3801 // Fold the compare if possible. 3802 MachineInstr *FoldCmp = 3803 tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder); 3804 if (FoldCmp) 3805 return {FoldCmp, P}; 3806 3807 // Can't fold into a CMN. Just emit a normal compare. 3808 unsigned CmpOpc = 0; 3809 Register ZReg; 3810 3811 LLT CmpTy = MRI.getType(LHS.getReg()); 3812 assert((CmpTy.isScalar() || CmpTy.isPointer()) && 3813 "Expected scalar or pointer"); 3814 if (CmpTy == LLT::scalar(32)) { 3815 CmpOpc = AArch64::SUBSWrr; 3816 ZReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 3817 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) { 3818 CmpOpc = AArch64::SUBSXrr; 3819 ZReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3820 } else { 3821 return {nullptr, CmpInst::Predicate::BAD_ICMP_PREDICATE}; 3822 } 3823 3824 // Try to match immediate forms. 3825 MachineInstr *ImmedCmp = 3826 tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder); 3827 if (ImmedCmp) 3828 return {ImmedCmp, P}; 3829 3830 // If we don't have an immediate, we may have a shift which can be folded 3831 // into the compare. 3832 MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder); 3833 if (ShiftedCmp) 3834 return {ShiftedCmp, P}; 3835 3836 auto CmpMI = 3837 MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()}); 3838 // Make sure that we can constrain the compare that we emitted. 3839 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 3840 return {&*CmpMI, P}; 3841 } 3842 3843 MachineInstr *AArch64InstructionSelector::emitVectorConcat( 3844 Optional<Register> Dst, Register Op1, Register Op2, 3845 MachineIRBuilder &MIRBuilder) const { 3846 // We implement a vector concat by: 3847 // 1. Use scalar_to_vector to insert the lower vector into the larger dest 3848 // 2. Insert the upper vector into the destination's upper element 3849 // TODO: some of this code is common with G_BUILD_VECTOR handling. 
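  // For example, concatenating two <2 x s32> operands widens each to an
  // FPR128 register and then inserts the second operand's low 64 bits into
  // the upper d-lane of the first, roughly "mov v0.d[1], v1.d[0]" in assembly.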
3850   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
3851 
3852   const LLT Op1Ty = MRI.getType(Op1);
3853   const LLT Op2Ty = MRI.getType(Op2);
3854 
3855   if (Op1Ty != Op2Ty) {
3856     LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
3857     return nullptr;
3858   }
3859   assert(Op1Ty.isVector() && "Expected a vector for vector concat");
3860 
3861   if (Op1Ty.getSizeInBits() >= 128) {
3862     LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
3863     return nullptr;
3864   }
3865 
3866   // At the moment we just support 64 bit vector concats.
3867   if (Op1Ty.getSizeInBits() != 64) {
3868     LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
3869     return nullptr;
3870   }
3871 
3872   const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
3873   const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
3874   const TargetRegisterClass *DstRC =
3875       getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
3876 
3877   MachineInstr *WidenedOp1 =
3878       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
3879   MachineInstr *WidenedOp2 =
3880       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
3881   if (!WidenedOp1 || !WidenedOp2) {
3882     LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
3883     return nullptr;
3884   }
3885 
3886   // Now do the insert of the upper element.
3887   unsigned InsertOpc, InsSubRegIdx;
3888   std::tie(InsertOpc, InsSubRegIdx) =
3889       getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
3890 
3891   if (!Dst)
3892     Dst = MRI.createVirtualRegister(DstRC);
3893   auto InsElt =
3894       MIRBuilder
3895           .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
3896           .addImm(1) /* Lane index */
3897           .addUse(WidenedOp2->getOperand(0).getReg())
3898           .addImm(0);
3899   constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
3900   return &*InsElt;
3901 }
3902 
3903 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant(
3904     MachineInstr &I, MachineRegisterInfo &MRI) const {
3905   assert(I.getOpcode() == TargetOpcode::G_FCONSTANT &&
3906          "Expected a G_FCONSTANT!");
3907   MachineOperand &ImmOp = I.getOperand(1);
3908   unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
3909 
3910   // Only handle 32 and 64 bit defs for now.
3911   if (DefSize != 32 && DefSize != 64)
3912     return nullptr;
3913 
3914   // Don't handle null values using FMOV.
3915   if (ImmOp.getFPImm()->isNullValue())
3916     return nullptr;
3917 
3918   // Get the immediate representation for the FMOV.
3919   const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF();
3920   int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF)
3921                           : AArch64_AM::getFP64Imm(ImmValAPF);
3922 
3923   // If this is -1, it means the requested floating point value can't be
3924   // represented as an FMOV immediate. Bail.
3925   if (Imm == -1)
3926     return nullptr;
3927 
3928   // Update MI to represent the new FMOV instruction, constrain it, and return.
3929   ImmOp.ChangeToImmediate(Imm);
3930   unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi;
3931   I.setDesc(TII.get(MovOpc));
3932   constrainSelectedInstRegOperands(I, TII, TRI, RBI);
3933   return &I;
3934 }
3935 
3936 MachineInstr *
3937 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred,
3938                                             MachineIRBuilder &MIRBuilder) const {
3939   // CSINC increments the result when the predicate is false. Invert it.
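  // For example, for an 'eq' predicate we emit CSINC Wd, WZR, WZR, NE (the
  // CSET Wd, EQ alias): Wd becomes 1 when the comparison was equal and 0
  // otherwise.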
3940 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC( 3941 CmpInst::getInversePredicate((CmpInst::Predicate)Pred)); 3942 auto I = 3943 MIRBuilder 3944 .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)}) 3945 .addImm(InvCC); 3946 constrainSelectedInstRegOperands(*I, TII, TRI, RBI); 3947 return &*I; 3948 } 3949 3950 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { 3951 MachineIRBuilder MIB(I); 3952 MachineRegisterInfo &MRI = *MIB.getMRI(); 3953 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); 3954 3955 // We want to recognize this pattern: 3956 // 3957 // $z = G_FCMP pred, $x, $y 3958 // ... 3959 // $w = G_SELECT $z, $a, $b 3960 // 3961 // Where the value of $z is *only* ever used by the G_SELECT (possibly with 3962 // some copies/truncs in between.) 3963 // 3964 // If we see this, then we can emit something like this: 3965 // 3966 // fcmp $x, $y 3967 // fcsel $w, $a, $b, pred 3968 // 3969 // Rather than emitting both of the rather long sequences in the standard 3970 // G_FCMP/G_SELECT select methods. 3971 3972 // First, check if the condition is defined by a compare. 3973 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg()); 3974 while (CondDef) { 3975 // We can only fold if all of the defs have one use. 3976 Register CondDefReg = CondDef->getOperand(0).getReg(); 3977 if (!MRI.hasOneNonDBGUse(CondDefReg)) { 3978 // Unless it's another select. 3979 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) { 3980 if (CondDef == &UI) 3981 continue; 3982 if (UI.getOpcode() != TargetOpcode::G_SELECT) 3983 return false; 3984 } 3985 } 3986 3987 // We can skip over G_TRUNC since the condition is 1-bit. 3988 // Truncating/extending can have no impact on the value. 3989 unsigned Opc = CondDef->getOpcode(); 3990 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC) 3991 break; 3992 3993 // Can't see past copies from physregs. 3994 if (Opc == TargetOpcode::COPY && 3995 Register::isPhysicalRegister(CondDef->getOperand(1).getReg())) 3996 return false; 3997 3998 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg()); 3999 } 4000 4001 // Is the condition defined by a compare? 4002 if (!CondDef) 4003 return false; 4004 4005 unsigned CondOpc = CondDef->getOpcode(); 4006 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) 4007 return false; 4008 4009 AArch64CC::CondCode CondCode; 4010 if (CondOpc == TargetOpcode::G_ICMP) { 4011 MachineInstr *Cmp; 4012 CmpInst::Predicate Pred; 4013 4014 std::tie(Cmp, Pred) = 4015 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), 4016 CondDef->getOperand(1), MIB); 4017 4018 if (!Cmp) { 4019 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); 4020 return false; 4021 } 4022 4023 // Have to collect the CondCode after emitIntegerCompare, since it can 4024 // update the predicate. 4025 CondCode = changeICMPPredToAArch64CC(Pred); 4026 } else { 4027 // Get the condition code for the select. 4028 AArch64CC::CondCode CondCode2; 4029 changeFCMPPredToAArch64CC( 4030 (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode, 4031 CondCode2); 4032 4033 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two 4034 // instructions to emit the comparison. 4035 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be 4036 // unnecessary. 4037 if (CondCode2 != AArch64CC::AL) 4038 return false; 4039 4040 // Make sure we'll be able to select the compare. 
4041 unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI); 4042 if (!CmpOpc) 4043 return false; 4044 4045 // Emit a new compare. 4046 auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()}); 4047 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri) 4048 Cmp.addUse(CondDef->getOperand(3).getReg()); 4049 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 4050 } 4051 4052 // Emit the select. 4053 unsigned CSelOpc = selectSelectOpc(I, MRI, RBI); 4054 auto CSel = 4055 MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()}, 4056 {I.getOperand(2).getReg(), I.getOperand(3).getReg()}) 4057 .addImm(CondCode); 4058 constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI); 4059 I.eraseFromParent(); 4060 return true; 4061 } 4062 4063 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( 4064 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, 4065 MachineIRBuilder &MIRBuilder) const { 4066 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && 4067 "Unexpected MachineOperand"); 4068 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4069 // We want to find this sort of thing: 4070 // x = G_SUB 0, y 4071 // G_ICMP z, x 4072 // 4073 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead. 4074 // e.g: 4075 // 4076 // cmn z, y 4077 4078 // Helper lambda to detect the subtract followed by the compare. 4079 // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0. 4080 auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) { 4081 if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB) 4082 return false; 4083 4084 // Need to make sure NZCV is the same at the end of the transformation. 4085 if (CC != AArch64CC::EQ && CC != AArch64CC::NE) 4086 return false; 4087 4088 // We want to match against SUBs. 4089 if (DefMI->getOpcode() != TargetOpcode::G_SUB) 4090 return false; 4091 4092 // Make sure that we're getting 4093 // x = G_SUB 0, y 4094 auto ValAndVReg = 4095 getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI); 4096 if (!ValAndVReg || ValAndVReg->Value != 0) 4097 return false; 4098 4099 // This can safely be represented as a CMN. 4100 return true; 4101 }; 4102 4103 // Check if the RHS or LHS of the G_ICMP is defined by a SUB 4104 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI); 4105 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI); 4106 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate(); 4107 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P); 4108 4109 // Given this: 4110 // 4111 // x = G_SUB 0, y 4112 // G_ICMP x, z 4113 // 4114 // Produce this: 4115 // 4116 // cmn y, z 4117 if (IsCMN(LHSDef, CC)) 4118 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder); 4119 4120 // Same idea here, but with the RHS of the compare instead: 4121 // 4122 // Given this: 4123 // 4124 // x = G_SUB 0, y 4125 // G_ICMP z, x 4126 // 4127 // Produce this: 4128 // 4129 // cmn z, y 4130 if (IsCMN(RHSDef, CC)) 4131 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder); 4132 4133 // Given this: 4134 // 4135 // z = G_AND x, y 4136 // G_ICMP z, 0 4137 // 4138 // Produce this if the compare is signed: 4139 // 4140 // tst x, y 4141 if (!isUnsignedICMPPred(P) && LHSDef && 4142 LHSDef->getOpcode() == TargetOpcode::G_AND) { 4143 // Make sure that the RHS is 0. 
4144     auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
4145     if (!ValAndVReg || ValAndVReg->Value != 0)
4146       return nullptr;
4147 
4148     return emitTST(LHSDef->getOperand(1).getReg(),
4149                    LHSDef->getOperand(2).getReg(), MIRBuilder);
4150   }
4151 
4152   return nullptr;
4153 }
4154 
4155 MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
4156     MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate &P,
4157     MachineIRBuilder &MIB) const {
4158   // Attempt to select the immediate form of an integer compare.
4159   MachineRegisterInfo &MRI = *MIB.getMRI();
4160   auto Ty = MRI.getType(LHS.getReg());
4161   assert(!Ty.isVector() && "Expected scalar or pointer only?");
4162   unsigned Size = Ty.getSizeInBits();
4163   assert((Size == 32 || Size == 64) &&
4164          "Expected 32 bit or 64 bit compare only?");
4165 
4166   // Check if this is a case we can already handle.
4167   InstructionSelector::ComplexRendererFns ImmFns;
4168   ImmFns = selectArithImmed(RHS);
4169 
4170   if (!ImmFns) {
4171     // We didn't get a rendering function, but we may still have a constant.
4172     auto MaybeImmed = getImmedFromMO(RHS);
4173     if (!MaybeImmed)
4174       return nullptr;
4175 
4176     // We have a constant, but it doesn't fit. Try adjusting it by one and
4177     // updating the predicate if possible.
4178     uint64_t C = *MaybeImmed;
4179     CmpInst::Predicate NewP;
4180     switch (P) {
4181     default:
4182       return nullptr;
4183     case CmpInst::ICMP_SLT:
4184     case CmpInst::ICMP_SGE:
4185       // Check for
4186       //
4187       // x slt c => x sle c - 1
4188       // x sge c => x sgt c - 1
4189       //
4190       // When c is not the smallest possible negative number.
4191       if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
4192           (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
4193         return nullptr;
4194       NewP = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
4195       C -= 1;
4196       break;
4197     case CmpInst::ICMP_ULT:
4198     case CmpInst::ICMP_UGE:
4199       // Check for
4200       //
4201       // x ult c => x ule c - 1
4202       // x uge c => x ugt c - 1
4203       //
4204       // When c is not zero.
4205       if (C == 0)
4206         return nullptr;
4207       NewP = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
4208       C -= 1;
4209       break;
4210     case CmpInst::ICMP_SLE:
4211     case CmpInst::ICMP_SGT:
4212       // Check for
4213       //
4214       // x sle c => x slt c + 1
4215       // x sgt c => x sge c + 1
4216       //
4217       // When c is not the largest possible signed integer.
4218       if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
4219           (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
4220         return nullptr;
4221       NewP = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
4222       C += 1;
4223       break;
4224     case CmpInst::ICMP_ULE:
4225     case CmpInst::ICMP_UGT:
4226       // Check for
4227       //
4228       // x ule c => x ult c + 1
4229       // x ugt c => x uge c + 1
4230       //
4231       // When c is not the largest possible unsigned integer.
4232       if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
4233           (Size == 64 && C == UINT64_MAX))
4234         return nullptr;
4235       NewP = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
4236       C += 1;
4237       break;
4238     }
4239 
4240     // Check if the new constant is valid.
4241     if (Size == 32)
4242       C = static_cast<uint32_t>(C);
4243     ImmFns = select12BitValueWithLeftShift(C);
4244     if (!ImmFns)
4245       return nullptr;
4246     P = NewP;
4247   }
4248 
4249   // At this point, we know we can select an immediate form. Go ahead and do
4250   // that.
4251   Register ZReg;
4252   unsigned Opc;
4253   if (Size == 32) {
4254     ZReg = AArch64::WZR;
4255     Opc = AArch64::SUBSWri;
4256   } else {
4257     ZReg = AArch64::XZR;
4258     Opc = AArch64::SUBSXri;
4259   }
4260 
4261   auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
4262   for (auto &RenderFn : *ImmFns)
4263     RenderFn(CmpMI);
4264   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4265   return &*CmpMI;
4266 }
4267 
4268 MachineInstr *AArch64InstructionSelector::tryOptArithShiftedCompare(
4269     MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIB) const {
4270   // We are looking for the following pattern:
4271   //
4272   // shift = G_SHL/ASHR/LSHR y, c
4273   // ...
4274   // cmp = G_ICMP pred, something, shift
4275   //
4276   // Since we will select the G_ICMP to a SUBS, we can potentially fold the
4277   // shift into the subtract.
4278   static const unsigned OpcTable[2] = {AArch64::SUBSWrs, AArch64::SUBSXrs};
4279   static const Register ZRegTable[2] = {AArch64::WZR, AArch64::XZR};
4280   auto ImmFns = selectShiftedRegister(RHS);
4281   if (!ImmFns)
4282     return nullptr;
4283   MachineRegisterInfo &MRI = *MIB.getMRI();
4284   auto Ty = MRI.getType(LHS.getReg());
4285   assert(!Ty.isVector() && "Expected scalar or pointer only?");
4286   unsigned Size = Ty.getSizeInBits();
4287   bool Idx = (Size == 64);
4288   Register ZReg = ZRegTable[Idx];
4289   unsigned Opc = OpcTable[Idx];
4290   auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()});
4291   for (auto &RenderFn : *ImmFns)
4292     RenderFn(CmpMI);
4293   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4294   return &*CmpMI;
4295 }
4296 
4297 bool AArch64InstructionSelector::selectShuffleVector(
4298     MachineInstr &I, MachineRegisterInfo &MRI) const {
4299   const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
4300   Register Src1Reg = I.getOperand(1).getReg();
4301   const LLT Src1Ty = MRI.getType(Src1Reg);
4302   Register Src2Reg = I.getOperand(2).getReg();
4303   const LLT Src2Ty = MRI.getType(Src2Reg);
4304   ArrayRef<int> Mask = I.getOperand(3).getShuffleMask();
4305 
4306   MachineBasicBlock &MBB = *I.getParent();
4307   MachineFunction &MF = *MBB.getParent();
4308   LLVMContext &Ctx = MF.getFunction().getContext();
4309 
4310   // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if
4311   // it originated from a <1 x T> type. Those should have been lowered into
4312   // G_BUILD_VECTOR earlier.
4313   if (!Src1Ty.isVector() || !Src2Ty.isVector()) {
4314     LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n");
4315     return false;
4316   }
4317 
4318   unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8;
4319 
4320   SmallVector<Constant *, 64> CstIdxs;
4321   for (int Val : Mask) {
4322     // For now, any undef indexes are just treated as 0. This should be
4323     // optimized in the future, e.g. to select DUP etc.
4324     Val = Val < 0 ? 0 : Val;
4325     for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
4326       unsigned Offset = Byte + Val * BytesPerElt;
4327       CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset));
4328     }
4329   }
4330 
4331   MachineIRBuilder MIRBuilder(I);
4332 
4333   // Use a constant pool to load the index vector for TBL.
4334   Constant *CPVal = ConstantVector::get(CstIdxs);
4335   MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder);
4336   if (!IndexLoad) {
4337     LLVM_DEBUG(dbgs() << "Could not load from a constant pool");
4338     return false;
4339   }
4340 
4341   if (DstTy.getSizeInBits() != 128) {
4342     assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty");
4343     // This case can be done with TBL1.
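    // A single TBLv16i8One suffices: the two 64-bit sources are concatenated
    // into one FPR128 table register, and the widened index vector picks
    // bytes out of that single table.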
4344 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder); 4345 if (!Concat) { 4346 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1"); 4347 return false; 4348 } 4349 4350 // The constant pool load will be 64 bits, so need to convert to FPR128 reg. 4351 IndexLoad = 4352 emitScalarToVector(64, &AArch64::FPR128RegClass, 4353 IndexLoad->getOperand(0).getReg(), MIRBuilder); 4354 4355 auto TBL1 = MIRBuilder.buildInstr( 4356 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass}, 4357 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()}); 4358 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI); 4359 4360 auto Copy = 4361 MIRBuilder 4362 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 4363 .addReg(TBL1.getReg(0), 0, AArch64::dsub); 4364 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI); 4365 I.eraseFromParent(); 4366 return true; 4367 } 4368 4369 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive 4370 // Q registers for regalloc. 4371 auto RegSeq = MIRBuilder 4372 .buildInstr(TargetOpcode::REG_SEQUENCE, 4373 {&AArch64::QQRegClass}, {Src1Reg}) 4374 .addImm(AArch64::qsub0) 4375 .addUse(Src2Reg) 4376 .addImm(AArch64::qsub1); 4377 4378 auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)}, 4379 {RegSeq, IndexLoad->getOperand(0)}); 4380 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI); 4381 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI); 4382 I.eraseFromParent(); 4383 return true; 4384 } 4385 4386 MachineInstr *AArch64InstructionSelector::emitLaneInsert( 4387 Optional<Register> DstReg, Register SrcReg, Register EltReg, 4388 unsigned LaneIdx, const RegisterBank &RB, 4389 MachineIRBuilder &MIRBuilder) const { 4390 MachineInstr *InsElt = nullptr; 4391 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; 4392 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4393 4394 // Create a register to define with the insert if one wasn't passed in. 4395 if (!DstReg) 4396 DstReg = MRI.createVirtualRegister(DstRC); 4397 4398 unsigned EltSize = MRI.getType(EltReg).getSizeInBits(); 4399 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first; 4400 4401 if (RB.getID() == AArch64::FPRRegBankID) { 4402 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder); 4403 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) 4404 .addImm(LaneIdx) 4405 .addUse(InsSub->getOperand(0).getReg()) 4406 .addImm(0); 4407 } else { 4408 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) 4409 .addImm(LaneIdx) 4410 .addUse(EltReg); 4411 } 4412 4413 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); 4414 return InsElt; 4415 } 4416 4417 bool AArch64InstructionSelector::selectInsertElt( 4418 MachineInstr &I, MachineRegisterInfo &MRI) const { 4419 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT); 4420 4421 // Get information on the destination. 4422 Register DstReg = I.getOperand(0).getReg(); 4423 const LLT DstTy = MRI.getType(DstReg); 4424 unsigned VecSize = DstTy.getSizeInBits(); 4425 4426 // Get information on the element we want to insert into the destination. 4427 Register EltReg = I.getOperand(2).getReg(); 4428 const LLT EltTy = MRI.getType(EltReg); 4429 unsigned EltSize = EltTy.getSizeInBits(); 4430 if (EltSize < 16 || EltSize > 64) 4431 return false; // Don't support all element types yet. 4432 4433 // Find the definition of the index. Bail out if it's not defined by a 4434 // G_CONSTANT. 
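  // (The look-through handles the common case where the constant index sits
  // behind copies.)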
4435 Register IdxReg = I.getOperand(3).getReg(); 4436 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI); 4437 if (!VRegAndVal) 4438 return false; 4439 unsigned LaneIdx = VRegAndVal->Value; 4440 4441 // Perform the lane insert. 4442 Register SrcReg = I.getOperand(1).getReg(); 4443 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); 4444 MachineIRBuilder MIRBuilder(I); 4445 4446 if (VecSize < 128) { 4447 // If the vector we're inserting into is smaller than 128 bits, widen it 4448 // to 128 to do the insert. 4449 MachineInstr *ScalarToVec = emitScalarToVector( 4450 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder); 4451 if (!ScalarToVec) 4452 return false; 4453 SrcReg = ScalarToVec->getOperand(0).getReg(); 4454 } 4455 4456 // Create an insert into a new FPR128 register. 4457 // Note that if our vector is already 128 bits, we end up emitting an extra 4458 // register. 4459 MachineInstr *InsMI = 4460 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder); 4461 4462 if (VecSize < 128) { 4463 // If we had to widen to perform the insert, then we have to demote back to 4464 // the original size to get the result we want. 4465 Register DemoteVec = InsMI->getOperand(0).getReg(); 4466 const TargetRegisterClass *RC = 4467 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize); 4468 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { 4469 LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); 4470 return false; 4471 } 4472 unsigned SubReg = 0; 4473 if (!getSubRegForClass(RC, TRI, SubReg)) 4474 return false; 4475 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { 4476 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize 4477 << "\n"); 4478 return false; 4479 } 4480 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 4481 .addReg(DemoteVec, 0, SubReg); 4482 RBI.constrainGenericRegister(DstReg, *RC, MRI); 4483 } else { 4484 // No widening needed. 4485 InsMI->getOperand(0).setReg(DstReg); 4486 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); 4487 } 4488 4489 I.eraseFromParent(); 4490 return true; 4491 } 4492 4493 bool AArch64InstructionSelector::tryOptConstantBuildVec( 4494 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const { 4495 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 4496 assert(DstTy.getSizeInBits() <= 128 && "Unexpected build_vec type!"); 4497 if (DstTy.getSizeInBits() < 32) 4498 return false; 4499 // Check if we're building a constant vector, in which case we want to 4500 // generate a constant pool load instead of a vector insert sequence. 
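  // For example, a G_BUILD_VECTOR whose operands are all G_CONSTANT or
  // G_FCONSTANT becomes an ADRP + LDRQui (or LDRDui for 64-bit vectors) of a
  // constant-pool entry, instead of a chain of lane inserts.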
4501 SmallVector<Constant *, 16> Csts; 4502 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) { 4503 // Try to find G_CONSTANT or G_FCONSTANT 4504 auto *OpMI = 4505 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI); 4506 if (OpMI) 4507 Csts.emplace_back( 4508 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm())); 4509 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT, 4510 I.getOperand(Idx).getReg(), MRI))) 4511 Csts.emplace_back( 4512 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm())); 4513 else 4514 return false; 4515 } 4516 Constant *CV = ConstantVector::get(Csts); 4517 MachineIRBuilder MIB(I); 4518 auto *CPLoad = emitLoadFromConstantPool(CV, MIB); 4519 if (!CPLoad) { 4520 LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector"); 4521 return false; 4522 } 4523 MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0)); 4524 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 4525 *MRI.getRegClass(CPLoad->getOperand(0).getReg()), 4526 MRI); 4527 I.eraseFromParent(); 4528 return true; 4529 } 4530 4531 bool AArch64InstructionSelector::selectBuildVector( 4532 MachineInstr &I, MachineRegisterInfo &MRI) const { 4533 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 4534 // Until we port more of the optimized selections, for now just use a vector 4535 // insert sequence. 4536 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 4537 const LLT EltTy = MRI.getType(I.getOperand(1).getReg()); 4538 unsigned EltSize = EltTy.getSizeInBits(); 4539 4540 if (tryOptConstantBuildVec(I, DstTy, MRI)) 4541 return true; 4542 if (EltSize < 16 || EltSize > 64) 4543 return false; // Don't support all element types yet. 4544 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); 4545 MachineIRBuilder MIRBuilder(I); 4546 4547 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; 4548 MachineInstr *ScalarToVec = 4549 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC, 4550 I.getOperand(1).getReg(), MIRBuilder); 4551 if (!ScalarToVec) 4552 return false; 4553 4554 Register DstVec = ScalarToVec->getOperand(0).getReg(); 4555 unsigned DstSize = DstTy.getSizeInBits(); 4556 4557 // Keep track of the last MI we inserted. Later on, we might be able to save 4558 // a copy using it. 4559 MachineInstr *PrevMI = nullptr; 4560 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) { 4561 // Note that if we don't do a subregister copy, we can end up making an 4562 // extra register. 4563 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB, 4564 MIRBuilder); 4565 DstVec = PrevMI->getOperand(0).getReg(); 4566 } 4567 4568 // If DstTy's size in bits is less than 128, then emit a subregister copy 4569 // from DstVec to the last register we've defined. 4570 if (DstSize < 128) { 4571 // Force this to be FPR using the destination vector. 4572 const TargetRegisterClass *RC = 4573 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize); 4574 if (!RC) 4575 return false; 4576 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { 4577 LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); 4578 return false; 4579 } 4580 4581 unsigned SubReg = 0; 4582 if (!getSubRegForClass(RC, TRI, SubReg)) 4583 return false; 4584 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { 4585 LLVM_DEBUG(dbgs() << "Unsupported destination size! 
(" << DstSize 4586 << "\n"); 4587 return false; 4588 } 4589 4590 Register Reg = MRI.createVirtualRegister(RC); 4591 Register DstReg = I.getOperand(0).getReg(); 4592 4593 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 4594 .addReg(DstVec, 0, SubReg); 4595 MachineOperand &RegOp = I.getOperand(1); 4596 RegOp.setReg(Reg); 4597 RBI.constrainGenericRegister(DstReg, *RC, MRI); 4598 } else { 4599 // We don't need a subregister copy. Save a copy by re-using the 4600 // destination register on the final insert. 4601 assert(PrevMI && "PrevMI was null?"); 4602 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg()); 4603 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI); 4604 } 4605 4606 I.eraseFromParent(); 4607 return true; 4608 } 4609 4610 /// Helper function to find an intrinsic ID on an a MachineInstr. Returns the 4611 /// ID if it exists, and 0 otherwise. 4612 static unsigned findIntrinsicID(MachineInstr &I) { 4613 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) { 4614 return Op.isIntrinsicID(); 4615 }); 4616 if (IntrinOp == I.operands_end()) 4617 return 0; 4618 return IntrinOp->getIntrinsicID(); 4619 } 4620 4621 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( 4622 MachineInstr &I, MachineRegisterInfo &MRI) const { 4623 // Find the intrinsic ID. 4624 unsigned IntrinID = findIntrinsicID(I); 4625 if (!IntrinID) 4626 return false; 4627 MachineIRBuilder MIRBuilder(I); 4628 4629 // Select the instruction. 4630 switch (IntrinID) { 4631 default: 4632 return false; 4633 case Intrinsic::trap: 4634 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1); 4635 break; 4636 case Intrinsic::debugtrap: 4637 if (!STI.isTargetWindows()) 4638 return false; 4639 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); 4640 break; 4641 } 4642 4643 I.eraseFromParent(); 4644 return true; 4645 } 4646 4647 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, 4648 MachineRegisterInfo &MRI) { 4649 unsigned IntrinID = findIntrinsicID(I); 4650 if (!IntrinID) 4651 return false; 4652 MachineIRBuilder MIRBuilder(I); 4653 4654 switch (IntrinID) { 4655 default: 4656 break; 4657 case Intrinsic::aarch64_crypto_sha1h: { 4658 Register DstReg = I.getOperand(0).getReg(); 4659 Register SrcReg = I.getOperand(2).getReg(); 4660 4661 // FIXME: Should this be an assert? 4662 if (MRI.getType(DstReg).getSizeInBits() != 32 || 4663 MRI.getType(SrcReg).getSizeInBits() != 32) 4664 return false; 4665 4666 // The operation has to happen on FPRs. Set up some new FPR registers for 4667 // the source and destination if they are on GPRs. 4668 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { 4669 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); 4670 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)}); 4671 4672 // Make sure the copy ends up getting constrained properly. 4673 RBI.constrainGenericRegister(I.getOperand(2).getReg(), 4674 AArch64::GPR32RegClass, MRI); 4675 } 4676 4677 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) 4678 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); 4679 4680 // Actually insert the instruction. 4681 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg}); 4682 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI); 4683 4684 // Did we create a new register for the destination? 4685 if (DstReg != I.getOperand(0).getReg()) { 4686 // Yep. Copy the result of the instruction back into the original 4687 // destination. 
4688 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg}); 4689 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 4690 AArch64::GPR32RegClass, MRI); 4691 } 4692 4693 I.eraseFromParent(); 4694 return true; 4695 } 4696 case Intrinsic::frameaddress: 4697 case Intrinsic::returnaddress: { 4698 MachineFunction &MF = *I.getParent()->getParent(); 4699 MachineFrameInfo &MFI = MF.getFrameInfo(); 4700 4701 unsigned Depth = I.getOperand(2).getImm(); 4702 Register DstReg = I.getOperand(0).getReg(); 4703 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); 4704 4705 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) { 4706 if (MFReturnAddr) { 4707 MIRBuilder.buildCopy({DstReg}, MFReturnAddr); 4708 I.eraseFromParent(); 4709 return true; 4710 } 4711 MFI.setReturnAddressIsTaken(true); 4712 MF.addLiveIn(AArch64::LR, &AArch64::GPR64spRegClass); 4713 // Insert the copy from LR/X30 into the entry block, before it can be 4714 // clobbered by anything. 4715 MachineBasicBlock &EntryBlock = *MF.begin(); 4716 if (!EntryBlock.isLiveIn(AArch64::LR)) 4717 EntryBlock.addLiveIn(AArch64::LR); 4718 MachineIRBuilder EntryBuilder(MF); 4719 EntryBuilder.setInstr(*EntryBlock.begin()); 4720 EntryBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); 4721 MFReturnAddr = DstReg; 4722 I.eraseFromParent(); 4723 return true; 4724 } 4725 4726 MFI.setFrameAddressIsTaken(true); 4727 Register FrameAddr(AArch64::FP); 4728 while (Depth--) { 4729 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); 4730 auto Ldr = 4731 MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}) 4732 .addImm(0); 4733 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI); 4734 FrameAddr = NextFrame; 4735 } 4736 4737 if (IntrinID == Intrinsic::frameaddress) 4738 MIRBuilder.buildCopy({DstReg}, {FrameAddr}); 4739 else { 4740 MFI.setReturnAddressIsTaken(true); 4741 MIRBuilder.buildInstr(AArch64::LDRXui, {DstReg}, {FrameAddr}).addImm(1); 4742 } 4743 4744 I.eraseFromParent(); 4745 return true; 4746 } 4747 } 4748 return false; 4749 } 4750 4751 InstructionSelector::ComplexRendererFns 4752 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { 4753 auto MaybeImmed = getImmedFromMO(Root); 4754 if (MaybeImmed == None || *MaybeImmed > 31) 4755 return None; 4756 uint64_t Enc = (32 - *MaybeImmed) & 0x1f; 4757 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 4758 } 4759 4760 InstructionSelector::ComplexRendererFns 4761 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { 4762 auto MaybeImmed = getImmedFromMO(Root); 4763 if (MaybeImmed == None || *MaybeImmed > 31) 4764 return None; 4765 uint64_t Enc = 31 - *MaybeImmed; 4766 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 4767 } 4768 4769 InstructionSelector::ComplexRendererFns 4770 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { 4771 auto MaybeImmed = getImmedFromMO(Root); 4772 if (MaybeImmed == None || *MaybeImmed > 63) 4773 return None; 4774 uint64_t Enc = (64 - *MaybeImmed) & 0x3f; 4775 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 4776 } 4777 4778 InstructionSelector::ComplexRendererFns 4779 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { 4780 auto MaybeImmed = getImmedFromMO(Root); 4781 if (MaybeImmed == None || *MaybeImmed > 63) 4782 return None; 4783 uint64_t Enc = 63 - *MaybeImmed; 4784 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 4785 } 4786 4787 /// Helper to select an immediate value that 
can be represented as a 12-bit 4788 /// value shifted left by either 0 or 12. If it is possible to do so, return 4789 /// the immediate and shift value. If not, return None. 4790 /// 4791 /// Used by selectArithImmed and selectNegArithImmed. 4792 InstructionSelector::ComplexRendererFns 4793 AArch64InstructionSelector::select12BitValueWithLeftShift( 4794 uint64_t Immed) const { 4795 unsigned ShiftAmt; 4796 if (Immed >> 12 == 0) { 4797 ShiftAmt = 0; 4798 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { 4799 ShiftAmt = 12; 4800 Immed = Immed >> 12; 4801 } else 4802 return None; 4803 4804 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); 4805 return {{ 4806 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); }, 4807 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); }, 4808 }}; 4809 } 4810 4811 /// SelectArithImmed - Select an immediate value that can be represented as 4812 /// a 12-bit value shifted left by either 0 or 12. If so, return true with 4813 /// Val set to the 12-bit value and Shift set to the shifter operand. 4814 InstructionSelector::ComplexRendererFns 4815 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { 4816 // This function is called from the addsub_shifted_imm ComplexPattern, 4817 // which lists [imm] as the list of opcode it's interested in, however 4818 // we still need to check whether the operand is actually an immediate 4819 // here because the ComplexPattern opcode list is only used in 4820 // root-level opcode matching. 4821 auto MaybeImmed = getImmedFromMO(Root); 4822 if (MaybeImmed == None) 4823 return None; 4824 return select12BitValueWithLeftShift(*MaybeImmed); 4825 } 4826 4827 /// SelectNegArithImmed - As above, but negates the value before trying to 4828 /// select it. 4829 InstructionSelector::ComplexRendererFns 4830 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const { 4831 // We need a register here, because we need to know if we have a 64 or 32 4832 // bit immediate. 4833 if (!Root.isReg()) 4834 return None; 4835 auto MaybeImmed = getImmedFromMO(Root); 4836 if (MaybeImmed == None) 4837 return None; 4838 uint64_t Immed = *MaybeImmed; 4839 4840 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" 4841 // have the opposite effect on the C flag, so this pattern mustn't match under 4842 // those circumstances. 4843 if (Immed == 0) 4844 return None; 4845 4846 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on 4847 // the root. 4848 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 4849 if (MRI.getType(Root.getReg()).getSizeInBits() == 32) 4850 Immed = ~((uint32_t)Immed) + 1; 4851 else 4852 Immed = ~Immed + 1ULL; 4853 4854 if (Immed & 0xFFFFFFFFFF000000ULL) 4855 return None; 4856 4857 Immed &= 0xFFFFFFULL; 4858 return select12BitValueWithLeftShift(Immed); 4859 } 4860 4861 /// Return true if it is worth folding MI into an extended register. That is, 4862 /// if it's safe to pull it into the addressing mode of a load or store as a 4863 /// shift. 4864 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( 4865 MachineInstr &MI, const MachineRegisterInfo &MRI) const { 4866 // Always fold if there is one use, or if we're optimizing for size. 4867 Register DefReg = MI.getOperand(0).getReg(); 4868 if (MRI.hasOneNonDBGUse(DefReg) || 4869 MI.getParent()->getParent()->getFunction().hasMinSize()) 4870 return true; 4871 4872 // It's better to avoid folding and recomputing shifts when we don't have a 4873 // fastpath. 
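  // Folding the shift into several memory operations means each access redoes
  // the shift, so this is only worthwhile when the subtarget has cheap
  // shifted-register addressing (the LSLFast feature).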
4874 if (!STI.hasLSLFast()) 4875 return false; 4876 4877 // We have a fastpath, so folding a shift in and potentially computing it 4878 // many times may be beneficial. Check if this is only used in memory ops. 4879 // If it is, then we should fold. 4880 return all_of(MRI.use_nodbg_instructions(DefReg), 4881 [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); 4882 } 4883 4884 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { 4885 switch (Type) { 4886 case AArch64_AM::SXTB: 4887 case AArch64_AM::SXTH: 4888 case AArch64_AM::SXTW: 4889 return true; 4890 default: 4891 return false; 4892 } 4893 } 4894 4895 InstructionSelector::ComplexRendererFns 4896 AArch64InstructionSelector::selectExtendedSHL( 4897 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset, 4898 unsigned SizeInBytes, bool WantsExt) const { 4899 assert(Base.isReg() && "Expected base to be a register operand"); 4900 assert(Offset.isReg() && "Expected offset to be a register operand"); 4901 4902 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 4903 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg()); 4904 if (!OffsetInst) 4905 return None; 4906 4907 unsigned OffsetOpc = OffsetInst->getOpcode(); 4908 bool LookedThroughZExt = false; 4909 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) { 4910 // Try to look through a ZEXT. 4911 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt) 4912 return None; 4913 4914 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg()); 4915 OffsetOpc = OffsetInst->getOpcode(); 4916 LookedThroughZExt = true; 4917 4918 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) 4919 return None; 4920 } 4921 // Make sure that the memory op is a valid size. 4922 int64_t LegalShiftVal = Log2_32(SizeInBytes); 4923 if (LegalShiftVal == 0) 4924 return None; 4925 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) 4926 return None; 4927 4928 // Now, try to find the specific G_CONSTANT. Start by assuming that the 4929 // register we will offset is the LHS, and the register containing the 4930 // constant is the RHS. 4931 Register OffsetReg = OffsetInst->getOperand(1).getReg(); 4932 Register ConstantReg = OffsetInst->getOperand(2).getReg(); 4933 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI); 4934 if (!ValAndVReg) { 4935 // We didn't get a constant on the RHS. If the opcode is a shift, then 4936 // we're done. 4937 if (OffsetOpc == TargetOpcode::G_SHL) 4938 return None; 4939 4940 // If we have a G_MUL, we can use either register. Try looking at the RHS. 4941 std::swap(OffsetReg, ConstantReg); 4942 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI); 4943 if (!ValAndVReg) 4944 return None; 4945 } 4946 4947 // The value must fit into 3 bits, and must be positive. Make sure that is 4948 // true. 4949 int64_t ImmVal = ValAndVReg->Value; 4950 4951 // Since we're going to pull this into a shift, the constant value must be 4952 // a power of 2. If we got a multiply, then we need to check this. 4953 if (OffsetOpc == TargetOpcode::G_MUL) { 4954 if (!isPowerOf2_32(ImmVal)) 4955 return None; 4956 4957 // Got a power of 2. So, the amount we'll shift is the log base-2 of that. 4958 ImmVal = Log2_32(ImmVal); 4959 } 4960 4961 if ((ImmVal & 0x7) != ImmVal) 4962 return None; 4963 4964 // We are only allowed to shift by LegalShiftVal. This shift value is built 4965 // into the instruction, so we can't just use whatever we want. 
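  // Worked example (values are illustrative): for an 8-byte access,
  // LegalShiftVal = Log2_32(8) = 3, so only
  //   %off = G_SHL %idx, 3   (or %off = G_MUL %idx, 8)
  // can be folded into the "lsl #3" of the addressing mode; any other shift
  // amount is rejected here.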
4966 if (ImmVal != LegalShiftVal) 4967 return None; 4968 4969 unsigned SignExtend = 0; 4970 if (WantsExt) { 4971 // Check if the offset is defined by an extend, unless we looked through a 4972 // G_ZEXT earlier. 4973 if (!LookedThroughZExt) { 4974 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI); 4975 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true); 4976 if (Ext == AArch64_AM::InvalidShiftExtend) 4977 return None; 4978 4979 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0; 4980 // We only support SXTW for signed extension here. 4981 if (SignExtend && Ext != AArch64_AM::SXTW) 4982 return None; 4983 OffsetReg = ExtInst->getOperand(1).getReg(); 4984 } 4985 4986 // Need a 32-bit wide register here. 4987 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg())); 4988 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB); 4989 } 4990 4991 // We can use the LHS of the GEP as the base, and the LHS of the shift as an 4992 // offset. Signify that we are shifting by setting the shift flag to 1. 4993 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); }, 4994 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); }, 4995 [=](MachineInstrBuilder &MIB) { 4996 // Need to add both immediates here to make sure that they are both 4997 // added to the instruction. 4998 MIB.addImm(SignExtend); 4999 MIB.addImm(1); 5000 }}}; 5001 } 5002 5003 /// This is used for computing addresses like this: 5004 /// 5005 /// ldr x1, [x2, x3, lsl #3] 5006 /// 5007 /// Where x2 is the base register, and x3 is an offset register. The shift-left 5008 /// is a constant value specific to this load instruction. That is, we'll never 5009 /// see anything other than a 3 here (which corresponds to the size of the 5010 /// element being loaded.) 5011 InstructionSelector::ComplexRendererFns 5012 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( 5013 MachineOperand &Root, unsigned SizeInBytes) const { 5014 if (!Root.isReg()) 5015 return None; 5016 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5017 5018 // We want to find something like this: 5019 // 5020 // val = G_CONSTANT LegalShiftVal 5021 // shift = G_SHL off_reg val 5022 // ptr = G_PTR_ADD base_reg shift 5023 // x = G_LOAD ptr 5024 // 5025 // And fold it into this addressing mode: 5026 // 5027 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal] 5028 5029 // Check if we can find the G_PTR_ADD. 5030 MachineInstr *PtrAdd = 5031 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); 5032 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) 5033 return None; 5034 5035 // Now, try to match an opcode which will match our specific offset. 5036 // We want a G_SHL or a G_MUL. 5037 MachineInstr *OffsetInst = 5038 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI); 5039 return selectExtendedSHL(Root, PtrAdd->getOperand(1), 5040 OffsetInst->getOperand(0), SizeInBytes, 5041 /*WantsExt=*/false); 5042 } 5043 5044 /// This is used for computing addresses like this: 5045 /// 5046 /// ldr x1, [x2, x3] 5047 /// 5048 /// Where x2 is the base register, and x3 is an offset register. 5049 /// 5050 /// When possible (or profitable) to fold a G_PTR_ADD into the address calculation, 5051 /// this will do so. Otherwise, it will return None. 5052 InstructionSelector::ComplexRendererFns 5053 AArch64InstructionSelector::selectAddrModeRegisterOffset( 5054 MachineOperand &Root) const { 5055 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5056 5057 // We need a GEP. 
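  // The shape being matched is (register names are illustrative):
  //   %ptr = G_PTR_ADD %base, %off
  //   %val = G_LOAD %ptr
  // which can then use %base as the base register and %off as the offset
  // register, with no shift or extend applied (both immediates below are 0).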
5058 MachineInstr *Gep = MRI.getVRegDef(Root.getReg()); 5059 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD) 5060 return None; 5061 5062 // If this is used more than once, let's not bother folding. 5063 // TODO: Check if they are memory ops. If they are, then we can still fold 5064 // without having to recompute anything. 5065 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg())) 5066 return None; 5067 5068 // Base is the GEP's LHS, offset is its RHS. 5069 return {{[=](MachineInstrBuilder &MIB) { 5070 MIB.addUse(Gep->getOperand(1).getReg()); 5071 }, 5072 [=](MachineInstrBuilder &MIB) { 5073 MIB.addUse(Gep->getOperand(2).getReg()); 5074 }, 5075 [=](MachineInstrBuilder &MIB) { 5076 // Need to add both immediates here to make sure that they are both 5077 // added to the instruction. 5078 MIB.addImm(0); 5079 MIB.addImm(0); 5080 }}}; 5081 } 5082 5083 /// This is intended to be equivalent to selectAddrModeXRO in 5084 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads. 5085 InstructionSelector::ComplexRendererFns 5086 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, 5087 unsigned SizeInBytes) const { 5088 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5089 5090 // If we have a constant offset, then we probably don't want to match a 5091 // register offset. 5092 if (isBaseWithConstantOffset(Root, MRI)) 5093 return None; 5094 5095 // Try to fold shifts into the addressing mode. 5096 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes); 5097 if (AddrModeFns) 5098 return AddrModeFns; 5099 5100 // If that doesn't work, see if it's possible to fold in registers from 5101 // a GEP. 5102 return selectAddrModeRegisterOffset(Root); 5103 } 5104 5105 /// This is used for computing addresses like this: 5106 /// 5107 /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal] 5108 /// 5109 /// Where we have a 64-bit base register, a 32-bit offset register, and an 5110 /// extend (which may or may not be signed). 5111 InstructionSelector::ComplexRendererFns 5112 AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root, 5113 unsigned SizeInBytes) const { 5114 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5115 5116 MachineInstr *PtrAdd = 5117 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); 5118 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) 5119 return None; 5120 5121 MachineOperand &LHS = PtrAdd->getOperand(1); 5122 MachineOperand &RHS = PtrAdd->getOperand(2); 5123 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI); 5124 5125 // The first case is the same as selectAddrModeXRO, except we need an extend. 5126 // In this case, we try to find a shift and extend, and fold them into the 5127 // addressing mode. 5128 // 5129 // E.g. 5130 // 5131 // off_reg = G_Z/S/ANYEXT ext_reg 5132 // val = G_CONSTANT LegalShiftVal 5133 // shift = G_SHL off_reg val 5134 // ptr = G_PTR_ADD base_reg shift 5135 // x = G_LOAD ptr 5136 // 5137 // In this case we can get a load like this: 5138 // 5139 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal] 5140 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0), 5141 SizeInBytes, /*WantsExt=*/true); 5142 if (ExtendedShl) 5143 return ExtendedShl; 5144 5145 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though. 5146 // 5147 // e.g. 5148 // ldr something, [base_reg, ext_reg, sxtw] 5149 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) 5150 return None; 5151 5152 // Check if this is an extend. 
We'll get an extend type if it is. 5153 AArch64_AM::ShiftExtendType Ext = 5154 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true); 5155 if (Ext == AArch64_AM::InvalidShiftExtend) 5156 return None; 5157 5158 // Need a 32-bit wide register. 5159 MachineIRBuilder MIB(*PtrAdd); 5160 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(), 5161 AArch64::GPR32RegClass, MIB); 5162 unsigned SignExtend = Ext == AArch64_AM::SXTW; 5163 5164 // Base is LHS, offset is ExtReg. 5165 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); }, 5166 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, 5167 [=](MachineInstrBuilder &MIB) { 5168 MIB.addImm(SignExtend); 5169 MIB.addImm(0); 5170 }}}; 5171 } 5172 5173 /// Select a "register plus unscaled signed 9-bit immediate" address. This 5174 /// should only match when there is an offset that is not valid for a scaled 5175 /// immediate addressing mode. The "Size" argument is the size in bytes of the 5176 /// memory reference, which is needed here to know what is valid for a scaled 5177 /// immediate. 5178 InstructionSelector::ComplexRendererFns 5179 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, 5180 unsigned Size) const { 5181 MachineRegisterInfo &MRI = 5182 Root.getParent()->getParent()->getParent()->getRegInfo(); 5183 5184 if (!Root.isReg()) 5185 return None; 5186 5187 if (!isBaseWithConstantOffset(Root, MRI)) 5188 return None; 5189 5190 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); 5191 if (!RootDef) 5192 return None; 5193 5194 MachineOperand &OffImm = RootDef->getOperand(2); 5195 if (!OffImm.isReg()) 5196 return None; 5197 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); 5198 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT) 5199 return None; 5200 int64_t RHSC; 5201 MachineOperand &RHSOp1 = RHS->getOperand(1); 5202 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) 5203 return None; 5204 RHSC = RHSOp1.getCImm()->getSExtValue(); 5205 5206 // If the offset is valid as a scaled immediate, don't match here. 5207 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) 5208 return None; 5209 if (RHSC >= -256 && RHSC < 256) { 5210 MachineOperand &Base = RootDef->getOperand(1); 5211 return {{ 5212 [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, 5213 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, 5214 }}; 5215 } 5216 return None; 5217 } 5218 5219 InstructionSelector::ComplexRendererFns 5220 AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef, 5221 unsigned Size, 5222 MachineRegisterInfo &MRI) const { 5223 if (RootDef.getOpcode() != AArch64::G_ADD_LOW) 5224 return None; 5225 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg()); 5226 if (Adrp.getOpcode() != AArch64::ADRP) 5227 return None; 5228 5229 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG. 5230 // TODO: Need to check GV's offset % size if doing offset folding into globals. 
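  // The fold being attempted is (names are illustrative):
  //   %page = ADRP @g
  //   %addr = G_ADD_LOW %page, @g
  //   %val  = G_LOAD %addr
  // becoming a page-offset addressed access such as
  //   ldr x0, [%page, :lo12:@g]
  // which is only done when the global's alignment covers the access size,
  // as checked below.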
5231 assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global"); 5232 auto GV = Adrp.getOperand(1).getGlobal(); 5233 if (GV->isThreadLocal()) 5234 return None; 5235 5236 auto &MF = *RootDef.getParent()->getParent(); 5237 if (GV->getPointerAlignment(MF.getDataLayout()) < Size) 5238 return None; 5239 5240 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget()); 5241 MachineIRBuilder MIRBuilder(RootDef); 5242 Register AdrpReg = Adrp.getOperand(0).getReg(); 5243 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); }, 5244 [=](MachineInstrBuilder &MIB) { 5245 MIB.addGlobalAddress(GV, /* Offset */ 0, 5246 OpFlags | AArch64II::MO_PAGEOFF | 5247 AArch64II::MO_NC); 5248 }}}; 5249 } 5250 5251 /// Select a "register plus scaled unsigned 12-bit immediate" address. The 5252 /// "Size" argument is the size in bytes of the memory reference, which 5253 /// determines the scale. 5254 InstructionSelector::ComplexRendererFns 5255 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, 5256 unsigned Size) const { 5257 MachineFunction &MF = *Root.getParent()->getParent()->getParent(); 5258 MachineRegisterInfo &MRI = MF.getRegInfo(); 5259 5260 if (!Root.isReg()) 5261 return None; 5262 5263 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); 5264 if (!RootDef) 5265 return None; 5266 5267 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { 5268 return {{ 5269 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, 5270 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, 5271 }}; 5272 } 5273 5274 CodeModel::Model CM = MF.getTarget().getCodeModel(); 5275 // Check if we can fold in the ADD of small code model ADRP + ADD address. 5276 if (CM == CodeModel::Small) { 5277 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI); 5278 if (OpFns) 5279 return OpFns; 5280 } 5281 5282 if (isBaseWithConstantOffset(Root, MRI)) { 5283 MachineOperand &LHS = RootDef->getOperand(1); 5284 MachineOperand &RHS = RootDef->getOperand(2); 5285 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); 5286 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); 5287 if (LHSDef && RHSDef) { 5288 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue(); 5289 unsigned Scale = Log2_32(Size); 5290 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { 5291 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) 5292 return {{ 5293 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); }, 5294 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, 5295 }}; 5296 5297 return {{ 5298 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, 5299 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, 5300 }}; 5301 } 5302 } 5303 } 5304 5305 // Before falling back to our general case, check if the unscaled 5306 // instructions can handle this. If so, that's preferable. 5307 if (selectAddrModeUnscaled(Root, Size).hasValue()) 5308 return None; 5309 5310 return {{ 5311 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, 5312 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, 5313 }}; 5314 } 5315 5316 /// Given a shift instruction, return the correct shift type for that 5317 /// instruction. 
5318 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) { 5319 // TODO: Handle AArch64_AM::ROR 5320 switch (MI.getOpcode()) { 5321 default: 5322 return AArch64_AM::InvalidShiftExtend; 5323 case TargetOpcode::G_SHL: 5324 return AArch64_AM::LSL; 5325 case TargetOpcode::G_LSHR: 5326 return AArch64_AM::LSR; 5327 case TargetOpcode::G_ASHR: 5328 return AArch64_AM::ASR; 5329 } 5330 } 5331 5332 /// Select a "shifted register" operand. If the value is not shifted, set the 5333 /// shift operand to a default value of "lsl 0". 5334 /// 5335 /// TODO: Allow shifted register to be rotated in logical instructions. 5336 InstructionSelector::ComplexRendererFns 5337 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const { 5338 if (!Root.isReg()) 5339 return None; 5340 MachineRegisterInfo &MRI = 5341 Root.getParent()->getParent()->getParent()->getRegInfo(); 5342 5343 // Check if the operand is defined by an instruction which corresponds to 5344 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc. 5345 // 5346 // TODO: Handle AArch64_AM::ROR for logical instructions. 5347 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg()); 5348 if (!ShiftInst) 5349 return None; 5350 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst); 5351 if (ShType == AArch64_AM::InvalidShiftExtend) 5352 return None; 5353 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI)) 5354 return None; 5355 5356 // Need an immediate on the RHS. 5357 MachineOperand &ShiftRHS = ShiftInst->getOperand(2); 5358 auto Immed = getImmedFromMO(ShiftRHS); 5359 if (!Immed) 5360 return None; 5361 5362 // We have something that we can fold. Fold in the shift's LHS and RHS into 5363 // the instruction. 5364 MachineOperand &ShiftLHS = ShiftInst->getOperand(1); 5365 Register ShiftReg = ShiftLHS.getReg(); 5366 5367 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits(); 5368 unsigned Val = *Immed & (NumBits - 1); 5369 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val); 5370 5371 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); }, 5372 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}}; 5373 } 5374 5375 AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst( 5376 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const { 5377 unsigned Opc = MI.getOpcode(); 5378 5379 // Handle explicit extend instructions first. 5380 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) { 5381 unsigned Size; 5382 if (Opc == TargetOpcode::G_SEXT) 5383 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 5384 else 5385 Size = MI.getOperand(2).getImm(); 5386 assert(Size != 64 && "Extend from 64 bits?"); 5387 switch (Size) { 5388 case 8: 5389 return AArch64_AM::SXTB; 5390 case 16: 5391 return AArch64_AM::SXTH; 5392 case 32: 5393 return AArch64_AM::SXTW; 5394 default: 5395 return AArch64_AM::InvalidShiftExtend; 5396 } 5397 } 5398 5399 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) { 5400 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 5401 assert(Size != 64 && "Extend from 64 bits?"); 5402 switch (Size) { 5403 case 8: 5404 return AArch64_AM::UXTB; 5405 case 16: 5406 return AArch64_AM::UXTH; 5407 case 32: 5408 return AArch64_AM::UXTW; 5409 default: 5410 return AArch64_AM::InvalidShiftExtend; 5411 } 5412 } 5413 5414 // Don't have an explicit extend. Try to handle a G_AND with a constant mask 5415 // on the RHS. 
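  // A mask of all ones in the low bits acts like a zero-extend, e.g.
  //   %v = G_AND %x, 0xFF
  // is treated as UXTB. Byte and halfword extends are not valid load/store
  // addressing-mode extends, so those two cases are rejected when
  // IsLoadStore is set (see the switch below).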
5416 if (Opc != TargetOpcode::G_AND) 5417 return AArch64_AM::InvalidShiftExtend; 5418 5419 Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2)); 5420 if (!MaybeAndMask) 5421 return AArch64_AM::InvalidShiftExtend; 5422 uint64_t AndMask = *MaybeAndMask; 5423 switch (AndMask) { 5424 default: 5425 return AArch64_AM::InvalidShiftExtend; 5426 case 0xFF: 5427 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; 5428 case 0xFFFF: 5429 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; 5430 case 0xFFFFFFFF: 5431 return AArch64_AM::UXTW; 5432 } 5433 } 5434 5435 Register AArch64InstructionSelector::moveScalarRegClass( 5436 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const { 5437 MachineRegisterInfo &MRI = *MIB.getMRI(); 5438 auto Ty = MRI.getType(Reg); 5439 assert(!Ty.isVector() && "Expected scalars only!"); 5440 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC)) 5441 return Reg; 5442 5443 // Create a copy and immediately select it. 5444 // FIXME: We should have an emitCopy function? 5445 auto Copy = MIB.buildCopy({&RC}, {Reg}); 5446 selectCopy(*Copy, TII, MRI, TRI, RBI); 5447 return Copy.getReg(0); 5448 } 5449 5450 /// Select an "extended register" operand. This operand folds in an extend 5451 /// followed by an optional left shift. 5452 InstructionSelector::ComplexRendererFns 5453 AArch64InstructionSelector::selectArithExtendedRegister( 5454 MachineOperand &Root) const { 5455 if (!Root.isReg()) 5456 return None; 5457 MachineRegisterInfo &MRI = 5458 Root.getParent()->getParent()->getParent()->getRegInfo(); 5459 5460 uint64_t ShiftVal = 0; 5461 Register ExtReg; 5462 AArch64_AM::ShiftExtendType Ext; 5463 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI); 5464 if (!RootDef) 5465 return None; 5466 5467 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI)) 5468 return None; 5469 5470 // Check if we can fold a shift and an extend. 5471 if (RootDef->getOpcode() == TargetOpcode::G_SHL) { 5472 // Look for a constant on the RHS of the shift. 5473 MachineOperand &RHS = RootDef->getOperand(2); 5474 Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS); 5475 if (!MaybeShiftVal) 5476 return None; 5477 ShiftVal = *MaybeShiftVal; 5478 if (ShiftVal > 4) 5479 return None; 5480 // Look for a valid extend instruction on the LHS of the shift. 5481 MachineOperand &LHS = RootDef->getOperand(1); 5482 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI); 5483 if (!ExtDef) 5484 return None; 5485 Ext = getExtendTypeForInst(*ExtDef, MRI); 5486 if (Ext == AArch64_AM::InvalidShiftExtend) 5487 return None; 5488 ExtReg = ExtDef->getOperand(1).getReg(); 5489 } else { 5490 // Didn't get a shift. Try just folding an extend. 5491 Ext = getExtendTypeForInst(*RootDef, MRI); 5492 if (Ext == AArch64_AM::InvalidShiftExtend) 5493 return None; 5494 ExtReg = RootDef->getOperand(1).getReg(); 5495 5496 // If we have a 32 bit instruction which zeroes out the high half of a 5497 // register, we get an implicit zero extend for free. Check if we have one. 5498 // FIXME: We actually emit the extend right now even though we don't have 5499 // to. 5500 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) { 5501 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg); 5502 if (ExtInst && isDef32(*ExtInst)) 5503 return None; 5504 } 5505 } 5506 5507 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister 5508 // copy. 
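  // Illustrative end result of this fold (register names assumed): for
  //   %ext = G_SEXT %w        ; s32 -> s64
  //   %shl = G_SHL %ext, 2
  //   %add = G_ADD %x, %shl
  // the operand renders as the extended register "%w, sxtw #2", giving e.g.
  //   add x0, x1, w2, sxtw #2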
5509 MachineIRBuilder MIB(*RootDef); 5510 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB); 5511 5512 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, 5513 [=](MachineInstrBuilder &MIB) { 5514 MIB.addImm(getArithExtendImm(Ext, ShiftVal)); 5515 }}}; 5516 } 5517 5518 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, 5519 const MachineInstr &MI, 5520 int OpIdx) const { 5521 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 5522 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 5523 "Expected G_CONSTANT"); 5524 Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI); 5525 assert(CstVal && "Expected constant value"); 5526 MIB.addImm(CstVal.getValue()); 5527 } 5528 5529 void AArch64InstructionSelector::renderLogicalImm32( 5530 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { 5531 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 5532 "Expected G_CONSTANT"); 5533 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); 5534 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32); 5535 MIB.addImm(Enc); 5536 } 5537 5538 void AArch64InstructionSelector::renderLogicalImm64( 5539 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { 5540 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 5541 "Expected G_CONSTANT"); 5542 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); 5543 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64); 5544 MIB.addImm(Enc); 5545 } 5546 5547 bool AArch64InstructionSelector::isLoadStoreOfNumBytes( 5548 const MachineInstr &MI, unsigned NumBytes) const { 5549 if (!MI.mayLoadOrStore()) 5550 return false; 5551 assert(MI.hasOneMemOperand() && 5552 "Expected load/store to have only one mem op!"); 5553 return (*MI.memoperands_begin())->getSize() == NumBytes; 5554 } 5555 5556 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const { 5557 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 5558 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32) 5559 return false; 5560 5561 // Only return true if we know the operation will zero-out the high half of 5562 // the 64-bit register. Truncates can be subregister copies, which don't 5563 // zero out the high bits. Copies and other copy-like instructions can be 5564 // fed by truncates, or could be lowered as subregister copies. 5565 switch (MI.getOpcode()) { 5566 default: 5567 return true; 5568 case TargetOpcode::COPY: 5569 case TargetOpcode::G_BITCAST: 5570 case TargetOpcode::G_TRUNC: 5571 case TargetOpcode::G_PHI: 5572 return false; 5573 } 5574 } 5575 5576 5577 // Perform fixups on the given PHI instruction's operands to force them all 5578 // to be the same as the destination regbank. 5579 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, 5580 const AArch64RegisterBankInfo &RBI) { 5581 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI"); 5582 Register DstReg = MI.getOperand(0).getReg(); 5583 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg); 5584 assert(DstRB && "Expected PHI dst to have regbank assigned"); 5585 MachineIRBuilder MIB(MI); 5586 5587 // Go through each operand and ensure it has the same regbank. 
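  // G_PHI operands come in (value, predecessor MBB) pairs after the def, so
  // the non-register MBB operands are skipped below. For a value operand on
  // the wrong bank, a COPY to the destination bank is inserted right after
  // that operand's def and the PHI is rewritten to use the copy (e.g. an
  // fpr(s16) input copied to gpr(s16) when the PHI def is on the GPR bank).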
5588 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) { 5589 MachineOperand &MO = MI.getOperand(OpIdx); 5590 if (!MO.isReg()) 5591 continue; 5592 Register OpReg = MO.getReg(); 5593 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg); 5594 if (RB != DstRB) { 5595 // Insert a cross-bank copy. 5596 auto *OpDef = MRI.getVRegDef(OpReg); 5597 const LLT &Ty = MRI.getType(OpReg); 5598 MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator())); 5599 auto Copy = MIB.buildCopy(Ty, OpReg); 5600 MRI.setRegBank(Copy.getReg(0), *DstRB); 5601 MO.setReg(Copy.getReg(0)); 5602 } 5603 } 5604 } 5605 5606 void AArch64InstructionSelector::processPHIs(MachineFunction &MF) { 5607 // We're looking for PHIs, build a list so we don't invalidate iterators. 5608 MachineRegisterInfo &MRI = MF.getRegInfo(); 5609 SmallVector<MachineInstr *, 32> Phis; 5610 for (auto &BB : MF) { 5611 for (auto &MI : BB) { 5612 if (MI.getOpcode() == TargetOpcode::G_PHI) 5613 Phis.emplace_back(&MI); 5614 } 5615 } 5616 5617 for (auto *MI : Phis) { 5618 // We need to do some work here if the operand types are < 16 bit and they 5619 // are split across fpr/gpr banks. Since all types <32b on gpr 5620 // end up being assigned gpr32 regclasses, we can end up with PHIs here 5621 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't 5622 // be selecting heterogenous regbanks for operands if possible, but we 5623 // still need to be able to deal with it here. 5624 // 5625 // To fix this, if we have a gpr-bank operand < 32b in size and at least 5626 // one other operand is on the fpr bank, then we add cross-bank copies 5627 // to homogenize the operand banks. For simplicity the bank that we choose 5628 // to settle on is whatever bank the def operand has. For example: 5629 // 5630 // %endbb: 5631 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2 5632 // => 5633 // %bb2: 5634 // ... 5635 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16) 5636 // ... 5637 // %endbb: 5638 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2 5639 bool HasGPROp = false, HasFPROp = false; 5640 for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) { 5641 const auto &MO = MI->getOperand(OpIdx); 5642 if (!MO.isReg()) 5643 continue; 5644 const LLT &Ty = MRI.getType(MO.getReg()); 5645 if (!Ty.isValid() || !Ty.isScalar()) 5646 break; 5647 if (Ty.getSizeInBits() >= 32) 5648 break; 5649 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg()); 5650 // If for some reason we don't have a regbank yet. Don't try anything. 5651 if (!RB) 5652 break; 5653 5654 if (RB->getID() == AArch64::GPRRegBankID) 5655 HasGPROp = true; 5656 else 5657 HasFPROp = true; 5658 } 5659 // We have heterogenous regbanks, need to fixup. 5660 if (HasGPROp && HasFPROp) 5661 fixupPHIOpBanks(*MI, MRI, RBI); 5662 } 5663 } 5664 5665 namespace llvm { 5666 InstructionSelector * 5667 createAArch64InstructionSelector(const AArch64TargetMachine &TM, 5668 AArch64Subtarget &Subtarget, 5669 AArch64RegisterBankInfo &RBI) { 5670 return new AArch64InstructionSelector(TM, Subtarget, RBI); 5671 } 5672 } 5673