1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements the targeting of the InstructionSelector class for 10 /// AArch64. 11 /// \todo This should be generated by TableGen. 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64InstrInfo.h" 15 #include "AArch64MachineFunctionInfo.h" 16 #include "AArch64RegisterBankInfo.h" 17 #include "AArch64RegisterInfo.h" 18 #include "AArch64Subtarget.h" 19 #include "AArch64TargetMachine.h" 20 #include "MCTargetDesc/AArch64AddressingModes.h" 21 #include "llvm/ADT/Optional.h" 22 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 23 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" 24 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 25 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 26 #include "llvm/CodeGen/GlobalISel/Utils.h" 27 #include "llvm/CodeGen/MachineBasicBlock.h" 28 #include "llvm/CodeGen/MachineConstantPool.h" 29 #include "llvm/CodeGen/MachineFunction.h" 30 #include "llvm/CodeGen/MachineInstr.h" 31 #include "llvm/CodeGen/MachineInstrBuilder.h" 32 #include "llvm/CodeGen/MachineOperand.h" 33 #include "llvm/CodeGen/MachineRegisterInfo.h" 34 #include "llvm/CodeGen/TargetOpcodes.h" 35 #include "llvm/IR/Constants.h" 36 #include "llvm/IR/Type.h" 37 #include "llvm/IR/IntrinsicsAArch64.h" 38 #include "llvm/Support/Debug.h" 39 #include "llvm/Support/raw_ostream.h" 40 41 #define DEBUG_TYPE "aarch64-isel" 42 43 using namespace llvm; 44 45 namespace { 46 47 #define GET_GLOBALISEL_PREDICATE_BITSET 48 #include "AArch64GenGlobalISel.inc" 49 #undef GET_GLOBALISEL_PREDICATE_BITSET 50 51 class AArch64InstructionSelector : public InstructionSelector { 52 public: 53 AArch64InstructionSelector(const AArch64TargetMachine &TM, 54 const AArch64Subtarget &STI, 55 const AArch64RegisterBankInfo &RBI); 56 57 bool select(MachineInstr &I) override; 58 static const char *getName() { return DEBUG_TYPE; } 59 60 void setupMF(MachineFunction &MF, GISelKnownBits &KB, 61 CodeGenCoverage &CoverageInfo) override { 62 InstructionSelector::setupMF(MF, KB, CoverageInfo); 63 64 // hasFnAttribute() is expensive to call on every BRCOND selection, so 65 // cache it here for each run of the selector. 66 ProduceNonFlagSettingCondBr = 67 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); 68 MFReturnAddr = Register(); 69 70 processPHIs(MF); 71 } 72 73 private: 74 /// tblgen-erated 'select' implementation, used as the initial selector for 75 /// the patterns that don't require complex C++. 76 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; 77 78 // A lowering phase that runs before any selection attempts. 79 // Returns true if the instruction was modified. 80 bool preISelLower(MachineInstr &I); 81 82 // An early selection function that runs before the selectImpl() call. 83 bool earlySelect(MachineInstr &I) const; 84 85 // Do some preprocessing of G_PHIs before we begin selection. 86 void processPHIs(MachineFunction &MF); 87 88 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const; 89 90 /// Eliminate same-sized cross-bank copies into stores before selectImpl(). 
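  /// For example, given
  ///
  ///   %x:gpr(s32) = ... something ...
  ///   %y:fpr(s32) = COPY %x:gpr(s32)
  ///   G_STORE %y:fpr(s32)
  ///
  /// the store can simply use %x:gpr directly; only the size of the stored
  /// value matters here, not its register bank.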
91 bool contractCrossBankCopyIntoStore(MachineInstr &I, 92 MachineRegisterInfo &MRI); 93 94 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI); 95 96 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, 97 MachineRegisterInfo &MRI) const; 98 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, 99 MachineRegisterInfo &MRI) const; 100 101 bool tryOptAndIntoCompareBranch(MachineInstr *LHS, 102 int64_t CmpConstant, 103 const CmpInst::Predicate &Pred, 104 MachineBasicBlock *DstMBB, 105 MachineIRBuilder &MIB) const; 106 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, 107 MachineRegisterInfo &MRI) const; 108 109 bool selectVectorASHR(MachineInstr &I, MachineRegisterInfo &MRI) const; 110 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const; 111 112 // Helper to generate an equivalent of scalar_to_vector into a new register, 113 // returned via 'Dst'. 114 MachineInstr *emitScalarToVector(unsigned EltSize, 115 const TargetRegisterClass *DstRC, 116 Register Scalar, 117 MachineIRBuilder &MIRBuilder) const; 118 119 /// Emit a lane insert into \p DstReg, or a new vector register if None is 120 /// provided. 121 /// 122 /// The lane inserted into is defined by \p LaneIdx. The vector source 123 /// register is given by \p SrcReg. The register containing the element is 124 /// given by \p EltReg. 125 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg, 126 Register EltReg, unsigned LaneIdx, 127 const RegisterBank &RB, 128 MachineIRBuilder &MIRBuilder) const; 129 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const; 130 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy, 131 MachineRegisterInfo &MRI) const; 132 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const; 133 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const; 134 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const; 135 136 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const; 137 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const; 138 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const; 139 bool selectSplitVectorUnmerge(MachineInstr &I, 140 MachineRegisterInfo &MRI) const; 141 bool selectIntrinsicWithSideEffects(MachineInstr &I, 142 MachineRegisterInfo &MRI) const; 143 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI); 144 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const; 145 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const; 146 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const; 147 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const; 148 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const; 149 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const; 150 151 unsigned emitConstantPoolEntry(const Constant *CPVal, 152 MachineFunction &MF) const; 153 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal, 154 MachineIRBuilder &MIRBuilder) const; 155 156 // Emit a vector concat operation. 157 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1, 158 Register Op2, 159 MachineIRBuilder &MIRBuilder) const; 160 161 // Emit an integer compare between LHS and RHS, which checks for Predicate. 162 // 163 // This returns the produced compare instruction, and the predicate which 164 // was ultimately used in the compare. 
The predicate may differ from what 165 // is passed in \p Predicate due to optimization. 166 std::pair<MachineInstr *, CmpInst::Predicate> 167 emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, 168 MachineOperand &Predicate, 169 MachineIRBuilder &MIRBuilder) const; 170 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, MachineOperand &RHS, 171 MachineIRBuilder &MIRBuilder) const; 172 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, 173 MachineIRBuilder &MIRBuilder) const; 174 MachineInstr *emitTST(const Register &LHS, const Register &RHS, 175 MachineIRBuilder &MIRBuilder) const; 176 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg, 177 const RegisterBank &DstRB, LLT ScalarTy, 178 Register VecReg, unsigned LaneIdx, 179 MachineIRBuilder &MIRBuilder) const; 180 181 /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be 182 /// materialized using a FMOV instruction, then update MI and return it. 183 /// Otherwise, do nothing and return a nullptr. 184 MachineInstr *emitFMovForFConstant(MachineInstr &MI, 185 MachineRegisterInfo &MRI) const; 186 187 /// Emit a CSet for a compare. 188 MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred, 189 MachineIRBuilder &MIRBuilder) const; 190 191 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. 192 /// \p IsNegative is true if the test should be "not zero". 193 /// This will also optimize the test bit instruction when possible. 194 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative, 195 MachineBasicBlock *DstMBB, 196 MachineIRBuilder &MIB) const; 197 198 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td. 199 // We use these manually instead of using the importer since it doesn't 200 // support SDNodeXForm. 201 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const; 202 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const; 203 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const; 204 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const; 205 206 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const; 207 ComplexRendererFns selectArithImmed(MachineOperand &Root) const; 208 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const; 209 210 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, 211 unsigned Size) const; 212 213 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const { 214 return selectAddrModeUnscaled(Root, 1); 215 } 216 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const { 217 return selectAddrModeUnscaled(Root, 2); 218 } 219 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const { 220 return selectAddrModeUnscaled(Root, 4); 221 } 222 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const { 223 return selectAddrModeUnscaled(Root, 8); 224 } 225 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const { 226 return selectAddrModeUnscaled(Root, 16); 227 } 228 229 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used 230 /// from complex pattern matchers like selectAddrModeIndexed(). 
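  /// Conceptually, this lets an ADRP + GISEL_ADD_LOW pair that feeds a load
  /// select to something like
  ///
  ///   adrp x8, somesym
  ///   ldr  x0, [x8, :lo12:somesym]
  ///
  /// instead of materializing the full address first. (The registers and
  /// symbol name here are purely illustrative.)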
231 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size, 232 MachineRegisterInfo &MRI) const; 233 234 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root, 235 unsigned Size) const; 236 template <int Width> 237 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const { 238 return selectAddrModeIndexed(Root, Width / 8); 239 } 240 241 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI, 242 const MachineRegisterInfo &MRI) const; 243 ComplexRendererFns 244 selectAddrModeShiftedExtendXReg(MachineOperand &Root, 245 unsigned SizeInBytes) const; 246 247 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether 248 /// or not a shift + extend should be folded into an addressing mode. Returns 249 /// None when this is not profitable or possible. 250 ComplexRendererFns 251 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base, 252 MachineOperand &Offset, unsigned SizeInBytes, 253 bool WantsExt) const; 254 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; 255 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, 256 unsigned SizeInBytes) const; 257 template <int Width> 258 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const { 259 return selectAddrModeXRO(Root, Width / 8); 260 } 261 262 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root, 263 unsigned SizeInBytes) const; 264 template <int Width> 265 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const { 266 return selectAddrModeWRO(Root, Width / 8); 267 } 268 269 ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const; 270 271 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const { 272 return selectShiftedRegister(Root); 273 } 274 275 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const { 276 // TODO: selectShiftedRegister should allow for rotates on logical shifts. 277 // For now, make them the same. The only difference between the two is that 278 // logical shifts are allowed to fold in rotates. Otherwise, these are 279 // functionally the same. 280 return selectShiftedRegister(Root); 281 } 282 283 /// Given an extend instruction, determine the correct shift-extend type for 284 /// that instruction. 285 /// 286 /// If the instruction is going to be used in a load or store, pass 287 /// \p IsLoadStore = true. 288 AArch64_AM::ShiftExtendType 289 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI, 290 bool IsLoadStore = false) const; 291 292 /// Instructions that accept extend modifiers like UXTW expect the register 293 /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a 294 /// subregister copy if necessary. Return either ExtReg, or the result of the 295 /// new copy. 296 Register narrowExtendRegIfNeeded(Register ExtReg, 297 MachineIRBuilder &MIB) const; 298 Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size, 299 MachineIRBuilder &MIB) const; 300 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; 301 302 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI, 303 int OpIdx = -1) const; 304 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I, 305 int OpIdx = -1) const; 306 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I, 307 int OpIdx = -1) const; 308 309 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. 
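  // The 64-bit value is assembled 16 bits at a time: a MOVZXi of the G0
  // fragment, followed by MOVKXi of the G1, G2 and G3 fragments at shifts
  // 16, 32 and 48.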
310 void materializeLargeCMVal(MachineInstr &I, const Value *V, 311 unsigned OpFlags) const; 312 313 // Optimization methods. 314 bool tryOptSelect(MachineInstr &MI) const; 315 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, 316 MachineOperand &Predicate, 317 MachineIRBuilder &MIRBuilder) const; 318 MachineInstr *tryOptArithImmedIntegerCompare(MachineOperand &LHS, 319 MachineOperand &RHS, 320 CmpInst::Predicate &Predicate, 321 MachineIRBuilder &MIB) const; 322 MachineInstr *tryOptArithShiftedCompare(MachineOperand &LHS, 323 MachineOperand &RHS, 324 MachineIRBuilder &MIB) const; 325 326 /// Return true if \p MI is a load or store of \p NumBytes bytes. 327 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; 328 329 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit 330 /// register zeroed out. In other words, the result of MI has been explicitly 331 /// zero extended. 332 bool isDef32(const MachineInstr &MI) const; 333 334 const AArch64TargetMachine &TM; 335 const AArch64Subtarget &STI; 336 const AArch64InstrInfo &TII; 337 const AArch64RegisterInfo &TRI; 338 const AArch64RegisterBankInfo &RBI; 339 340 bool ProduceNonFlagSettingCondBr = false; 341 342 // Some cached values used during selection. 343 // We use LR as a live-in register, and we keep track of it here as it can be 344 // clobbered by calls. 345 Register MFReturnAddr; 346 347 #define GET_GLOBALISEL_PREDICATES_DECL 348 #include "AArch64GenGlobalISel.inc" 349 #undef GET_GLOBALISEL_PREDICATES_DECL 350 351 // We declare the temporaries used by selectImpl() in the class to minimize the 352 // cost of constructing placeholder values. 353 #define GET_GLOBALISEL_TEMPORARIES_DECL 354 #include "AArch64GenGlobalISel.inc" 355 #undef GET_GLOBALISEL_TEMPORARIES_DECL 356 }; 357 358 } // end anonymous namespace 359 360 #define GET_GLOBALISEL_IMPL 361 #include "AArch64GenGlobalISel.inc" 362 #undef GET_GLOBALISEL_IMPL 363 364 AArch64InstructionSelector::AArch64InstructionSelector( 365 const AArch64TargetMachine &TM, const AArch64Subtarget &STI, 366 const AArch64RegisterBankInfo &RBI) 367 : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), 368 TRI(*STI.getRegisterInfo()), RBI(RBI), 369 #define GET_GLOBALISEL_PREDICATES_INIT 370 #include "AArch64GenGlobalISel.inc" 371 #undef GET_GLOBALISEL_PREDICATES_INIT 372 #define GET_GLOBALISEL_TEMPORARIES_INIT 373 #include "AArch64GenGlobalISel.inc" 374 #undef GET_GLOBALISEL_TEMPORARIES_INIT 375 { 376 } 377 378 // FIXME: This should be target-independent, inferred from the types declared 379 // for each class in the bank. 380 static const TargetRegisterClass * 381 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, 382 const RegisterBankInfo &RBI, 383 bool GetAllRegSet = false) { 384 if (RB.getID() == AArch64::GPRRegBankID) { 385 if (Ty.getSizeInBits() <= 32) 386 return GetAllRegSet ? &AArch64::GPR32allRegClass 387 : &AArch64::GPR32RegClass; 388 if (Ty.getSizeInBits() == 64) 389 return GetAllRegSet ? 
&AArch64::GPR64allRegClass 390 : &AArch64::GPR64RegClass; 391 return nullptr; 392 } 393 394 if (RB.getID() == AArch64::FPRRegBankID) { 395 if (Ty.getSizeInBits() <= 16) 396 return &AArch64::FPR16RegClass; 397 if (Ty.getSizeInBits() == 32) 398 return &AArch64::FPR32RegClass; 399 if (Ty.getSizeInBits() == 64) 400 return &AArch64::FPR64RegClass; 401 if (Ty.getSizeInBits() == 128) 402 return &AArch64::FPR128RegClass; 403 return nullptr; 404 } 405 406 return nullptr; 407 } 408 409 /// Given a register bank, and size in bits, return the smallest register class 410 /// that can represent that combination. 411 static const TargetRegisterClass * 412 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, 413 bool GetAllRegSet = false) { 414 unsigned RegBankID = RB.getID(); 415 416 if (RegBankID == AArch64::GPRRegBankID) { 417 if (SizeInBits <= 32) 418 return GetAllRegSet ? &AArch64::GPR32allRegClass 419 : &AArch64::GPR32RegClass; 420 if (SizeInBits == 64) 421 return GetAllRegSet ? &AArch64::GPR64allRegClass 422 : &AArch64::GPR64RegClass; 423 } 424 425 if (RegBankID == AArch64::FPRRegBankID) { 426 switch (SizeInBits) { 427 default: 428 return nullptr; 429 case 8: 430 return &AArch64::FPR8RegClass; 431 case 16: 432 return &AArch64::FPR16RegClass; 433 case 32: 434 return &AArch64::FPR32RegClass; 435 case 64: 436 return &AArch64::FPR64RegClass; 437 case 128: 438 return &AArch64::FPR128RegClass; 439 } 440 } 441 442 return nullptr; 443 } 444 445 /// Returns the correct subregister to use for a given register class. 446 static bool getSubRegForClass(const TargetRegisterClass *RC, 447 const TargetRegisterInfo &TRI, unsigned &SubReg) { 448 switch (TRI.getRegSizeInBits(*RC)) { 449 case 8: 450 SubReg = AArch64::bsub; 451 break; 452 case 16: 453 SubReg = AArch64::hsub; 454 break; 455 case 32: 456 if (RC != &AArch64::FPR32RegClass) 457 SubReg = AArch64::sub_32; 458 else 459 SubReg = AArch64::ssub; 460 break; 461 case 64: 462 SubReg = AArch64::dsub; 463 break; 464 default: 465 LLVM_DEBUG( 466 dbgs() << "Couldn't find appropriate subregister for register class."); 467 return false; 468 } 469 470 return true; 471 } 472 473 /// Returns the minimum size the given register bank can hold. 474 static unsigned getMinSizeForRegBank(const RegisterBank &RB) { 475 switch (RB.getID()) { 476 case AArch64::GPRRegBankID: 477 return 32; 478 case AArch64::FPRRegBankID: 479 return 8; 480 default: 481 llvm_unreachable("Tried to get minimum size for unknown register bank."); 482 } 483 } 484 485 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { 486 auto &MI = *Root.getParent(); 487 auto &MBB = *MI.getParent(); 488 auto &MF = *MBB.getParent(); 489 auto &MRI = MF.getRegInfo(); 490 uint64_t Immed; 491 if (Root.isImm()) 492 Immed = Root.getImm(); 493 else if (Root.isCImm()) 494 Immed = Root.getCImm()->getZExtValue(); 495 else if (Root.isReg()) { 496 auto ValAndVReg = 497 getConstantVRegValWithLookThrough(Root.getReg(), MRI, true); 498 if (!ValAndVReg) 499 return None; 500 Immed = ValAndVReg->Value; 501 } else 502 return None; 503 return Immed; 504 } 505 506 /// Check whether \p I is a currently unsupported binary operation: 507 /// - it has an unsized type 508 /// - an operand is not a vreg 509 /// - all operands are not in the same bank 510 /// These are checks that should someday live in the verifier, but right now, 511 /// these are mostly limitations of the aarch64 selector. 
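/// For example, a generic binop whose source operands were assigned to
/// different register banks is rejected here rather than mis-selected.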
512 static bool unsupportedBinOp(const MachineInstr &I, 513 const AArch64RegisterBankInfo &RBI, 514 const MachineRegisterInfo &MRI, 515 const AArch64RegisterInfo &TRI) { 516 LLT Ty = MRI.getType(I.getOperand(0).getReg()); 517 if (!Ty.isValid()) { 518 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n"); 519 return true; 520 } 521 522 const RegisterBank *PrevOpBank = nullptr; 523 for (auto &MO : I.operands()) { 524 // FIXME: Support non-register operands. 525 if (!MO.isReg()) { 526 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n"); 527 return true; 528 } 529 530 // FIXME: Can generic operations have physical registers operands? If 531 // so, this will need to be taught about that, and we'll need to get the 532 // bank out of the minimal class for the register. 533 // Either way, this needs to be documented (and possibly verified). 534 if (!Register::isVirtualRegister(MO.getReg())) { 535 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n"); 536 return true; 537 } 538 539 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI); 540 if (!OpBank) { 541 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n"); 542 return true; 543 } 544 545 if (PrevOpBank && OpBank != PrevOpBank) { 546 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n"); 547 return true; 548 } 549 PrevOpBank = OpBank; 550 } 551 return false; 552 } 553 554 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc 555 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID 556 /// and of size \p OpSize. 557 /// \returns \p GenericOpc if the combination is unsupported. 558 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, 559 unsigned OpSize) { 560 switch (RegBankID) { 561 case AArch64::GPRRegBankID: 562 if (OpSize == 32) { 563 switch (GenericOpc) { 564 case TargetOpcode::G_SHL: 565 return AArch64::LSLVWr; 566 case TargetOpcode::G_LSHR: 567 return AArch64::LSRVWr; 568 case TargetOpcode::G_ASHR: 569 return AArch64::ASRVWr; 570 default: 571 return GenericOpc; 572 } 573 } else if (OpSize == 64) { 574 switch (GenericOpc) { 575 case TargetOpcode::G_PTR_ADD: 576 return AArch64::ADDXrr; 577 case TargetOpcode::G_SHL: 578 return AArch64::LSLVXr; 579 case TargetOpcode::G_LSHR: 580 return AArch64::LSRVXr; 581 case TargetOpcode::G_ASHR: 582 return AArch64::ASRVXr; 583 default: 584 return GenericOpc; 585 } 586 } 587 break; 588 case AArch64::FPRRegBankID: 589 switch (OpSize) { 590 case 32: 591 switch (GenericOpc) { 592 case TargetOpcode::G_FADD: 593 return AArch64::FADDSrr; 594 case TargetOpcode::G_FSUB: 595 return AArch64::FSUBSrr; 596 case TargetOpcode::G_FMUL: 597 return AArch64::FMULSrr; 598 case TargetOpcode::G_FDIV: 599 return AArch64::FDIVSrr; 600 default: 601 return GenericOpc; 602 } 603 case 64: 604 switch (GenericOpc) { 605 case TargetOpcode::G_FADD: 606 return AArch64::FADDDrr; 607 case TargetOpcode::G_FSUB: 608 return AArch64::FSUBDrr; 609 case TargetOpcode::G_FMUL: 610 return AArch64::FMULDrr; 611 case TargetOpcode::G_FDIV: 612 return AArch64::FDIVDrr; 613 case TargetOpcode::G_OR: 614 return AArch64::ORRv8i8; 615 default: 616 return GenericOpc; 617 } 618 } 619 break; 620 } 621 return GenericOpc; 622 } 623 624 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc, 625 /// appropriate for the (value) register bank \p RegBankID and of memory access 626 /// size \p OpSize. This returns the variant with the base+unsigned-immediate 627 /// addressing mode (e.g., LDRXui). 
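/// For example, a 64-bit load on the GPR bank selects to LDRXui, and a
/// 32-bit store on the FPR bank selects to STRSui.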
628 /// \returns \p GenericOpc if the combination is unsupported. 629 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, 630 unsigned OpSize) { 631 const bool isStore = GenericOpc == TargetOpcode::G_STORE; 632 switch (RegBankID) { 633 case AArch64::GPRRegBankID: 634 switch (OpSize) { 635 case 8: 636 return isStore ? AArch64::STRBBui : AArch64::LDRBBui; 637 case 16: 638 return isStore ? AArch64::STRHHui : AArch64::LDRHHui; 639 case 32: 640 return isStore ? AArch64::STRWui : AArch64::LDRWui; 641 case 64: 642 return isStore ? AArch64::STRXui : AArch64::LDRXui; 643 } 644 break; 645 case AArch64::FPRRegBankID: 646 switch (OpSize) { 647 case 8: 648 return isStore ? AArch64::STRBui : AArch64::LDRBui; 649 case 16: 650 return isStore ? AArch64::STRHui : AArch64::LDRHui; 651 case 32: 652 return isStore ? AArch64::STRSui : AArch64::LDRSui; 653 case 64: 654 return isStore ? AArch64::STRDui : AArch64::LDRDui; 655 } 656 break; 657 } 658 return GenericOpc; 659 } 660 661 #ifndef NDEBUG 662 /// Helper function that verifies that we have a valid copy at the end of 663 /// selectCopy. Verifies that the source and dest have the expected sizes and 664 /// then returns true. 665 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank, 666 const MachineRegisterInfo &MRI, 667 const TargetRegisterInfo &TRI, 668 const RegisterBankInfo &RBI) { 669 const Register DstReg = I.getOperand(0).getReg(); 670 const Register SrcReg = I.getOperand(1).getReg(); 671 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); 672 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); 673 674 // Make sure the size of the source and dest line up. 675 assert( 676 (DstSize == SrcSize || 677 // Copies are a mean to setup initial types, the number of 678 // bits may not exactly match. 679 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) || 680 // Copies are a mean to copy bits around, as long as we are 681 // on the same register class, that's fine. Otherwise, that 682 // means we need some SUBREG_TO_REG or AND & co. 683 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) && 684 "Copy with different width?!"); 685 686 // Check the size of the destination. 687 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) && 688 "GPRs cannot get more than 64-bit width values"); 689 690 return true; 691 } 692 #endif 693 694 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg 695 /// to \p *To. 696 /// 697 /// E.g "To = COPY SrcReg:SubReg" 698 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, 699 const RegisterBankInfo &RBI, Register SrcReg, 700 const TargetRegisterClass *To, unsigned SubReg) { 701 assert(SrcReg.isValid() && "Expected a valid source register?"); 702 assert(To && "Destination register class cannot be null"); 703 assert(SubReg && "Expected a valid subregister"); 704 705 MachineIRBuilder MIB(I); 706 auto SubRegCopy = 707 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg); 708 MachineOperand &RegOp = I.getOperand(1); 709 RegOp.setReg(SubRegCopy.getReg(0)); 710 711 // It's possible that the destination register won't be constrained. Make 712 // sure that happens. 713 if (!Register::isPhysicalRegister(I.getOperand(0).getReg())) 714 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI); 715 716 return true; 717 } 718 719 /// Helper function to get the source and destination register classes for a 720 /// copy. 
Returns a std::pair containing the source register class for the 721 /// copy, and the destination register class for the copy. If a register class 722 /// cannot be determined, then it will be nullptr. 723 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *> 724 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, 725 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, 726 const RegisterBankInfo &RBI) { 727 Register DstReg = I.getOperand(0).getReg(); 728 Register SrcReg = I.getOperand(1).getReg(); 729 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); 730 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); 731 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); 732 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); 733 734 // Special casing for cross-bank copies of s1s. We can technically represent 735 // a 1-bit value with any size of register. The minimum size for a GPR is 32 736 // bits. So, we need to put the FPR on 32 bits as well. 737 // 738 // FIXME: I'm not sure if this case holds true outside of copies. If it does, 739 // then we can pull it into the helpers that get the appropriate class for a 740 // register bank. Or make a new helper that carries along some constraint 741 // information. 742 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1)) 743 SrcSize = DstSize = 32; 744 745 return {getMinClassForRegBank(SrcRegBank, SrcSize, true), 746 getMinClassForRegBank(DstRegBank, DstSize, true)}; 747 } 748 749 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, 750 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, 751 const RegisterBankInfo &RBI) { 752 Register DstReg = I.getOperand(0).getReg(); 753 Register SrcReg = I.getOperand(1).getReg(); 754 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); 755 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); 756 757 // Find the correct register classes for the source and destination registers. 758 const TargetRegisterClass *SrcRC; 759 const TargetRegisterClass *DstRC; 760 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI); 761 762 if (!DstRC) { 763 LLVM_DEBUG(dbgs() << "Unexpected dest size " 764 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'); 765 return false; 766 } 767 768 // A couple helpers below, for making sure that the copy we produce is valid. 769 770 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want 771 // to verify that the src and dst are the same size, since that's handled by 772 // the SUBREG_TO_REG. 773 bool KnownValid = false; 774 775 // Returns true, or asserts if something we don't expect happens. Instead of 776 // returning true, we return isValidCopy() to ensure that we verify the 777 // result. 778 auto CheckCopy = [&]() { 779 // If we have a bitcast or something, we can't have physical registers. 780 assert((I.isCopy() || 781 (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && 782 !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && 783 "No phys reg on generic operator!"); 784 bool ValidCopy = true; 785 #ifndef NDEBUG 786 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI); 787 assert(ValidCopy && "Invalid copy."); 788 #endif 789 return ValidCopy; 790 }; 791 792 // Is this a copy? If so, then we may need to insert a subregister copy. 793 if (I.isCopy()) { 794 // Yes. Check if there's anything to fix up. 
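    // Three fixups are possible:
    //  1) The destination is smaller than anything the source bank can hold,
    //     so copy to the destination bank first and then take a subregister
    //     of that copy.
    //  2) The source is wider than the destination, so go through a
    //     subregister copy.
    //  3) The destination is wider than the source, so promote the source
    //     with a SUBREG_TO_REG.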
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      MachineOperand &RegOp = I.getOperand(1);
      RegOp.setReg(PromoteReg);

      // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
      KnownValid = true;
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (Register::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or defs. Copies do not have constraints.
849 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 850 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 851 << " operand\n"); 852 return false; 853 } 854 I.setDesc(TII.get(AArch64::COPY)); 855 return CheckCopy(); 856 } 857 858 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { 859 if (!DstTy.isScalar() || !SrcTy.isScalar()) 860 return GenericOpc; 861 862 const unsigned DstSize = DstTy.getSizeInBits(); 863 const unsigned SrcSize = SrcTy.getSizeInBits(); 864 865 switch (DstSize) { 866 case 32: 867 switch (SrcSize) { 868 case 32: 869 switch (GenericOpc) { 870 case TargetOpcode::G_SITOFP: 871 return AArch64::SCVTFUWSri; 872 case TargetOpcode::G_UITOFP: 873 return AArch64::UCVTFUWSri; 874 case TargetOpcode::G_FPTOSI: 875 return AArch64::FCVTZSUWSr; 876 case TargetOpcode::G_FPTOUI: 877 return AArch64::FCVTZUUWSr; 878 default: 879 return GenericOpc; 880 } 881 case 64: 882 switch (GenericOpc) { 883 case TargetOpcode::G_SITOFP: 884 return AArch64::SCVTFUXSri; 885 case TargetOpcode::G_UITOFP: 886 return AArch64::UCVTFUXSri; 887 case TargetOpcode::G_FPTOSI: 888 return AArch64::FCVTZSUWDr; 889 case TargetOpcode::G_FPTOUI: 890 return AArch64::FCVTZUUWDr; 891 default: 892 return GenericOpc; 893 } 894 default: 895 return GenericOpc; 896 } 897 case 64: 898 switch (SrcSize) { 899 case 32: 900 switch (GenericOpc) { 901 case TargetOpcode::G_SITOFP: 902 return AArch64::SCVTFUWDri; 903 case TargetOpcode::G_UITOFP: 904 return AArch64::UCVTFUWDri; 905 case TargetOpcode::G_FPTOSI: 906 return AArch64::FCVTZSUXSr; 907 case TargetOpcode::G_FPTOUI: 908 return AArch64::FCVTZUUXSr; 909 default: 910 return GenericOpc; 911 } 912 case 64: 913 switch (GenericOpc) { 914 case TargetOpcode::G_SITOFP: 915 return AArch64::SCVTFUXDri; 916 case TargetOpcode::G_UITOFP: 917 return AArch64::UCVTFUXDri; 918 case TargetOpcode::G_FPTOSI: 919 return AArch64::FCVTZSUXDr; 920 case TargetOpcode::G_FPTOUI: 921 return AArch64::FCVTZUUXDr; 922 default: 923 return GenericOpc; 924 } 925 default: 926 return GenericOpc; 927 } 928 default: 929 return GenericOpc; 930 }; 931 return GenericOpc; 932 } 933 934 static unsigned selectSelectOpc(MachineInstr &I, MachineRegisterInfo &MRI, 935 const RegisterBankInfo &RBI) { 936 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); 937 bool IsFP = (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() != 938 AArch64::GPRRegBankID); 939 LLT Ty = MRI.getType(I.getOperand(0).getReg()); 940 if (Ty == LLT::scalar(32)) 941 return IsFP ? AArch64::FCSELSrrr : AArch64::CSELWr; 942 else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) 943 return IsFP ? AArch64::FCSELDrrr : AArch64::CSELXr; 944 return 0; 945 } 946 947 /// Helper function to select the opcode for a G_FCMP. 948 static unsigned selectFCMPOpc(MachineInstr &I, MachineRegisterInfo &MRI) { 949 // If this is a compare against +0.0, then we don't have to explicitly 950 // materialize a constant. 951 const ConstantFP *FPImm = getConstantFPVRegVal(I.getOperand(3).getReg(), MRI); 952 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); 953 unsigned OpSize = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); 954 if (OpSize != 32 && OpSize != 64) 955 return 0; 956 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, 957 {AArch64::FCMPSri, AArch64::FCMPDri}}; 958 return CmpOpcTbl[ShouldUseImm][OpSize == 64]; 959 } 960 961 /// Returns true if \p P is an unsigned integer comparison predicate. 
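/// e.g. ICMP_UGT and ICMP_ULE are unsigned predicates; ICMP_SGT and ICMP_EQ
/// are not.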
962 static bool isUnsignedICMPPred(const CmpInst::Predicate P) { 963 switch (P) { 964 default: 965 return false; 966 case CmpInst::ICMP_UGT: 967 case CmpInst::ICMP_UGE: 968 case CmpInst::ICMP_ULT: 969 case CmpInst::ICMP_ULE: 970 return true; 971 } 972 } 973 974 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) { 975 switch (P) { 976 default: 977 llvm_unreachable("Unknown condition code!"); 978 case CmpInst::ICMP_NE: 979 return AArch64CC::NE; 980 case CmpInst::ICMP_EQ: 981 return AArch64CC::EQ; 982 case CmpInst::ICMP_SGT: 983 return AArch64CC::GT; 984 case CmpInst::ICMP_SGE: 985 return AArch64CC::GE; 986 case CmpInst::ICMP_SLT: 987 return AArch64CC::LT; 988 case CmpInst::ICMP_SLE: 989 return AArch64CC::LE; 990 case CmpInst::ICMP_UGT: 991 return AArch64CC::HI; 992 case CmpInst::ICMP_UGE: 993 return AArch64CC::HS; 994 case CmpInst::ICMP_ULT: 995 return AArch64CC::LO; 996 case CmpInst::ICMP_ULE: 997 return AArch64CC::LS; 998 } 999 } 1000 1001 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P, 1002 AArch64CC::CondCode &CondCode, 1003 AArch64CC::CondCode &CondCode2) { 1004 CondCode2 = AArch64CC::AL; 1005 switch (P) { 1006 default: 1007 llvm_unreachable("Unknown FP condition!"); 1008 case CmpInst::FCMP_OEQ: 1009 CondCode = AArch64CC::EQ; 1010 break; 1011 case CmpInst::FCMP_OGT: 1012 CondCode = AArch64CC::GT; 1013 break; 1014 case CmpInst::FCMP_OGE: 1015 CondCode = AArch64CC::GE; 1016 break; 1017 case CmpInst::FCMP_OLT: 1018 CondCode = AArch64CC::MI; 1019 break; 1020 case CmpInst::FCMP_OLE: 1021 CondCode = AArch64CC::LS; 1022 break; 1023 case CmpInst::FCMP_ONE: 1024 CondCode = AArch64CC::MI; 1025 CondCode2 = AArch64CC::GT; 1026 break; 1027 case CmpInst::FCMP_ORD: 1028 CondCode = AArch64CC::VC; 1029 break; 1030 case CmpInst::FCMP_UNO: 1031 CondCode = AArch64CC::VS; 1032 break; 1033 case CmpInst::FCMP_UEQ: 1034 CondCode = AArch64CC::EQ; 1035 CondCode2 = AArch64CC::VS; 1036 break; 1037 case CmpInst::FCMP_UGT: 1038 CondCode = AArch64CC::HI; 1039 break; 1040 case CmpInst::FCMP_UGE: 1041 CondCode = AArch64CC::PL; 1042 break; 1043 case CmpInst::FCMP_ULT: 1044 CondCode = AArch64CC::LT; 1045 break; 1046 case CmpInst::FCMP_ULE: 1047 CondCode = AArch64CC::LE; 1048 break; 1049 case CmpInst::FCMP_UNE: 1050 CondCode = AArch64CC::NE; 1051 break; 1052 } 1053 } 1054 1055 /// Return a register which can be used as a bit to test in a TB(N)Z. 1056 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, 1057 MachineRegisterInfo &MRI) { 1058 assert(Reg.isValid() && "Expected valid register!"); 1059 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) { 1060 unsigned Opc = MI->getOpcode(); 1061 1062 if (!MI->getOperand(0).isReg() || 1063 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 1064 break; 1065 1066 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits. 1067 // 1068 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number 1069 // on the truncated x is the same as the bit number on x. 1070 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT || 1071 Opc == TargetOpcode::G_TRUNC) { 1072 Register NextReg = MI->getOperand(1).getReg(); 1073 // Did we find something worth folding? 1074 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg)) 1075 break; 1076 1077 // NextReg is worth folding. Keep looking. 1078 Reg = NextReg; 1079 continue; 1080 } 1081 1082 // Attempt to find a suitable operation with a constant on one side. 
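    // e.g. a G_AND, G_XOR or shift with a G_CONSTANT operand; the constant
    // decides whether the bit test can be moved onto the non-constant
    // operand, and onto which bit.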
1083 Optional<uint64_t> C; 1084 Register TestReg; 1085 switch (Opc) { 1086 default: 1087 break; 1088 case TargetOpcode::G_AND: 1089 case TargetOpcode::G_XOR: { 1090 TestReg = MI->getOperand(1).getReg(); 1091 Register ConstantReg = MI->getOperand(2).getReg(); 1092 auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI); 1093 if (!VRegAndVal) { 1094 // AND commutes, check the other side for a constant. 1095 // FIXME: Can we canonicalize the constant so that it's always on the 1096 // same side at some point earlier? 1097 std::swap(ConstantReg, TestReg); 1098 VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI); 1099 } 1100 if (VRegAndVal) 1101 C = VRegAndVal->Value; 1102 break; 1103 } 1104 case TargetOpcode::G_ASHR: 1105 case TargetOpcode::G_LSHR: 1106 case TargetOpcode::G_SHL: { 1107 TestReg = MI->getOperand(1).getReg(); 1108 auto VRegAndVal = 1109 getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); 1110 if (VRegAndVal) 1111 C = VRegAndVal->Value; 1112 break; 1113 } 1114 } 1115 1116 // Didn't find a constant or viable register. Bail out of the loop. 1117 if (!C || !TestReg.isValid()) 1118 break; 1119 1120 // We found a suitable instruction with a constant. Check to see if we can 1121 // walk through the instruction. 1122 Register NextReg; 1123 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits(); 1124 switch (Opc) { 1125 default: 1126 break; 1127 case TargetOpcode::G_AND: 1128 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set. 1129 if ((*C >> Bit) & 1) 1130 NextReg = TestReg; 1131 break; 1132 case TargetOpcode::G_SHL: 1133 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in 1134 // the type of the register. 1135 if (*C <= Bit && (Bit - *C) < TestRegSize) { 1136 NextReg = TestReg; 1137 Bit = Bit - *C; 1138 } 1139 break; 1140 case TargetOpcode::G_ASHR: 1141 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits 1142 // in x 1143 NextReg = TestReg; 1144 Bit = Bit + *C; 1145 if (Bit >= TestRegSize) 1146 Bit = TestRegSize - 1; 1147 break; 1148 case TargetOpcode::G_LSHR: 1149 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x 1150 if ((Bit + *C) < TestRegSize) { 1151 NextReg = TestReg; 1152 Bit = Bit + *C; 1153 } 1154 break; 1155 case TargetOpcode::G_XOR: 1156 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when 1157 // appropriate. 1158 // 1159 // e.g. If x' = xor x, c, and the b-th bit is set in c then 1160 // 1161 // tbz x', b -> tbnz x, b 1162 // 1163 // Because x' only has the b-th bit set if x does not. 1164 if ((*C >> Bit) & 1) 1165 Invert = !Invert; 1166 NextReg = TestReg; 1167 break; 1168 } 1169 1170 // Check if we found anything worth folding. 1171 if (!NextReg.isValid()) 1172 return Reg; 1173 Reg = NextReg; 1174 } 1175 1176 return Reg; 1177 } 1178 1179 MachineInstr *AArch64InstructionSelector::emitTestBit( 1180 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB, 1181 MachineIRBuilder &MIB) const { 1182 assert(TestReg.isValid()); 1183 assert(ProduceNonFlagSettingCondBr && 1184 "Cannot emit TB(N)Z with speculation tracking!"); 1185 MachineRegisterInfo &MRI = *MIB.getMRI(); 1186 1187 // Attempt to optimize the test bit by walking over instructions. 
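  // e.g. testing bit 3 of (%x << 2) is equivalent to testing bit 1 of %x, so
  // we can often test %x directly and drop the shift.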
1188 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI); 1189 LLT Ty = MRI.getType(TestReg); 1190 unsigned Size = Ty.getSizeInBits(); 1191 assert(!Ty.isVector() && "Expected a scalar!"); 1192 assert(Bit < 64 && "Bit is too large!"); 1193 1194 // When the test register is a 64-bit register, we have to narrow to make 1195 // TBNZW work. 1196 bool UseWReg = Bit < 32; 1197 unsigned NecessarySize = UseWReg ? 32 : 64; 1198 if (Size < NecessarySize) 1199 TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB); 1200 else if (Size > NecessarySize) 1201 TestReg = narrowExtendRegIfNeeded(TestReg, MIB); 1202 1203 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX}, 1204 {AArch64::TBZW, AArch64::TBNZW}}; 1205 unsigned Opc = OpcTable[UseWReg][IsNegative]; 1206 auto TestBitMI = 1207 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB); 1208 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI); 1209 return &*TestBitMI; 1210 } 1211 1212 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( 1213 MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred, 1214 MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const { 1215 // Given something like this: 1216 // 1217 // %x = ...Something... 1218 // %one = G_CONSTANT i64 1 1219 // %zero = G_CONSTANT i64 0 1220 // %and = G_AND %x, %one 1221 // %cmp = G_ICMP intpred(ne), %and, %zero 1222 // %cmp_trunc = G_TRUNC %cmp 1223 // G_BRCOND %cmp_trunc, %bb.3 1224 // 1225 // We want to try and fold the AND into the G_BRCOND and produce either a 1226 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)). 1227 // 1228 // In this case, we'd get 1229 // 1230 // TBNZ %x %bb.3 1231 // 1232 if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND) 1233 return false; 1234 1235 // Need to be comparing against 0 to fold. 1236 if (CmpConstant != 0) 1237 return false; 1238 1239 MachineRegisterInfo &MRI = *MIB.getMRI(); 1240 1241 // Only support EQ and NE. If we have LT, then it *is* possible to fold, but 1242 // we don't want to do this. When we have an AND and LT, we need a TST/ANDS, 1243 // so folding would be redundant. 1244 if (Pred != CmpInst::Predicate::ICMP_EQ && 1245 Pred != CmpInst::Predicate::ICMP_NE) 1246 return false; 1247 1248 // Check if the AND has a constant on its RHS which we can use as a mask. 1249 // If it's a power of 2, then it's the same as checking a specific bit. 1250 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set) 1251 auto MaybeBit = 1252 getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI); 1253 if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value)) 1254 return false; 1255 1256 uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value)); 1257 Register TestReg = AndInst->getOperand(1).getReg(); 1258 bool Invert = Pred == CmpInst::Predicate::ICMP_NE; 1259 1260 // Emit a TB(N)Z. 
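  // For the example above (mask of 1, intpred(ne)) this is:
  //
  //   TBNZ %x, 0, %bb.3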
1261 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB); 1262 return true; 1263 } 1264 1265 bool AArch64InstructionSelector::selectCompareBranch( 1266 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 1267 1268 const Register CondReg = I.getOperand(0).getReg(); 1269 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1270 MachineInstr *CCMI = MRI.getVRegDef(CondReg); 1271 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) 1272 CCMI = MRI.getVRegDef(CCMI->getOperand(1).getReg()); 1273 if (CCMI->getOpcode() != TargetOpcode::G_ICMP) 1274 return false; 1275 1276 Register LHS = CCMI->getOperand(2).getReg(); 1277 Register RHS = CCMI->getOperand(3).getReg(); 1278 auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); 1279 MachineIRBuilder MIB(I); 1280 CmpInst::Predicate Pred = 1281 (CmpInst::Predicate)CCMI->getOperand(1).getPredicate(); 1282 MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI); 1283 1284 // When we can emit a TB(N)Z, prefer that. 1285 // 1286 // Handle non-commutative condition codes first. 1287 // Note that we don't want to do this when we have a G_AND because it can 1288 // become a tst. The tst will make the test bit in the TB(N)Z redundant. 1289 if (VRegAndVal && LHSMI->getOpcode() != TargetOpcode::G_AND) { 1290 int64_t C = VRegAndVal->Value; 1291 1292 // When we have a greater-than comparison, we can just test if the msb is 1293 // zero. 1294 if (C == -1 && Pred == CmpInst::ICMP_SGT) { 1295 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; 1296 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB); 1297 I.eraseFromParent(); 1298 return true; 1299 } 1300 1301 // When we have a less than comparison, we can just test if the msb is not 1302 // zero. 1303 if (C == 0 && Pred == CmpInst::ICMP_SLT) { 1304 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; 1305 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB); 1306 I.eraseFromParent(); 1307 return true; 1308 } 1309 } 1310 1311 if (!VRegAndVal) { 1312 std::swap(RHS, LHS); 1313 VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); 1314 LHSMI = getDefIgnoringCopies(LHS, MRI); 1315 } 1316 1317 if (!VRegAndVal || VRegAndVal->Value != 0) { 1318 // If we can't select a CBZ then emit a cmp + Bcc. 1319 MachineInstr *Cmp; 1320 std::tie(Cmp, Pred) = emitIntegerCompare( 1321 CCMI->getOperand(2), CCMI->getOperand(3), CCMI->getOperand(1), MIB); 1322 if (!Cmp) 1323 return false; 1324 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred); 1325 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); 1326 I.eraseFromParent(); 1327 return true; 1328 } 1329 1330 // Try to emit a TB(N)Z for an eq or ne condition. 1331 if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB, 1332 MIB)) { 1333 I.eraseFromParent(); 1334 return true; 1335 } 1336 1337 const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI); 1338 if (RB.getID() != AArch64::GPRRegBankID) 1339 return false; 1340 if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ) 1341 return false; 1342 1343 const unsigned CmpWidth = MRI.getType(LHS).getSizeInBits(); 1344 unsigned CBOpc = 0; 1345 if (CmpWidth <= 32) 1346 CBOpc = (Pred == CmpInst::ICMP_EQ ? AArch64::CBZW : AArch64::CBNZW); 1347 else if (CmpWidth == 64) 1348 CBOpc = (Pred == CmpInst::ICMP_EQ ? 
AArch64::CBZX : AArch64::CBNZX); 1349 else 1350 return false; 1351 1352 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(CBOpc)) 1353 .addUse(LHS) 1354 .addMBB(DestMBB) 1355 .constrainAllUses(TII, TRI, RBI); 1356 1357 I.eraseFromParent(); 1358 return true; 1359 } 1360 1361 /// Returns the element immediate value of a vector shift operand if found. 1362 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR. 1363 static Optional<int64_t> getVectorShiftImm(Register Reg, 1364 MachineRegisterInfo &MRI) { 1365 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand"); 1366 MachineInstr *OpMI = MRI.getVRegDef(Reg); 1367 assert(OpMI && "Expected to find a vreg def for vector shift operand"); 1368 if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR) 1369 return None; 1370 1371 // Check all operands are identical immediates. 1372 int64_t ImmVal = 0; 1373 for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) { 1374 auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI); 1375 if (!VRegAndVal) 1376 return None; 1377 1378 if (Idx == 1) 1379 ImmVal = VRegAndVal->Value; 1380 if (ImmVal != VRegAndVal->Value) 1381 return None; 1382 } 1383 1384 return ImmVal; 1385 } 1386 1387 /// Matches and returns the shift immediate value for a SHL instruction given 1388 /// a shift operand. 1389 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) { 1390 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI); 1391 if (!ShiftImm) 1392 return None; 1393 // Check the immediate is in range for a SHL. 1394 int64_t Imm = *ShiftImm; 1395 if (Imm < 0) 1396 return None; 1397 switch (SrcTy.getElementType().getSizeInBits()) { 1398 default: 1399 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift"); 1400 return None; 1401 case 8: 1402 if (Imm > 7) 1403 return None; 1404 break; 1405 case 16: 1406 if (Imm > 15) 1407 return None; 1408 break; 1409 case 32: 1410 if (Imm > 31) 1411 return None; 1412 break; 1413 case 64: 1414 if (Imm > 63) 1415 return None; 1416 break; 1417 } 1418 return Imm; 1419 } 1420 1421 bool AArch64InstructionSelector::selectVectorSHL( 1422 MachineInstr &I, MachineRegisterInfo &MRI) const { 1423 assert(I.getOpcode() == TargetOpcode::G_SHL); 1424 Register DstReg = I.getOperand(0).getReg(); 1425 const LLT Ty = MRI.getType(DstReg); 1426 Register Src1Reg = I.getOperand(1).getReg(); 1427 Register Src2Reg = I.getOperand(2).getReg(); 1428 1429 if (!Ty.isVector()) 1430 return false; 1431 1432 // Check if we have a vector of constants on RHS that we can select as the 1433 // immediate form. 1434 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI); 1435 1436 unsigned Opc = 0; 1437 if (Ty == LLT::vector(2, 64)) { 1438 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64; 1439 } else if (Ty == LLT::vector(4, 32)) { 1440 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; 1441 } else if (Ty == LLT::vector(2, 32)) { 1442 Opc = ImmVal ? 
AArch64::SHLv2i32_shift : AArch64::USHLv2i32; 1443 } else { 1444 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type"); 1445 return false; 1446 } 1447 1448 MachineIRBuilder MIB(I); 1449 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg}); 1450 if (ImmVal) 1451 Shl.addImm(*ImmVal); 1452 else 1453 Shl.addUse(Src2Reg); 1454 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI); 1455 I.eraseFromParent(); 1456 return true; 1457 } 1458 1459 bool AArch64InstructionSelector::selectVectorASHR( 1460 MachineInstr &I, MachineRegisterInfo &MRI) const { 1461 assert(I.getOpcode() == TargetOpcode::G_ASHR); 1462 Register DstReg = I.getOperand(0).getReg(); 1463 const LLT Ty = MRI.getType(DstReg); 1464 Register Src1Reg = I.getOperand(1).getReg(); 1465 Register Src2Reg = I.getOperand(2).getReg(); 1466 1467 if (!Ty.isVector()) 1468 return false; 1469 1470 // There is not a shift right register instruction, but the shift left 1471 // register instruction takes a signed value, where negative numbers specify a 1472 // right shift. 1473 1474 unsigned Opc = 0; 1475 unsigned NegOpc = 0; 1476 const TargetRegisterClass *RC = nullptr; 1477 if (Ty == LLT::vector(2, 64)) { 1478 Opc = AArch64::SSHLv2i64; 1479 NegOpc = AArch64::NEGv2i64; 1480 RC = &AArch64::FPR128RegClass; 1481 } else if (Ty == LLT::vector(4, 32)) { 1482 Opc = AArch64::SSHLv4i32; 1483 NegOpc = AArch64::NEGv4i32; 1484 RC = &AArch64::FPR128RegClass; 1485 } else if (Ty == LLT::vector(2, 32)) { 1486 Opc = AArch64::SSHLv2i32; 1487 NegOpc = AArch64::NEGv2i32; 1488 RC = &AArch64::FPR64RegClass; 1489 } else { 1490 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type"); 1491 return false; 1492 } 1493 1494 MachineIRBuilder MIB(I); 1495 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg}); 1496 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI); 1497 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg}); 1498 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI); 1499 I.eraseFromParent(); 1500 return true; 1501 } 1502 1503 bool AArch64InstructionSelector::selectVaStartAAPCS( 1504 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 1505 return false; 1506 } 1507 1508 bool AArch64InstructionSelector::selectVaStartDarwin( 1509 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 1510 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); 1511 Register ListReg = I.getOperand(0).getReg(); 1512 1513 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 1514 1515 auto MIB = 1516 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri)) 1517 .addDef(ArgsAddrReg) 1518 .addFrameIndex(FuncInfo->getVarArgsStackIndex()) 1519 .addImm(0) 1520 .addImm(0); 1521 1522 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 1523 1524 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui)) 1525 .addUse(ArgsAddrReg) 1526 .addUse(ListReg) 1527 .addImm(0) 1528 .addMemOperand(*I.memoperands_begin()); 1529 1530 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 1531 I.eraseFromParent(); 1532 return true; 1533 } 1534 1535 void AArch64InstructionSelector::materializeLargeCMVal( 1536 MachineInstr &I, const Value *V, unsigned OpFlags) const { 1537 MachineBasicBlock &MBB = *I.getParent(); 1538 MachineFunction &MF = *MBB.getParent(); 1539 MachineRegisterInfo &MRI = MF.getRegInfo(); 1540 MachineIRBuilder MIB(I); 1541 1542 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {}); 1543 MovZ->addOperand(MF, I.getOperand(1)); 1544 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 
| 1545 AArch64II::MO_NC); 1546 MovZ->addOperand(MF, MachineOperand::CreateImm(0)); 1547 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); 1548 1549 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset, 1550 Register ForceDstReg) { 1551 Register DstReg = ForceDstReg 1552 ? ForceDstReg 1553 : MRI.createVirtualRegister(&AArch64::GPR64RegClass); 1554 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg); 1555 if (auto *GV = dyn_cast<GlobalValue>(V)) { 1556 MovI->addOperand(MF, MachineOperand::CreateGA( 1557 GV, MovZ->getOperand(1).getOffset(), Flags)); 1558 } else { 1559 MovI->addOperand( 1560 MF, MachineOperand::CreateBA(cast<BlockAddress>(V), 1561 MovZ->getOperand(1).getOffset(), Flags)); 1562 } 1563 MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); 1564 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); 1565 return DstReg; 1566 }; 1567 Register DstReg = BuildMovK(MovZ.getReg(0), 1568 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); 1569 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); 1570 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); 1571 return; 1572 } 1573 1574 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { 1575 MachineBasicBlock &MBB = *I.getParent(); 1576 MachineFunction &MF = *MBB.getParent(); 1577 MachineRegisterInfo &MRI = MF.getRegInfo(); 1578 1579 switch (I.getOpcode()) { 1580 case TargetOpcode::G_SHL: 1581 case TargetOpcode::G_ASHR: 1582 case TargetOpcode::G_LSHR: { 1583 // These shifts are legalized to have 64 bit shift amounts because we want 1584 // to take advantage of the existing imported selection patterns that assume 1585 // the immediates are s64s. However, if the shifted type is 32 bits and for 1586 // some reason we receive input GMIR that has an s64 shift amount that's not 1587 // a G_CONSTANT, insert a truncate so that we can still select the s32 1588 // register-register variant. 1589 Register SrcReg = I.getOperand(1).getReg(); 1590 Register ShiftReg = I.getOperand(2).getReg(); 1591 const LLT ShiftTy = MRI.getType(ShiftReg); 1592 const LLT SrcTy = MRI.getType(SrcReg); 1593 if (SrcTy.isVector()) 1594 return false; 1595 assert(!ShiftTy.isVector() && "unexpected vector shift ty"); 1596 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64) 1597 return false; 1598 auto *AmtMI = MRI.getVRegDef(ShiftReg); 1599 assert(AmtMI && "could not find a vreg definition for shift amount"); 1600 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) { 1601 // Insert a subregister copy to implement a 64->32 trunc 1602 MachineIRBuilder MIB(I); 1603 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {}) 1604 .addReg(ShiftReg, 0, AArch64::sub_32); 1605 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); 1606 I.getOperand(2).setReg(Trunc.getReg(0)); 1607 } 1608 return true; 1609 } 1610 case TargetOpcode::G_STORE: 1611 return contractCrossBankCopyIntoStore(I, MRI); 1612 case TargetOpcode::G_PTR_ADD: 1613 return convertPtrAddToAdd(I, MRI); 1614 case TargetOpcode::G_LOAD: { 1615 // For scalar loads of pointers, we try to convert the dest type from p0 1616 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD 1617 // conversion, this should be ok because all users should have been 1618 // selected already, so the type doesn't matter for them. 
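    // e.g. %v:gpr(p0) = G_LOAD %addr(p0) is retyped to
    //      %v:gpr(s64) = G_LOAD %addr(p0)
    // so that the imported LDRXui patterns can match it.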
1619 Register DstReg = I.getOperand(0).getReg(); 1620 const LLT DstTy = MRI.getType(DstReg); 1621 if (!DstTy.isPointer()) 1622 return false; 1623 MRI.setType(DstReg, LLT::scalar(64)); 1624 return true; 1625 } 1626 default: 1627 return false; 1628 } 1629 } 1630 1631 /// This lowering tries to look for G_PTR_ADD instructions and then converts 1632 /// them to a standard G_ADD with a COPY on the source. 1633 /// 1634 /// The motivation behind this is to expose the add semantics to the imported 1635 /// tablegen patterns. We shouldn't need to check for uses being loads/stores, 1636 /// because the selector works bottom up, uses before defs. By the time we 1637 /// end up trying to select a G_PTR_ADD, we should have already attempted to 1638 /// fold this into addressing modes and were therefore unsuccessful. 1639 bool AArch64InstructionSelector::convertPtrAddToAdd( 1640 MachineInstr &I, MachineRegisterInfo &MRI) { 1641 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); 1642 Register DstReg = I.getOperand(0).getReg(); 1643 Register AddOp1Reg = I.getOperand(1).getReg(); 1644 const LLT PtrTy = MRI.getType(DstReg); 1645 if (PtrTy.getAddressSpace() != 0) 1646 return false; 1647 1648 MachineIRBuilder MIB(I); 1649 const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64); 1650 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg); 1651 // Set regbanks on the registers. 1652 if (PtrTy.isVector()) 1653 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID)); 1654 else 1655 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); 1656 1657 // Now turn the %dst(p0) = G_PTR_ADD %base, off into: 1658 // %dst(intty) = G_ADD %intbase, off 1659 I.setDesc(TII.get(TargetOpcode::G_ADD)); 1660 MRI.setType(DstReg, CastPtrTy); 1661 I.getOperand(1).setReg(PtrToInt.getReg(0)); 1662 if (!select(*PtrToInt)) { 1663 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"); 1664 return false; 1665 } 1666 return true; 1667 } 1668 1669 bool AArch64InstructionSelector::earlySelectSHL( 1670 MachineInstr &I, MachineRegisterInfo &MRI) const { 1671 // We try to match the immediate variant of LSL, which is actually an alias 1672 // for a special case of UBFM. Otherwise, we fall back to the imported 1673 // selector which will match the register variant. 1674 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op"); 1675 const auto &MO = I.getOperand(2); 1676 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI); 1677 if (!VRegAndVal) 1678 return false; 1679 1680 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 1681 if (DstTy.isVector()) 1682 return false; 1683 bool Is64Bit = DstTy.getSizeInBits() == 64; 1684 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO); 1685 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO); 1686 MachineIRBuilder MIB(I); 1687 1688 if (!Imm1Fn || !Imm2Fn) 1689 return false; 1690 1691 auto NewI = 1692 MIB.buildInstr(Is64Bit ? 
AArch64::UBFMXri : AArch64::UBFMWri, 1693 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()}); 1694 1695 for (auto &RenderFn : *Imm1Fn) 1696 RenderFn(NewI); 1697 for (auto &RenderFn : *Imm2Fn) 1698 RenderFn(NewI); 1699 1700 I.eraseFromParent(); 1701 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); 1702 } 1703 1704 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( 1705 MachineInstr &I, MachineRegisterInfo &MRI) { 1706 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE"); 1707 // If we're storing a scalar, it doesn't matter what register bank that 1708 // scalar is on. All that matters is the size. 1709 // 1710 // So, if we see something like this (with a 32-bit scalar as an example): 1711 // 1712 // %x:gpr(s32) = ... something ... 1713 // %y:fpr(s32) = COPY %x:gpr(s32) 1714 // G_STORE %y:fpr(s32) 1715 // 1716 // We can fix this up into something like this: 1717 // 1718 // G_STORE %x:gpr(s32) 1719 // 1720 // And then continue the selection process normally. 1721 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI); 1722 if (!DefDstReg.isValid()) 1723 return false; 1724 LLT DefDstTy = MRI.getType(DefDstReg); 1725 Register StoreSrcReg = I.getOperand(0).getReg(); 1726 LLT StoreSrcTy = MRI.getType(StoreSrcReg); 1727 1728 // If we get something strange like a physical register, then we shouldn't 1729 // go any further. 1730 if (!DefDstTy.isValid()) 1731 return false; 1732 1733 // Are the source and dst types the same size? 1734 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) 1735 return false; 1736 1737 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == 1738 RBI.getRegBank(DefDstReg, MRI, TRI)) 1739 return false; 1740 1741 // We have a cross-bank copy, which is entering a store. Let's fold it. 
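// Rewriting the store's value operand to the pre-copy vreg is all that is
// needed here; the cross-bank COPY becomes dead and is expected to be
// cleaned up by later passes.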
1742 I.getOperand(0).setReg(DefDstReg); 1743 return true; 1744 } 1745 1746 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const { 1747 assert(I.getParent() && "Instruction should be in a basic block!"); 1748 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 1749 1750 MachineBasicBlock &MBB = *I.getParent(); 1751 MachineFunction &MF = *MBB.getParent(); 1752 MachineRegisterInfo &MRI = MF.getRegInfo(); 1753 1754 switch (I.getOpcode()) { 1755 case TargetOpcode::G_SHL: 1756 return earlySelectSHL(I, MRI); 1757 case TargetOpcode::G_CONSTANT: { 1758 bool IsZero = false; 1759 if (I.getOperand(1).isCImm()) 1760 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0; 1761 else if (I.getOperand(1).isImm()) 1762 IsZero = I.getOperand(1).getImm() == 0; 1763 1764 if (!IsZero) 1765 return false; 1766 1767 Register DefReg = I.getOperand(0).getReg(); 1768 LLT Ty = MRI.getType(DefReg); 1769 if (Ty.getSizeInBits() == 64) { 1770 I.getOperand(1).ChangeToRegister(AArch64::XZR, false); 1771 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); 1772 } else if (Ty.getSizeInBits() == 32) { 1773 I.getOperand(1).ChangeToRegister(AArch64::WZR, false); 1774 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI); 1775 } else 1776 return false; 1777 1778 I.setDesc(TII.get(TargetOpcode::COPY)); 1779 return true; 1780 } 1781 default: 1782 return false; 1783 } 1784 } 1785 1786 bool AArch64InstructionSelector::select(MachineInstr &I) { 1787 assert(I.getParent() && "Instruction should be in a basic block!"); 1788 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 1789 1790 MachineBasicBlock &MBB = *I.getParent(); 1791 MachineFunction &MF = *MBB.getParent(); 1792 MachineRegisterInfo &MRI = MF.getRegInfo(); 1793 1794 const AArch64Subtarget *Subtarget = 1795 &static_cast<const AArch64Subtarget &>(MF.getSubtarget()); 1796 if (Subtarget->requiresStrictAlign()) { 1797 // We don't support this feature yet. 1798 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n"); 1799 return false; 1800 } 1801 1802 unsigned Opcode = I.getOpcode(); 1803 // G_PHI requires same handling as PHI 1804 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { 1805 // Certain non-generic instructions also need some special handling. 
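// Concretely, in the cases below: LOAD_STACK_GUARD just has its operands
// constrained, PHI/G_PHI get a register class chosen for their def, plain
// COPYs go through selectCopy, and anything else is assumed to already be
// a target instruction needing no further selection.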
1806 1807 if (Opcode == TargetOpcode::LOAD_STACK_GUARD) 1808 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1809 1810 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { 1811 const Register DefReg = I.getOperand(0).getReg(); 1812 const LLT DefTy = MRI.getType(DefReg); 1813 1814 const RegClassOrRegBank &RegClassOrBank = 1815 MRI.getRegClassOrRegBank(DefReg); 1816 1817 const TargetRegisterClass *DefRC 1818 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); 1819 if (!DefRC) { 1820 if (!DefTy.isValid()) { 1821 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); 1822 return false; 1823 } 1824 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); 1825 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI); 1826 if (!DefRC) { 1827 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); 1828 return false; 1829 } 1830 } 1831 1832 I.setDesc(TII.get(TargetOpcode::PHI)); 1833 1834 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); 1835 } 1836 1837 if (I.isCopy()) 1838 return selectCopy(I, TII, MRI, TRI, RBI); 1839 1840 return true; 1841 } 1842 1843 1844 if (I.getNumOperands() != I.getNumExplicitOperands()) { 1845 LLVM_DEBUG( 1846 dbgs() << "Generic instruction has unexpected implicit operands\n"); 1847 return false; 1848 } 1849 1850 // Try to do some lowering before we start instruction selecting. These 1851 // lowerings are purely transformations on the input G_MIR and so selection 1852 // must continue after any modification of the instruction. 1853 if (preISelLower(I)) { 1854 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it. 1855 } 1856 1857 // There may be patterns where the importer can't deal with them optimally, 1858 // but does select it to a suboptimal sequence so our custom C++ selection 1859 // code later never has a chance to work on it. Therefore, we have an early 1860 // selection attempt here to give priority to certain selection routines 1861 // over the imported ones. 1862 if (earlySelect(I)) 1863 return true; 1864 1865 if (selectImpl(I, *CoverageInfo)) 1866 return true; 1867 1868 LLT Ty = 1869 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{}; 1870 1871 MachineIRBuilder MIB(I); 1872 1873 switch (Opcode) { 1874 case TargetOpcode::G_BRCOND: { 1875 if (Ty.getSizeInBits() > 32) { 1876 // We shouldn't need this on AArch64, but it would be implemented as an 1877 // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the 1878 // bit being tested is < 32. 1879 LLVM_DEBUG(dbgs() << "G_BRCOND has type: " << Ty 1880 << ", expected at most 32-bits"); 1881 return false; 1882 } 1883 1884 const Register CondReg = I.getOperand(0).getReg(); 1885 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1886 1887 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z 1888 // instructions will not be produced, as they are conditional branch 1889 // instructions that do not set flags. 
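// So below, when flag-free branches are allowed, we first try to fold the
// compare into a TB(N)Z/CB(N)Z and otherwise emit a TBNZ on bit 0 of the
// condition; when they are not allowed, the fallback is a flag-setting ANDS
// against WZR (effectively a TST) followed by an ordinary conditional branch.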
1890 if (ProduceNonFlagSettingCondBr && selectCompareBranch(I, MF, MRI)) 1891 return true; 1892 1893 if (ProduceNonFlagSettingCondBr) { 1894 auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW)) 1895 .addUse(CondReg) 1896 .addImm(/*bit offset=*/0) 1897 .addMBB(DestMBB); 1898 1899 I.eraseFromParent(); 1900 return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI); 1901 } else { 1902 auto CMP = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri)) 1903 .addDef(AArch64::WZR) 1904 .addUse(CondReg) 1905 .addImm(1); 1906 constrainSelectedInstRegOperands(*CMP.getInstr(), TII, TRI, RBI); 1907 auto Bcc = 1908 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::Bcc)) 1909 .addImm(AArch64CC::EQ) 1910 .addMBB(DestMBB); 1911 1912 I.eraseFromParent(); 1913 return constrainSelectedInstRegOperands(*Bcc.getInstr(), TII, TRI, RBI); 1914 } 1915 } 1916 1917 case TargetOpcode::G_BRINDIRECT: { 1918 I.setDesc(TII.get(AArch64::BR)); 1919 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1920 } 1921 1922 case TargetOpcode::G_BRJT: 1923 return selectBrJT(I, MRI); 1924 1925 case AArch64::G_ADD_LOW: { 1926 // This op may have been separated from its ADRP companion by the localizer 1927 // or some other code motion pass. Given that many CPUs will try to 1928 // macro-fuse these operations anyway, select this into a MOVaddr pseudo 1929 // which will later be expanded into an ADRP+ADD pair after scheduling. 1930 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg()); 1931 if (BaseMI->getOpcode() != AArch64::ADRP) { 1932 I.setDesc(TII.get(AArch64::ADDXri)); 1933 I.addOperand(MachineOperand::CreateImm(0)); 1934 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1935 } 1936 assert(TM.getCodeModel() == CodeModel::Small && 1937 "Expected small code model"); 1938 MachineIRBuilder MIB(I); 1939 auto Op1 = BaseMI->getOperand(1); 1940 auto Op2 = I.getOperand(2); 1941 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {}) 1942 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(), 1943 Op1.getTargetFlags()) 1944 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(), 1945 Op2.getTargetFlags()); 1946 I.eraseFromParent(); 1947 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI); 1948 } 1949 1950 case TargetOpcode::G_BSWAP: { 1951 // Handle vector types for G_BSWAP directly. 1952 Register DstReg = I.getOperand(0).getReg(); 1953 LLT DstTy = MRI.getType(DstReg); 1954 1955 // We should only get vector types here; everything else is handled by the 1956 // importer right now. 1957 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) { 1958 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n"); 1959 return false; 1960 } 1961 1962 // Only handle 4 and 2 element vectors for now. 1963 // TODO: 16-bit elements. 1964 unsigned NumElts = DstTy.getNumElements(); 1965 if (NumElts != 4 && NumElts != 2) { 1966 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n"); 1967 return false; 1968 } 1969 1970 // Choose the correct opcode for the supported types. Right now, that's 1971 // v2s32, v4s32, and v2s64. 1972 unsigned Opc = 0; 1973 unsigned EltSize = DstTy.getElementType().getSizeInBits(); 1974 if (EltSize == 32) 1975 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8 1976 : AArch64::REV32v16i8; 1977 else if (EltSize == 64) 1978 Opc = AArch64::REV64v16i8; 1979 1980 // We should always get something by the time we get here...
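// Roughly: a G_BSWAP of <4 x s32> becomes REV32v16i8 (and <2 x s32> the
// 64-bit REV32v8i8 form), i.e. a byte reverse within each 32-bit element,
// while <2 x s64> uses REV64v16i8.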
1981 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?"); 1982 1983 I.setDesc(TII.get(Opc)); 1984 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1985 } 1986 1987 case TargetOpcode::G_FCONSTANT: 1988 case TargetOpcode::G_CONSTANT: { 1989 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; 1990 1991 const LLT s8 = LLT::scalar(8); 1992 const LLT s16 = LLT::scalar(16); 1993 const LLT s32 = LLT::scalar(32); 1994 const LLT s64 = LLT::scalar(64); 1995 const LLT p0 = LLT::pointer(0, 64); 1996 1997 const Register DefReg = I.getOperand(0).getReg(); 1998 const LLT DefTy = MRI.getType(DefReg); 1999 const unsigned DefSize = DefTy.getSizeInBits(); 2000 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2001 2002 // FIXME: Redundant check, but even less readable when factored out. 2003 if (isFP) { 2004 if (Ty != s32 && Ty != s64) { 2005 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty 2006 << " constant, expected: " << s32 << " or " << s64 2007 << '\n'); 2008 return false; 2009 } 2010 2011 if (RB.getID() != AArch64::FPRRegBankID) { 2012 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty 2013 << " constant on bank: " << RB 2014 << ", expected: FPR\n"); 2015 return false; 2016 } 2017 2018 // The case when we have 0.0 is covered by tablegen. Reject it here so we 2019 // can be sure tablegen works correctly and isn't rescued by this code. 2020 if (I.getOperand(1).getFPImm()->getValueAPF().isExactlyValue(0.0)) 2021 return false; 2022 } else { 2023 // s32 and s64 are covered by tablegen. 2024 if (Ty != p0 && Ty != s8 && Ty != s16) { 2025 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty 2026 << " constant, expected: " << s32 << ", " << s64 2027 << ", or " << p0 << '\n'); 2028 return false; 2029 } 2030 2031 if (RB.getID() != AArch64::GPRRegBankID) { 2032 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty 2033 << " constant on bank: " << RB 2034 << ", expected: GPR\n"); 2035 return false; 2036 } 2037 } 2038 2039 // We allow G_CONSTANT of types < 32b. 2040 const unsigned MovOpc = 2041 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm; 2042 2043 if (isFP) { 2044 // Either emit a FMOV, or emit a copy to emit a normal mov. 2045 const TargetRegisterClass &GPRRC = 2046 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass; 2047 const TargetRegisterClass &FPRRC = 2048 DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass; 2049 2050 // Can we use a FMOV instruction to represent the immediate? 2051 if (emitFMovForFConstant(I, MRI)) 2052 return true; 2053 2054 // For 64b values, emit a constant pool load instead. 2055 if (DefSize == 64) { 2056 auto *FPImm = I.getOperand(1).getFPImm(); 2057 MachineIRBuilder MIB(I); 2058 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB); 2059 if (!LoadMI) { 2060 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n"); 2061 return false; 2062 } 2063 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()}); 2064 I.eraseFromParent(); 2065 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI); 2066 } 2067 2068 // Nope. Emit a copy and use a normal mov instead. 
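// A rough sketch of the fallback for an s32 G_FCONSTANT that FMOV cannot
// encode (the s64 case was already diverted to a constant pool load above):
//   %tmp:gpr32 = MOVi32imm <bit pattern of the float>
//   %dst:fpr32 = COPY %tmp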
2069 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC); 2070 MachineOperand &RegOp = I.getOperand(0); 2071 RegOp.setReg(DefGPRReg); 2072 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 2073 MIB.buildCopy({DefReg}, {DefGPRReg}); 2074 2075 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) { 2076 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n"); 2077 return false; 2078 } 2079 2080 MachineOperand &ImmOp = I.getOperand(1); 2081 // FIXME: Is going through int64_t always correct? 2082 ImmOp.ChangeToImmediate( 2083 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); 2084 } else if (I.getOperand(1).isCImm()) { 2085 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue(); 2086 I.getOperand(1).ChangeToImmediate(Val); 2087 } else if (I.getOperand(1).isImm()) { 2088 uint64_t Val = I.getOperand(1).getImm(); 2089 I.getOperand(1).ChangeToImmediate(Val); 2090 } 2091 2092 I.setDesc(TII.get(MovOpc)); 2093 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2094 return true; 2095 } 2096 case TargetOpcode::G_EXTRACT: { 2097 Register DstReg = I.getOperand(0).getReg(); 2098 Register SrcReg = I.getOperand(1).getReg(); 2099 LLT SrcTy = MRI.getType(SrcReg); 2100 LLT DstTy = MRI.getType(DstReg); 2101 (void)DstTy; 2102 unsigned SrcSize = SrcTy.getSizeInBits(); 2103 2104 if (SrcTy.getSizeInBits() > 64) { 2105 // This should be an extract of an s128, which is like a vector extract. 2106 if (SrcTy.getSizeInBits() != 128) 2107 return false; 2108 // Only support extracting 64 bits from an s128 at the moment. 2109 if (DstTy.getSizeInBits() != 64) 2110 return false; 2111 2112 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 2113 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2114 // Check we have the right regbank always. 2115 assert(SrcRB.getID() == AArch64::FPRRegBankID && 2116 DstRB.getID() == AArch64::FPRRegBankID && 2117 "Wrong extract regbank!"); 2118 (void)SrcRB; 2119 2120 // Emit the same code as a vector extract. 2121 // Offset must be a multiple of 64. 2122 unsigned Offset = I.getOperand(2).getImm(); 2123 if (Offset % 64 != 0) 2124 return false; 2125 unsigned LaneIdx = Offset / 64; 2126 MachineIRBuilder MIB(I); 2127 MachineInstr *Extract = emitExtractVectorElt( 2128 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); 2129 if (!Extract) 2130 return false; 2131 I.eraseFromParent(); 2132 return true; 2133 } 2134 2135 I.setDesc(TII.get(SrcSize == 64 ? 
AArch64::UBFMXri : AArch64::UBFMWri)); 2136 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + 2137 Ty.getSizeInBits() - 1); 2138 2139 if (SrcSize < 64) { 2140 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && 2141 "unexpected G_EXTRACT types"); 2142 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2143 } 2144 2145 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 2146 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 2147 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 2148 .addReg(DstReg, 0, AArch64::sub_32); 2149 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 2150 AArch64::GPR32RegClass, MRI); 2151 I.getOperand(0).setReg(DstReg); 2152 2153 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2154 } 2155 2156 case TargetOpcode::G_INSERT: { 2157 LLT SrcTy = MRI.getType(I.getOperand(2).getReg()); 2158 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2159 unsigned DstSize = DstTy.getSizeInBits(); 2160 // Larger inserts are vectors, same-size ones should be something else by 2161 // now (split up or turned into COPYs). 2162 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) 2163 return false; 2164 2165 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri)); 2166 unsigned LSB = I.getOperand(3).getImm(); 2167 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); 2168 I.getOperand(3).setImm((DstSize - LSB) % DstSize); 2169 MachineInstrBuilder(MF, I).addImm(Width - 1); 2170 2171 if (DstSize < 64) { 2172 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && 2173 "unexpected G_INSERT types"); 2174 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2175 } 2176 2177 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 2178 BuildMI(MBB, I.getIterator(), I.getDebugLoc(), 2179 TII.get(AArch64::SUBREG_TO_REG)) 2180 .addDef(SrcReg) 2181 .addImm(0) 2182 .addUse(I.getOperand(2).getReg()) 2183 .addImm(AArch64::sub_32); 2184 RBI.constrainGenericRegister(I.getOperand(2).getReg(), 2185 AArch64::GPR32RegClass, MRI); 2186 I.getOperand(2).setReg(SrcReg); 2187 2188 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2189 } 2190 case TargetOpcode::G_FRAME_INDEX: { 2191 // allocas and G_FRAME_INDEX are only supported in addrspace(0). 2192 if (Ty != LLT::pointer(0, 64)) { 2193 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty 2194 << ", expected: " << LLT::pointer(0, 64) << '\n'); 2195 return false; 2196 } 2197 I.setDesc(TII.get(AArch64::ADDXri)); 2198 2199 // MOs for a #0 shifted immediate. 2200 I.addOperand(MachineOperand::CreateImm(0)); 2201 I.addOperand(MachineOperand::CreateImm(0)); 2202 2203 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2204 } 2205 2206 case TargetOpcode::G_GLOBAL_VALUE: { 2207 auto GV = I.getOperand(1).getGlobal(); 2208 if (GV->isThreadLocal()) 2209 return selectTLSGlobalValue(I, MRI); 2210 2211 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); 2212 if (OpFlags & AArch64II::MO_GOT) { 2213 I.setDesc(TII.get(AArch64::LOADgot)); 2214 I.getOperand(1).setTargetFlags(OpFlags); 2215 } else if (TM.getCodeModel() == CodeModel::Large) { 2216 // Materialize the global using movz/movk instructions. 
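// Roughly, the expansion is a MOVZ of bits [15:0] followed by MOVKs for
// bits [31:16], [47:32] and [63:48] of the symbol's address, using the
// :abs_g0_nc:/:abs_g1_nc:/:abs_g2_nc:/:abs_g3: style relocations.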
2217 materializeLargeCMVal(I, GV, OpFlags); 2218 I.eraseFromParent(); 2219 return true; 2220 } else if (TM.getCodeModel() == CodeModel::Tiny) { 2221 I.setDesc(TII.get(AArch64::ADR)); 2222 I.getOperand(1).setTargetFlags(OpFlags); 2223 } else { 2224 I.setDesc(TII.get(AArch64::MOVaddr)); 2225 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); 2226 MachineInstrBuilder MIB(MF, I); 2227 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(), 2228 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 2229 } 2230 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2231 } 2232 2233 case TargetOpcode::G_ZEXTLOAD: 2234 case TargetOpcode::G_LOAD: 2235 case TargetOpcode::G_STORE: { 2236 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; 2237 MachineIRBuilder MIB(I); 2238 2239 LLT PtrTy = MRI.getType(I.getOperand(1).getReg()); 2240 2241 if (PtrTy != LLT::pointer(0, 64)) { 2242 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy 2243 << ", expected: " << LLT::pointer(0, 64) << '\n'); 2244 return false; 2245 } 2246 2247 auto &MemOp = **I.memoperands_begin(); 2248 if (MemOp.isAtomic()) { 2249 // For now we just support s8 acquire loads to be able to compile stack 2250 // protector code. 2251 if (MemOp.getOrdering() == AtomicOrdering::Acquire && 2252 MemOp.getSize() == 1) { 2253 I.setDesc(TII.get(AArch64::LDARB)); 2254 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2255 } 2256 LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n"); 2257 return false; 2258 } 2259 unsigned MemSizeInBits = MemOp.getSize() * 8; 2260 2261 const Register PtrReg = I.getOperand(1).getReg(); 2262 #ifndef NDEBUG 2263 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); 2264 // Sanity-check the pointer register. 2265 assert(PtrRB.getID() == AArch64::GPRRegBankID && 2266 "Load/Store pointer operand isn't a GPR"); 2267 assert(MRI.getType(PtrReg).isPointer() && 2268 "Load/Store pointer operand isn't a pointer"); 2269 #endif 2270 2271 const Register ValReg = I.getOperand(0).getReg(); 2272 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); 2273 2274 const unsigned NewOpc = 2275 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); 2276 if (NewOpc == I.getOpcode()) 2277 return false; 2278 2279 I.setDesc(TII.get(NewOpc)); 2280 2281 uint64_t Offset = 0; 2282 auto *PtrMI = MRI.getVRegDef(PtrReg); 2283 2284 // Try to fold a GEP into our unsigned immediate addressing mode. 2285 if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) { 2286 if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) { 2287 int64_t Imm = *COff; 2288 const unsigned Size = MemSizeInBits / 8; 2289 const unsigned Scale = Log2_32(Size); 2290 if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) { 2291 Register Ptr2Reg = PtrMI->getOperand(1).getReg(); 2292 I.getOperand(1).setReg(Ptr2Reg); 2293 PtrMI = MRI.getVRegDef(Ptr2Reg); 2294 Offset = Imm / Size; 2295 } 2296 } 2297 } 2298 2299 // If we haven't folded anything into our addressing mode yet, try to fold 2300 // a frame index into the base+offset. 2301 if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX) 2302 I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex()); 2303 2304 I.addOperand(MachineOperand::CreateImm(Offset)); 2305 2306 // If we're storing a 0, use WZR/XZR. 
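// Illustrative example (made-up vregs):
//   %zero:gpr(s64) = G_CONSTANT i64 0
//   G_STORE %zero(s64), %ptr(p0) :: (store 8)
// selects to an STRXui of XZR, so no register needs to be materialized
// for the zero.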
2307 if (auto CVal = getConstantVRegVal(ValReg, MRI)) { 2308 if (*CVal == 0 && Opcode == TargetOpcode::G_STORE) { 2309 if (I.getOpcode() == AArch64::STRWui) 2310 I.getOperand(0).setReg(AArch64::WZR); 2311 else if (I.getOpcode() == AArch64::STRXui) 2312 I.getOperand(0).setReg(AArch64::XZR); 2313 } 2314 } 2315 2316 if (IsZExtLoad) { 2317 // The zextload from a smaller type to i32 should be handled by the importer. 2318 if (MRI.getType(ValReg).getSizeInBits() != 64) 2319 return false; 2320 // If we have a ZEXTLOAD then change the load's type to be a narrower reg 2321 //and zero_extend with SUBREG_TO_REG. 2322 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2323 Register DstReg = I.getOperand(0).getReg(); 2324 I.getOperand(0).setReg(LdReg); 2325 2326 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 2327 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) 2328 .addImm(0) 2329 .addUse(LdReg) 2330 .addImm(AArch64::sub_32); 2331 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2332 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, 2333 MRI); 2334 } 2335 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2336 } 2337 2338 case TargetOpcode::G_SMULH: 2339 case TargetOpcode::G_UMULH: { 2340 // Reject the various things we don't support yet. 2341 if (unsupportedBinOp(I, RBI, MRI, TRI)) 2342 return false; 2343 2344 const Register DefReg = I.getOperand(0).getReg(); 2345 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2346 2347 if (RB.getID() != AArch64::GPRRegBankID) { 2348 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n"); 2349 return false; 2350 } 2351 2352 if (Ty != LLT::scalar(64)) { 2353 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty 2354 << ", expected: " << LLT::scalar(64) << '\n'); 2355 return false; 2356 } 2357 2358 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr 2359 : AArch64::UMULHrr; 2360 I.setDesc(TII.get(NewOpc)); 2361 2362 // Now that we selected an opcode, we need to constrain the register 2363 // operands to use appropriate classes. 2364 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2365 } 2366 case TargetOpcode::G_FADD: 2367 case TargetOpcode::G_FSUB: 2368 case TargetOpcode::G_FMUL: 2369 case TargetOpcode::G_FDIV: 2370 2371 case TargetOpcode::G_ASHR: 2372 if (MRI.getType(I.getOperand(0).getReg()).isVector()) 2373 return selectVectorASHR(I, MRI); 2374 LLVM_FALLTHROUGH; 2375 case TargetOpcode::G_SHL: 2376 if (Opcode == TargetOpcode::G_SHL && 2377 MRI.getType(I.getOperand(0).getReg()).isVector()) 2378 return selectVectorSHL(I, MRI); 2379 LLVM_FALLTHROUGH; 2380 case TargetOpcode::G_OR: 2381 case TargetOpcode::G_LSHR: { 2382 // Reject the various things we don't support yet. 2383 if (unsupportedBinOp(I, RBI, MRI, TRI)) 2384 return false; 2385 2386 const unsigned OpSize = Ty.getSizeInBits(); 2387 2388 const Register DefReg = I.getOperand(0).getReg(); 2389 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2390 2391 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize); 2392 if (NewOpc == I.getOpcode()) 2393 return false; 2394 2395 I.setDesc(TII.get(NewOpc)); 2396 // FIXME: Should the type be always reset in setDesc? 2397 2398 // Now that we selected an opcode, we need to constrain the register 2399 // operands to use appropriate classes. 
2400 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2401 } 2402 2403 case TargetOpcode::G_PTR_ADD: { 2404 MachineIRBuilder MIRBuilder(I); 2405 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), 2406 MIRBuilder); 2407 I.eraseFromParent(); 2408 return true; 2409 } 2410 case TargetOpcode::G_UADDO: { 2411 // TODO: Support other types. 2412 unsigned OpSize = Ty.getSizeInBits(); 2413 if (OpSize != 32 && OpSize != 64) { 2414 LLVM_DEBUG( 2415 dbgs() 2416 << "G_UADDO currently only supported for 32 and 64 b types.\n"); 2417 return false; 2418 } 2419 2420 // TODO: Support vectors. 2421 if (Ty.isVector()) { 2422 LLVM_DEBUG(dbgs() << "G_UADDO currently only supported for scalars.\n"); 2423 return false; 2424 } 2425 2426 // Add and set the set condition flag. 2427 unsigned AddsOpc = OpSize == 32 ? AArch64::ADDSWrr : AArch64::ADDSXrr; 2428 MachineIRBuilder MIRBuilder(I); 2429 auto AddsMI = MIRBuilder.buildInstr(AddsOpc, {I.getOperand(0)}, 2430 {I.getOperand(2), I.getOperand(3)}); 2431 constrainSelectedInstRegOperands(*AddsMI, TII, TRI, RBI); 2432 2433 // Now, put the overflow result in the register given by the first operand 2434 // to the G_UADDO. CSINC increments the result when the predicate is false, 2435 // so to get the increment when it's true, we need to use the inverse. In 2436 // this case, we want to increment when carry is set. 2437 auto CsetMI = MIRBuilder 2438 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, 2439 {Register(AArch64::WZR), Register(AArch64::WZR)}) 2440 .addImm(getInvertedCondCode(AArch64CC::HS)); 2441 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI); 2442 I.eraseFromParent(); 2443 return true; 2444 } 2445 2446 case TargetOpcode::G_PTRMASK: { 2447 Register MaskReg = I.getOperand(2).getReg(); 2448 Optional<int64_t> MaskVal = getConstantVRegVal(MaskReg, MRI); 2449 // TODO: Implement arbitrary cases 2450 if (!MaskVal || !isShiftedMask_64(*MaskVal)) 2451 return false; 2452 2453 uint64_t Mask = *MaskVal; 2454 I.setDesc(TII.get(AArch64::ANDXri)); 2455 I.getOperand(2).ChangeToImmediate( 2456 AArch64_AM::encodeLogicalImmediate(Mask, 64)); 2457 2458 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2459 } 2460 case TargetOpcode::G_PTRTOINT: 2461 case TargetOpcode::G_TRUNC: { 2462 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2463 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 2464 2465 const Register DstReg = I.getOperand(0).getReg(); 2466 const Register SrcReg = I.getOperand(1).getReg(); 2467 2468 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2469 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 2470 2471 if (DstRB.getID() != SrcRB.getID()) { 2472 LLVM_DEBUG( 2473 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"); 2474 return false; 2475 } 2476 2477 if (DstRB.getID() == AArch64::GPRRegBankID) { 2478 const TargetRegisterClass *DstRC = 2479 getRegClassForTypeOnBank(DstTy, DstRB, RBI); 2480 if (!DstRC) 2481 return false; 2482 2483 const TargetRegisterClass *SrcRC = 2484 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI); 2485 if (!SrcRC) 2486 return false; 2487 2488 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 2489 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 2490 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"); 2491 return false; 2492 } 2493 2494 if (DstRC == SrcRC) { 2495 // Nothing to be done 2496 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) && 2497 SrcTy == LLT::scalar(64)) { 2498 
llvm_unreachable("TableGen can import this case"); 2499 return false; 2500 } else if (DstRC == &AArch64::GPR32RegClass && 2501 SrcRC == &AArch64::GPR64RegClass) { 2502 I.getOperand(1).setSubReg(AArch64::sub_32); 2503 } else { 2504 LLVM_DEBUG( 2505 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"); 2506 return false; 2507 } 2508 2509 I.setDesc(TII.get(TargetOpcode::COPY)); 2510 return true; 2511 } else if (DstRB.getID() == AArch64::FPRRegBankID) { 2512 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) { 2513 I.setDesc(TII.get(AArch64::XTNv4i16)); 2514 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2515 return true; 2516 } 2517 2518 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { 2519 MachineIRBuilder MIB(I); 2520 MachineInstr *Extract = emitExtractVectorElt( 2521 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB); 2522 if (!Extract) 2523 return false; 2524 I.eraseFromParent(); 2525 return true; 2526 } 2527 2528 // We might have a vector G_PTRTOINT, in which case just emit a COPY. 2529 if (Opcode == TargetOpcode::G_PTRTOINT) { 2530 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector"); 2531 I.setDesc(TII.get(TargetOpcode::COPY)); 2532 return true; 2533 } 2534 } 2535 2536 return false; 2537 } 2538 2539 case TargetOpcode::G_ANYEXT: { 2540 const Register DstReg = I.getOperand(0).getReg(); 2541 const Register SrcReg = I.getOperand(1).getReg(); 2542 2543 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI); 2544 if (RBDst.getID() != AArch64::GPRRegBankID) { 2545 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst 2546 << ", expected: GPR\n"); 2547 return false; 2548 } 2549 2550 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI); 2551 if (RBSrc.getID() != AArch64::GPRRegBankID) { 2552 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc 2553 << ", expected: GPR\n"); 2554 return false; 2555 } 2556 2557 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); 2558 2559 if (DstSize == 0) { 2560 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n"); 2561 return false; 2562 } 2563 2564 if (DstSize != 64 && DstSize > 32) { 2565 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize 2566 << ", expected: 32 or 64\n"); 2567 return false; 2568 } 2569 // At this point G_ANYEXT is just like a plain COPY, but we need 2570 // to explicitly form the 64-bit value if any. 2571 if (DstSize > 32) { 2572 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass); 2573 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) 2574 .addDef(ExtSrc) 2575 .addImm(0) 2576 .addUse(SrcReg) 2577 .addImm(AArch64::sub_32); 2578 I.getOperand(1).setReg(ExtSrc); 2579 } 2580 return selectCopy(I, TII, MRI, TRI, RBI); 2581 } 2582 2583 case TargetOpcode::G_ZEXT: 2584 case TargetOpcode::G_SEXT_INREG: 2585 case TargetOpcode::G_SEXT: { 2586 unsigned Opcode = I.getOpcode(); 2587 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT; 2588 const Register DefReg = I.getOperand(0).getReg(); 2589 Register SrcReg = I.getOperand(1).getReg(); 2590 const LLT DstTy = MRI.getType(DefReg); 2591 const LLT SrcTy = MRI.getType(SrcReg); 2592 unsigned DstSize = DstTy.getSizeInBits(); 2593 unsigned SrcSize = SrcTy.getSizeInBits(); 2594 2595 // SEXT_INREG has the same src reg size as dst, the size of the value to be 2596 // extended is encoded in the imm. 
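// For example, in
//   %dst:gpr(s64) = G_SEXT_INREG %src:gpr(s64), 8
// the effective source size below is 8, not 64, so the SBFM emitted later
// sign-extends from bit 7.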
2597 if (Opcode == TargetOpcode::G_SEXT_INREG) 2598 SrcSize = I.getOperand(2).getImm(); 2599 2600 if (DstTy.isVector()) 2601 return false; // Should be handled by imported patterns. 2602 2603 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == 2604 AArch64::GPRRegBankID && 2605 "Unexpected ext regbank"); 2606 2607 MachineIRBuilder MIB(I); 2608 MachineInstr *ExtI; 2609 2610 // First check if we're extending the result of a load which has a dest type 2611 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest 2612 // GPR register on AArch64 and all loads which are smaller automatically 2613 // zero-extend the upper bits. E.g. 2614 // %v(s8) = G_LOAD %p, :: (load 1) 2615 // %v2(s32) = G_ZEXT %v(s8) 2616 if (!IsSigned) { 2617 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); 2618 bool IsGPR = 2619 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID; 2620 if (LoadMI && IsGPR) { 2621 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); 2622 unsigned BytesLoaded = MemOp->getSize(); 2623 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) 2624 return selectCopy(I, TII, MRI, TRI, RBI); 2625 } 2626 2627 // If we are zero extending from 32 bits to 64 bits, it's possible that 2628 // the instruction implicitly does the zero extend for us. In that case, 2629 // we can just emit a SUBREG_TO_REG. 2630 if (IsGPR && SrcSize == 32 && DstSize == 64) { 2631 // Unlike with the G_LOAD case, we don't want to look through copies 2632 // here. 2633 MachineInstr *Def = MRI.getVRegDef(SrcReg); 2634 if (Def && isDef32(*Def)) { 2635 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) 2636 .addImm(0) 2637 .addUse(SrcReg) 2638 .addImm(AArch64::sub_32); 2639 2640 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, 2641 MRI)) { 2642 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n"); 2643 return false; 2644 } 2645 2646 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, 2647 MRI)) { 2648 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n"); 2649 return false; 2650 } 2651 2652 I.eraseFromParent(); 2653 return true; 2654 } 2655 } 2656 } 2657 2658 if (DstSize == 64) { 2659 if (Opcode != TargetOpcode::G_SEXT_INREG) { 2660 // FIXME: Can we avoid manually doing this? 2661 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, 2662 MRI)) { 2663 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) 2664 << " operand\n"); 2665 return false; 2666 } 2667 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, 2668 {&AArch64::GPR64RegClass}, {}) 2669 .addImm(0) 2670 .addUse(SrcReg) 2671 .addImm(AArch64::sub_32) 2672 .getReg(0); 2673 } 2674 2675 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, 2676 {DefReg}, {SrcReg}) 2677 .addImm(0) 2678 .addImm(SrcSize - 1); 2679 } else if (DstSize <= 32) { 2680 ExtI = MIB.buildInstr(IsSigned ? 
AArch64::SBFMWri : AArch64::UBFMWri, 2681 {DefReg}, {SrcReg}) 2682 .addImm(0) 2683 .addImm(SrcSize - 1); 2684 } else { 2685 return false; 2686 } 2687 2688 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 2689 I.eraseFromParent(); 2690 return true; 2691 } 2692 2693 case TargetOpcode::G_SITOFP: 2694 case TargetOpcode::G_UITOFP: 2695 case TargetOpcode::G_FPTOSI: 2696 case TargetOpcode::G_FPTOUI: { 2697 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), 2698 SrcTy = MRI.getType(I.getOperand(1).getReg()); 2699 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy); 2700 if (NewOpc == Opcode) 2701 return false; 2702 2703 I.setDesc(TII.get(NewOpc)); 2704 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2705 2706 return true; 2707 } 2708 2709 case TargetOpcode::G_FREEZE: 2710 return selectCopy(I, TII, MRI, TRI, RBI); 2711 2712 case TargetOpcode::G_INTTOPTR: 2713 // The importer is currently unable to import pointer types since they 2714 // didn't exist in SelectionDAG. 2715 return selectCopy(I, TII, MRI, TRI, RBI); 2716 2717 case TargetOpcode::G_BITCAST: 2718 // Imported SelectionDAG rules can handle every bitcast except those that 2719 // bitcast from a type to the same type. Ideally, these shouldn't occur 2720 // but we might not run an optimizer that deletes them. The other exception 2721 // is bitcasts involving pointer types, as SelectionDAG has no knowledge 2722 // of them. 2723 return selectCopy(I, TII, MRI, TRI, RBI); 2724 2725 case TargetOpcode::G_SELECT: { 2726 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) { 2727 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty 2728 << ", expected: " << LLT::scalar(1) << '\n'); 2729 return false; 2730 } 2731 2732 const Register CondReg = I.getOperand(1).getReg(); 2733 const Register TReg = I.getOperand(2).getReg(); 2734 const Register FReg = I.getOperand(3).getReg(); 2735 2736 if (tryOptSelect(I)) 2737 return true; 2738 2739 Register CSelOpc = selectSelectOpc(I, MRI, RBI); 2740 MachineInstr &TstMI = 2741 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri)) 2742 .addDef(AArch64::WZR) 2743 .addUse(CondReg) 2744 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 2745 2746 MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc)) 2747 .addDef(I.getOperand(0).getReg()) 2748 .addUse(TReg) 2749 .addUse(FReg) 2750 .addImm(AArch64CC::NE); 2751 2752 constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI); 2753 constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI); 2754 2755 I.eraseFromParent(); 2756 return true; 2757 } 2758 case TargetOpcode::G_ICMP: { 2759 if (Ty.isVector()) 2760 return selectVectorICmp(I, MRI); 2761 2762 if (Ty != LLT::scalar(32)) { 2763 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty 2764 << ", expected: " << LLT::scalar(32) << '\n'); 2765 return false; 2766 } 2767 2768 MachineIRBuilder MIRBuilder(I); 2769 MachineInstr *Cmp; 2770 CmpInst::Predicate Pred; 2771 std::tie(Cmp, Pred) = emitIntegerCompare(I.getOperand(2), I.getOperand(3), 2772 I.getOperand(1), MIRBuilder); 2773 if (!Cmp) 2774 return false; 2775 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder); 2776 I.eraseFromParent(); 2777 return true; 2778 } 2779 2780 case TargetOpcode::G_FCMP: { 2781 if (Ty != LLT::scalar(32)) { 2782 LLVM_DEBUG(dbgs() << "G_FCMP result has type: " << Ty 2783 << ", expected: " << LLT::scalar(32) << '\n'); 2784 return false; 2785 } 2786 2787 unsigned CmpOpc = selectFCMPOpc(I, MRI); 2788 if (!CmpOpc) 2789 return false; 2790 2791 // FIXME: regbank 2792 2793 AArch64CC::CondCode CC1, 
CC2; 2794 changeFCMPPredToAArch64CC( 2795 (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2); 2796 2797 // Partially build the compare. Decide if we need to add a use for the 2798 // third operand based off whether or not we're comparing against 0.0. 2799 auto CmpMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc)) 2800 .addUse(I.getOperand(2).getReg()); 2801 2802 // If we don't have an immediate compare, then we need to add a use of the 2803 // register which wasn't used for the immediate. 2804 // Note that the immediate will always be the last operand. 2805 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri) 2806 CmpMI = CmpMI.addUse(I.getOperand(3).getReg()); 2807 2808 const Register DefReg = I.getOperand(0).getReg(); 2809 Register Def1Reg = DefReg; 2810 if (CC2 != AArch64CC::AL) 2811 Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2812 2813 MachineInstr &CSetMI = 2814 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) 2815 .addDef(Def1Reg) 2816 .addUse(AArch64::WZR) 2817 .addUse(AArch64::WZR) 2818 .addImm(getInvertedCondCode(CC1)); 2819 2820 if (CC2 != AArch64CC::AL) { 2821 Register Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2822 MachineInstr &CSet2MI = 2823 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr)) 2824 .addDef(Def2Reg) 2825 .addUse(AArch64::WZR) 2826 .addUse(AArch64::WZR) 2827 .addImm(getInvertedCondCode(CC2)); 2828 MachineInstr &OrMI = 2829 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr)) 2830 .addDef(DefReg) 2831 .addUse(Def1Reg) 2832 .addUse(Def2Reg); 2833 constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI); 2834 constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI); 2835 } 2836 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 2837 constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI); 2838 2839 I.eraseFromParent(); 2840 return true; 2841 } 2842 case TargetOpcode::G_VASTART: 2843 return STI.isTargetDarwin() ? 
selectVaStartDarwin(I, MF, MRI) 2844 : selectVaStartAAPCS(I, MF, MRI); 2845 case TargetOpcode::G_INTRINSIC: 2846 return selectIntrinsic(I, MRI); 2847 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 2848 return selectIntrinsicWithSideEffects(I, MRI); 2849 case TargetOpcode::G_IMPLICIT_DEF: { 2850 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); 2851 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2852 const Register DstReg = I.getOperand(0).getReg(); 2853 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2854 const TargetRegisterClass *DstRC = 2855 getRegClassForTypeOnBank(DstTy, DstRB, RBI); 2856 RBI.constrainGenericRegister(DstReg, *DstRC, MRI); 2857 return true; 2858 } 2859 case TargetOpcode::G_BLOCK_ADDR: { 2860 if (TM.getCodeModel() == CodeModel::Large) { 2861 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0); 2862 I.eraseFromParent(); 2863 return true; 2864 } else { 2865 I.setDesc(TII.get(AArch64::MOVaddrBA)); 2866 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA), 2867 I.getOperand(0).getReg()) 2868 .addBlockAddress(I.getOperand(1).getBlockAddress(), 2869 /* Offset */ 0, AArch64II::MO_PAGE) 2870 .addBlockAddress( 2871 I.getOperand(1).getBlockAddress(), /* Offset */ 0, 2872 AArch64II::MO_NC | AArch64II::MO_PAGEOFF); 2873 I.eraseFromParent(); 2874 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); 2875 } 2876 } 2877 case TargetOpcode::G_INTRINSIC_TRUNC: 2878 return selectIntrinsicTrunc(I, MRI); 2879 case TargetOpcode::G_INTRINSIC_ROUND: 2880 return selectIntrinsicRound(I, MRI); 2881 case TargetOpcode::G_BUILD_VECTOR: 2882 return selectBuildVector(I, MRI); 2883 case TargetOpcode::G_MERGE_VALUES: 2884 return selectMergeValues(I, MRI); 2885 case TargetOpcode::G_UNMERGE_VALUES: 2886 return selectUnmergeValues(I, MRI); 2887 case TargetOpcode::G_SHUFFLE_VECTOR: 2888 return selectShuffleVector(I, MRI); 2889 case TargetOpcode::G_EXTRACT_VECTOR_ELT: 2890 return selectExtractElt(I, MRI); 2891 case TargetOpcode::G_INSERT_VECTOR_ELT: 2892 return selectInsertElt(I, MRI); 2893 case TargetOpcode::G_CONCAT_VECTORS: 2894 return selectConcatVectors(I, MRI); 2895 case TargetOpcode::G_JUMP_TABLE: 2896 return selectJumpTable(I, MRI); 2897 } 2898 2899 return false; 2900 } 2901 2902 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, 2903 MachineRegisterInfo &MRI) const { 2904 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT"); 2905 Register JTAddr = I.getOperand(0).getReg(); 2906 unsigned JTI = I.getOperand(1).getIndex(); 2907 Register Index = I.getOperand(2).getReg(); 2908 MachineIRBuilder MIB(I); 2909 2910 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 2911 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); 2912 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32, 2913 {TargetReg, ScratchReg}, {JTAddr, Index}) 2914 .addJumpTableIndex(JTI); 2915 // Build the indirect branch. 
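// At this point the selected sequence will be, roughly:
//   %target, %scratch = JumpTableDest32 %jt_addr, %index, %jump-table.N
//   BR %target
// JumpTableDest32 is a pseudo that is expanded into the actual table load
// and destination computation later.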
2916 MIB.buildInstr(AArch64::BR, {}, {TargetReg}); 2917 I.eraseFromParent(); 2918 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI); 2919 } 2920 2921 bool AArch64InstructionSelector::selectJumpTable( 2922 MachineInstr &I, MachineRegisterInfo &MRI) const { 2923 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table"); 2924 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!"); 2925 2926 Register DstReg = I.getOperand(0).getReg(); 2927 unsigned JTI = I.getOperand(1).getIndex(); 2928 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later. 2929 MachineIRBuilder MIB(I); 2930 auto MovMI = 2931 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {}) 2932 .addJumpTableIndex(JTI, AArch64II::MO_PAGE) 2933 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF); 2934 I.eraseFromParent(); 2935 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); 2936 } 2937 2938 bool AArch64InstructionSelector::selectTLSGlobalValue( 2939 MachineInstr &I, MachineRegisterInfo &MRI) const { 2940 if (!STI.isTargetMachO()) 2941 return false; 2942 MachineFunction &MF = *I.getParent()->getParent(); 2943 MF.getFrameInfo().setAdjustsStack(true); 2944 2945 const GlobalValue &GV = *I.getOperand(1).getGlobal(); 2946 MachineIRBuilder MIB(I); 2947 2948 MIB.buildInstr(AArch64::LOADgot, {AArch64::X0}, {}) 2949 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); 2950 2951 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, 2952 {Register(AArch64::X0)}) 2953 .addImm(0); 2954 2955 // TLS calls preserve all registers except those that absolutely must be 2956 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be 2957 // silly). 2958 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load}) 2959 .addDef(AArch64::X0, RegState::Implicit) 2960 .addRegMask(TRI.getTLSCallPreservedMask()); 2961 2962 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0)); 2963 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass, 2964 MRI); 2965 I.eraseFromParent(); 2966 return true; 2967 } 2968 2969 bool AArch64InstructionSelector::selectIntrinsicTrunc( 2970 MachineInstr &I, MachineRegisterInfo &MRI) const { 2971 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); 2972 2973 // Select the correct opcode. 2974 unsigned Opc = 0; 2975 if (!SrcTy.isVector()) { 2976 switch (SrcTy.getSizeInBits()) { 2977 default: 2978 case 16: 2979 Opc = AArch64::FRINTZHr; 2980 break; 2981 case 32: 2982 Opc = AArch64::FRINTZSr; 2983 break; 2984 case 64: 2985 Opc = AArch64::FRINTZDr; 2986 break; 2987 } 2988 } else { 2989 unsigned NumElts = SrcTy.getNumElements(); 2990 switch (SrcTy.getElementType().getSizeInBits()) { 2991 default: 2992 break; 2993 case 16: 2994 if (NumElts == 4) 2995 Opc = AArch64::FRINTZv4f16; 2996 else if (NumElts == 8) 2997 Opc = AArch64::FRINTZv8f16; 2998 break; 2999 case 32: 3000 if (NumElts == 2) 3001 Opc = AArch64::FRINTZv2f32; 3002 else if (NumElts == 4) 3003 Opc = AArch64::FRINTZv4f32; 3004 break; 3005 case 64: 3006 if (NumElts == 2) 3007 Opc = AArch64::FRINTZv2f64; 3008 break; 3009 } 3010 } 3011 3012 if (!Opc) { 3013 // Didn't get an opcode above, bail. 3014 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n"); 3015 return false; 3016 } 3017 3018 // Legalization would have set us up perfectly for this; we just need to 3019 // set the opcode and move on. 
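// E.g. a scalar s64 G_INTRINSIC_TRUNC is selected simply as
//   %dst:fpr64 = FRINTZDr %src:fpr64
// with the register operands constrained below.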
3020 I.setDesc(TII.get(Opc)); 3021 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3022 } 3023 3024 bool AArch64InstructionSelector::selectIntrinsicRound( 3025 MachineInstr &I, MachineRegisterInfo &MRI) const { 3026 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); 3027 3028 // Select the correct opcode. 3029 unsigned Opc = 0; 3030 if (!SrcTy.isVector()) { 3031 switch (SrcTy.getSizeInBits()) { 3032 default: 3033 case 16: 3034 Opc = AArch64::FRINTAHr; 3035 break; 3036 case 32: 3037 Opc = AArch64::FRINTASr; 3038 break; 3039 case 64: 3040 Opc = AArch64::FRINTADr; 3041 break; 3042 } 3043 } else { 3044 unsigned NumElts = SrcTy.getNumElements(); 3045 switch (SrcTy.getElementType().getSizeInBits()) { 3046 default: 3047 break; 3048 case 16: 3049 if (NumElts == 4) 3050 Opc = AArch64::FRINTAv4f16; 3051 else if (NumElts == 8) 3052 Opc = AArch64::FRINTAv8f16; 3053 break; 3054 case 32: 3055 if (NumElts == 2) 3056 Opc = AArch64::FRINTAv2f32; 3057 else if (NumElts == 4) 3058 Opc = AArch64::FRINTAv4f32; 3059 break; 3060 case 64: 3061 if (NumElts == 2) 3062 Opc = AArch64::FRINTAv2f64; 3063 break; 3064 } 3065 } 3066 3067 if (!Opc) { 3068 // Didn't get an opcode above, bail. 3069 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n"); 3070 return false; 3071 } 3072 3073 // Legalization would have set us up perfectly for this; we just need to 3074 // set the opcode and move on. 3075 I.setDesc(TII.get(Opc)); 3076 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3077 } 3078 3079 bool AArch64InstructionSelector::selectVectorICmp( 3080 MachineInstr &I, MachineRegisterInfo &MRI) const { 3081 Register DstReg = I.getOperand(0).getReg(); 3082 LLT DstTy = MRI.getType(DstReg); 3083 Register SrcReg = I.getOperand(2).getReg(); 3084 Register Src2Reg = I.getOperand(3).getReg(); 3085 LLT SrcTy = MRI.getType(SrcReg); 3086 3087 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); 3088 unsigned NumElts = DstTy.getNumElements(); 3089 3090 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b 3091 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16 3092 // Third index is cc opcode: 3093 // 0 == eq 3094 // 1 == ugt 3095 // 2 == uge 3096 // 3 == ult 3097 // 4 == ule 3098 // 5 == sgt 3099 // 6 == sge 3100 // 7 == slt 3101 // 8 == sle 3102 // ne is done by negating 'eq' result. 3103 3104 // This table below assumes that for some comparisons the operands will be 3105 // commuted. 
3106 // ult op == commute + ugt op 3107 // ule op == commute + uge op 3108 // slt op == commute + sgt op 3109 // sle op == commute + sge op 3110 unsigned PredIdx = 0; 3111 bool SwapOperands = false; 3112 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); 3113 switch (Pred) { 3114 case CmpInst::ICMP_NE: 3115 case CmpInst::ICMP_EQ: 3116 PredIdx = 0; 3117 break; 3118 case CmpInst::ICMP_UGT: 3119 PredIdx = 1; 3120 break; 3121 case CmpInst::ICMP_UGE: 3122 PredIdx = 2; 3123 break; 3124 case CmpInst::ICMP_ULT: 3125 PredIdx = 3; 3126 SwapOperands = true; 3127 break; 3128 case CmpInst::ICMP_ULE: 3129 PredIdx = 4; 3130 SwapOperands = true; 3131 break; 3132 case CmpInst::ICMP_SGT: 3133 PredIdx = 5; 3134 break; 3135 case CmpInst::ICMP_SGE: 3136 PredIdx = 6; 3137 break; 3138 case CmpInst::ICMP_SLT: 3139 PredIdx = 7; 3140 SwapOperands = true; 3141 break; 3142 case CmpInst::ICMP_SLE: 3143 PredIdx = 8; 3144 SwapOperands = true; 3145 break; 3146 default: 3147 llvm_unreachable("Unhandled icmp predicate"); 3148 return false; 3149 } 3150 3151 // This table obviously should be tablegen'd when we have our GISel native 3152 // tablegen selector. 3153 3154 static const unsigned OpcTable[4][4][9] = { 3155 { 3156 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3157 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3158 0 /* invalid */}, 3159 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3160 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3161 0 /* invalid */}, 3162 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, 3163 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, 3164 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, 3165 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, 3166 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, 3167 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8} 3168 }, 3169 { 3170 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3171 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3172 0 /* invalid */}, 3173 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, 3174 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, 3175 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, 3176 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, 3177 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, 3178 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, 3179 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3180 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3181 0 /* invalid */} 3182 }, 3183 { 3184 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, 3185 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, 3186 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, 3187 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, 3188 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, 3189 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, 3190 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3191 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3192 0 /* invalid */}, 3193 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3194 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3195 0 /* invalid */} 3196 }, 3197 { 3198 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, 3199 AArch64::CMHIv2i64, 
AArch64::CMHSv2i64, AArch64::CMGTv2i64, 3200 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, 3201 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3202 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3203 0 /* invalid */}, 3204 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3205 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3206 0 /* invalid */}, 3207 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3208 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3209 0 /* invalid */} 3210 }, 3211 }; 3212 unsigned EltIdx = Log2_32(SrcEltSize / 8); 3213 unsigned NumEltsIdx = Log2_32(NumElts / 2); 3214 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx]; 3215 if (!Opc) { 3216 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode"); 3217 return false; 3218 } 3219 3220 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI); 3221 const TargetRegisterClass *SrcRC = 3222 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true); 3223 if (!SrcRC) { 3224 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); 3225 return false; 3226 } 3227 3228 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0; 3229 if (SrcTy.getSizeInBits() == 128) 3230 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0; 3231 3232 if (SwapOperands) 3233 std::swap(SrcReg, Src2Reg); 3234 3235 MachineIRBuilder MIB(I); 3236 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg}); 3237 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 3238 3239 // Invert if we had a 'ne' cc. 3240 if (NotOpc) { 3241 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp}); 3242 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 3243 } else { 3244 MIB.buildCopy(DstReg, Cmp.getReg(0)); 3245 } 3246 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI); 3247 I.eraseFromParent(); 3248 return true; 3249 } 3250 3251 MachineInstr *AArch64InstructionSelector::emitScalarToVector( 3252 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar, 3253 MachineIRBuilder &MIRBuilder) const { 3254 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {}); 3255 3256 auto BuildFn = [&](unsigned SubregIndex) { 3257 auto Ins = 3258 MIRBuilder 3259 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar}) 3260 .addImm(SubregIndex); 3261 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI); 3262 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI); 3263 return &*Ins; 3264 }; 3265 3266 switch (EltSize) { 3267 case 16: 3268 return BuildFn(AArch64::hsub); 3269 case 32: 3270 return BuildFn(AArch64::ssub); 3271 case 64: 3272 return BuildFn(AArch64::dsub); 3273 default: 3274 return nullptr; 3275 } 3276 } 3277 3278 bool AArch64InstructionSelector::selectMergeValues( 3279 MachineInstr &I, MachineRegisterInfo &MRI) const { 3280 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode"); 3281 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 3282 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 3283 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation"); 3284 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); 3285 3286 if (I.getNumOperands() != 3) 3287 return false; 3288 3289 // Merging 2 s64s into an s128. 
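// A rough sketch (made-up vregs):
//   %dst:fpr(s128) = G_MERGE_VALUES %lo:fpr(s64), %hi:fpr(s64)
// is built from an IMPLICIT_DEF of the 128-bit value plus two 64-bit lane
// inserts (via emitLaneInsert) into lanes 0 and 1.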
3290 if (DstTy == LLT::scalar(128)) { 3291 if (SrcTy.getSizeInBits() != 64) 3292 return false; 3293 MachineIRBuilder MIB(I); 3294 Register DstReg = I.getOperand(0).getReg(); 3295 Register Src1Reg = I.getOperand(1).getReg(); 3296 Register Src2Reg = I.getOperand(2).getReg(); 3297 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {}); 3298 MachineInstr *InsMI = 3299 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB); 3300 if (!InsMI) 3301 return false; 3302 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(), 3303 Src2Reg, /* LaneIdx */ 1, RB, MIB); 3304 if (!Ins2MI) 3305 return false; 3306 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); 3307 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI); 3308 I.eraseFromParent(); 3309 return true; 3310 } 3311 3312 if (RB.getID() != AArch64::GPRRegBankID) 3313 return false; 3314 3315 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) 3316 return false; 3317 3318 auto *DstRC = &AArch64::GPR64RegClass; 3319 Register SubToRegDef = MRI.createVirtualRegister(DstRC); 3320 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 3321 TII.get(TargetOpcode::SUBREG_TO_REG)) 3322 .addDef(SubToRegDef) 3323 .addImm(0) 3324 .addUse(I.getOperand(1).getReg()) 3325 .addImm(AArch64::sub_32); 3326 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC); 3327 // Need to anyext the second scalar before we can use bfm 3328 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 3329 TII.get(TargetOpcode::SUBREG_TO_REG)) 3330 .addDef(SubToRegDef2) 3331 .addImm(0) 3332 .addUse(I.getOperand(2).getReg()) 3333 .addImm(AArch64::sub_32); 3334 MachineInstr &BFM = 3335 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri)) 3336 .addDef(I.getOperand(0).getReg()) 3337 .addUse(SubToRegDef) 3338 .addUse(SubToRegDef2) 3339 .addImm(32) 3340 .addImm(31); 3341 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI); 3342 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI); 3343 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI); 3344 I.eraseFromParent(); 3345 return true; 3346 } 3347 3348 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, 3349 const unsigned EltSize) { 3350 // Choose a lane copy opcode and subregister based off of the size of the 3351 // vector's elements. 3352 switch (EltSize) { 3353 case 16: 3354 CopyOpc = AArch64::CPYi16; 3355 ExtractSubReg = AArch64::hsub; 3356 break; 3357 case 32: 3358 CopyOpc = AArch64::CPYi32; 3359 ExtractSubReg = AArch64::ssub; 3360 break; 3361 case 64: 3362 CopyOpc = AArch64::CPYi64; 3363 ExtractSubReg = AArch64::dsub; 3364 break; 3365 default: 3366 // Unknown size, bail out. 
3367 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n"); 3368 return false; 3369 } 3370 return true; 3371 } 3372 3373 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt( 3374 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, 3375 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const { 3376 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 3377 unsigned CopyOpc = 0; 3378 unsigned ExtractSubReg = 0; 3379 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) { 3380 LLVM_DEBUG( 3381 dbgs() << "Couldn't determine lane copy opcode for instruction.\n"); 3382 return nullptr; 3383 } 3384 3385 const TargetRegisterClass *DstRC = 3386 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true); 3387 if (!DstRC) { 3388 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n"); 3389 return nullptr; 3390 } 3391 3392 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI); 3393 const LLT &VecTy = MRI.getType(VecReg); 3394 const TargetRegisterClass *VecRC = 3395 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true); 3396 if (!VecRC) { 3397 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); 3398 return nullptr; 3399 } 3400 3401 // The register that we're going to copy into. 3402 Register InsertReg = VecReg; 3403 if (!DstReg) 3404 DstReg = MRI.createVirtualRegister(DstRC); 3405 // If the lane index is 0, we just use a subregister COPY. 3406 if (LaneIdx == 0) { 3407 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {}) 3408 .addReg(VecReg, 0, ExtractSubReg); 3409 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 3410 return &*Copy; 3411 } 3412 3413 // Lane copies require 128-bit wide registers. If we're dealing with an 3414 // unpacked vector, then we need to move up to that width. Insert an implicit 3415 // def and a subregister insert to get us there. 3416 if (VecTy.getSizeInBits() != 128) { 3417 MachineInstr *ScalarToVector = emitScalarToVector( 3418 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder); 3419 if (!ScalarToVector) 3420 return nullptr; 3421 InsertReg = ScalarToVector->getOperand(0).getReg(); 3422 } 3423 3424 MachineInstr *LaneCopyMI = 3425 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx); 3426 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); 3427 3428 // Make sure that we actually constrain the initial copy. 3429 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 3430 return LaneCopyMI; 3431 } 3432 3433 bool AArch64InstructionSelector::selectExtractElt( 3434 MachineInstr &I, MachineRegisterInfo &MRI) const { 3435 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && 3436 "unexpected opcode!"); 3437 Register DstReg = I.getOperand(0).getReg(); 3438 const LLT NarrowTy = MRI.getType(DstReg); 3439 const Register SrcReg = I.getOperand(1).getReg(); 3440 const LLT WideTy = MRI.getType(SrcReg); 3441 (void)WideTy; 3442 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && 3443 "source register size too small!"); 3444 assert(NarrowTy.isScalar() && "cannot extract vector into vector!"); 3445 3446 // Need the lane index to determine the correct copy opcode. 3447 MachineOperand &LaneIdxOp = I.getOperand(2); 3448 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?"); 3449 3450 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { 3451 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n"); 3452 return false; 3453 } 3454 3455 // Find the index to extract from. 
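  // The CPYi* lane-copy instructions encode the lane index as an immediate,
  // so the index must resolve to a G_CONSTANT (possibly behind copies);
  // variable lane indices are not handled here.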
3456 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI); 3457 if (!VRegAndVal) 3458 return false; 3459 unsigned LaneIdx = VRegAndVal->Value; 3460 3461 MachineIRBuilder MIRBuilder(I); 3462 3463 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 3464 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg, 3465 LaneIdx, MIRBuilder); 3466 if (!Extract) 3467 return false; 3468 3469 I.eraseFromParent(); 3470 return true; 3471 } 3472 3473 bool AArch64InstructionSelector::selectSplitVectorUnmerge( 3474 MachineInstr &I, MachineRegisterInfo &MRI) const { 3475 unsigned NumElts = I.getNumOperands() - 1; 3476 Register SrcReg = I.getOperand(NumElts).getReg(); 3477 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 3478 const LLT SrcTy = MRI.getType(SrcReg); 3479 3480 assert(NarrowTy.isVector() && "Expected an unmerge into vectors"); 3481 if (SrcTy.getSizeInBits() > 128) { 3482 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge"); 3483 return false; 3484 } 3485 3486 MachineIRBuilder MIB(I); 3487 3488 // We implement a split vector operation by treating the sub-vectors as 3489 // scalars and extracting them. 3490 const RegisterBank &DstRB = 3491 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI); 3492 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) { 3493 Register Dst = I.getOperand(OpIdx).getReg(); 3494 MachineInstr *Extract = 3495 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB); 3496 if (!Extract) 3497 return false; 3498 } 3499 I.eraseFromParent(); 3500 return true; 3501 } 3502 3503 bool AArch64InstructionSelector::selectUnmergeValues( 3504 MachineInstr &I, MachineRegisterInfo &MRI) const { 3505 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 3506 "unexpected opcode"); 3507 3508 // TODO: Handle unmerging into GPRs and from scalars to scalars. 3509 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() != 3510 AArch64::FPRRegBankID || 3511 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != 3512 AArch64::FPRRegBankID) { 3513 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " 3514 "currently unsupported.\n"); 3515 return false; 3516 } 3517 3518 // The last operand is the vector source register, and every other operand is 3519 // a register to unpack into. 3520 unsigned NumElts = I.getNumOperands() - 1; 3521 Register SrcReg = I.getOperand(NumElts).getReg(); 3522 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 3523 const LLT WideTy = MRI.getType(SrcReg); 3524 (void)WideTy; 3525 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) && 3526 "can only unmerge from vector or s128 types!"); 3527 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && 3528 "source register size too small!"); 3529 3530 if (!NarrowTy.isScalar()) 3531 return selectSplitVectorUnmerge(I, MRI); 3532 3533 MachineIRBuilder MIB(I); 3534 3535 // Choose a lane copy opcode and subregister based off of the size of the 3536 // vector's elements. 3537 unsigned CopyOpc = 0; 3538 unsigned ExtractSubReg = 0; 3539 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) 3540 return false; 3541 3542 // Set up for the lane copies. 3543 MachineBasicBlock &MBB = *I.getParent(); 3544 3545 // Stores the registers we'll be copying from. 3546 SmallVector<Register, 4> InsertRegs; 3547 3548 // We'll use the first register twice, so we only need NumElts-1 registers. 
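  // For example (illustrative), unmerging a <4 x s32> into four s32 results
  // needs three entries to feed the CPYi32 lane copies of lanes 1-3 (here
  // all referring to the same 128-bit source register); lane 0 is handled
  // further down with a plain ssub subregister copy instead.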
3549 unsigned NumInsertRegs = NumElts - 1; 3550 3551 // If our elements fit into exactly 128 bits, then we can copy from the source 3552 // directly. Otherwise, we need to do a bit of setup with some subregister 3553 // inserts. 3554 if (NarrowTy.getSizeInBits() * NumElts == 128) { 3555 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg); 3556 } else { 3557 // No. We have to perform subregister inserts. For each insert, create an 3558 // implicit def and a subregister insert, and save the register we create. 3559 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) { 3560 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); 3561 MachineInstr &ImpDefMI = 3562 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF), 3563 ImpDefReg); 3564 3565 // Now, create the subregister insert from SrcReg. 3566 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); 3567 MachineInstr &InsMI = 3568 *BuildMI(MBB, I, I.getDebugLoc(), 3569 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg) 3570 .addUse(ImpDefReg) 3571 .addUse(SrcReg) 3572 .addImm(AArch64::dsub); 3573 3574 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI); 3575 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI); 3576 3577 // Save the register so that we can copy from it after. 3578 InsertRegs.push_back(InsertReg); 3579 } 3580 } 3581 3582 // Now that we've created any necessary subregister inserts, we can 3583 // create the copies. 3584 // 3585 // Perform the first copy separately as a subregister copy. 3586 Register CopyTo = I.getOperand(0).getReg(); 3587 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {}) 3588 .addReg(InsertRegs[0], 0, ExtractSubReg); 3589 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI); 3590 3591 // Now, perform the remaining copies as vector lane copies. 3592 unsigned LaneIdx = 1; 3593 for (Register InsReg : InsertRegs) { 3594 Register CopyTo = I.getOperand(LaneIdx).getReg(); 3595 MachineInstr &CopyInst = 3596 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo) 3597 .addUse(InsReg) 3598 .addImm(LaneIdx); 3599 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI); 3600 ++LaneIdx; 3601 } 3602 3603 // Separately constrain the first copy's destination. Because of the 3604 // limitation in constrainOperandRegClass, we can't guarantee that this will 3605 // actually be constrained. So, do it ourselves using the second operand. 
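  // Operand 1's destination was already constrained by its CPYi* lane copy
  // in the loop above, so its register class is also a safe choice for the
  // subregister COPY's destination.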
  const TargetRegisterClass *RC =
      MRI.getRegClassOrNull(I.getOperand(1).getReg());
  if (!RC) {
    LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n");
    return false;
  }

  RBI.constrainGenericRegister(CopyTo, *RC, MRI);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::selectConcatVectors(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
         "Unexpected opcode");
  Register Dst = I.getOperand(0).getReg();
  Register Op1 = I.getOperand(1).getReg();
  Register Op2 = I.getOperand(2).getReg();
  MachineIRBuilder MIRBuilder(I);
  MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder);
  if (!ConcatMI)
    return false;
  I.eraseFromParent();
  return true;
}

unsigned
AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal,
                                                  MachineFunction &MF) const {
  Type *CPTy = CPVal->getType();
  Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy);

  MachineConstantPool *MCP = MF.getConstantPool();
  return MCP->getConstantPoolIndex(CPVal, Alignment);
}

MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool(
    const Constant *CPVal, MachineIRBuilder &MIRBuilder) const {
  unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF());

  auto Adrp =
      MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {})
          .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);

  MachineInstr *LoadMI = nullptr;
  switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) {
  case 16:
    LoadMI =
        &*MIRBuilder
              .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp})
              .addConstantPoolIndex(CPIdx, 0,
                                    AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  case 8:
    LoadMI = &*MIRBuilder
                 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp})
                 .addConstantPoolIndex(
                     CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
    break;
  default:
    LLVM_DEBUG(dbgs() << "Could not load from constant pool of type "
                      << *CPVal->getType() << "\n");
    return nullptr;
  }
  constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI);
  constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
  return LoadMI;
}

/// Return an <Opcode, SubregIndex> pair to do a vector elt insert of a given
/// size and RB.
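/// For example, a 32-bit element coming from a GPR maps to
/// {AArch64::INSvi32gpr, AArch64::ssub}, while the same element on the FPR
/// bank maps to {AArch64::INSvi32lane, AArch64::ssub}.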
3678 static std::pair<unsigned, unsigned> 3679 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { 3680 unsigned Opc, SubregIdx; 3681 if (RB.getID() == AArch64::GPRRegBankID) { 3682 if (EltSize == 32) { 3683 Opc = AArch64::INSvi32gpr; 3684 SubregIdx = AArch64::ssub; 3685 } else if (EltSize == 64) { 3686 Opc = AArch64::INSvi64gpr; 3687 SubregIdx = AArch64::dsub; 3688 } else { 3689 llvm_unreachable("invalid elt size!"); 3690 } 3691 } else { 3692 if (EltSize == 8) { 3693 Opc = AArch64::INSvi8lane; 3694 SubregIdx = AArch64::bsub; 3695 } else if (EltSize == 16) { 3696 Opc = AArch64::INSvi16lane; 3697 SubregIdx = AArch64::hsub; 3698 } else if (EltSize == 32) { 3699 Opc = AArch64::INSvi32lane; 3700 SubregIdx = AArch64::ssub; 3701 } else if (EltSize == 64) { 3702 Opc = AArch64::INSvi64lane; 3703 SubregIdx = AArch64::dsub; 3704 } else { 3705 llvm_unreachable("invalid elt size!"); 3706 } 3707 } 3708 return std::make_pair(Opc, SubregIdx); 3709 } 3710 3711 MachineInstr * 3712 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, 3713 MachineOperand &RHS, 3714 MachineIRBuilder &MIRBuilder) const { 3715 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 3716 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3717 static const unsigned OpcTable[2][2]{{AArch64::ADDXrr, AArch64::ADDXri}, 3718 {AArch64::ADDWrr, AArch64::ADDWri}}; 3719 bool Is32Bit = MRI.getType(LHS.getReg()).getSizeInBits() == 32; 3720 auto ImmFns = selectArithImmed(RHS); 3721 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()]; 3722 auto AddMI = MIRBuilder.buildInstr(Opc, {DefReg}, {LHS}); 3723 3724 // If we matched a valid constant immediate, add those operands. 3725 if (ImmFns) { 3726 for (auto &RenderFn : *ImmFns) 3727 RenderFn(AddMI); 3728 } else { 3729 AddMI.addUse(RHS.getReg()); 3730 } 3731 3732 constrainSelectedInstRegOperands(*AddMI, TII, TRI, RBI); 3733 return &*AddMI; 3734 } 3735 3736 MachineInstr * 3737 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, 3738 MachineIRBuilder &MIRBuilder) const { 3739 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 3740 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3741 static const unsigned OpcTable[2][2]{{AArch64::ADDSXrr, AArch64::ADDSXri}, 3742 {AArch64::ADDSWrr, AArch64::ADDSWri}}; 3743 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); 3744 auto ImmFns = selectArithImmed(RHS); 3745 unsigned Opc = OpcTable[Is32Bit][ImmFns.hasValue()]; 3746 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; 3747 3748 auto CmpMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS}); 3749 3750 // If we matched a valid constant immediate, add those operands. 3751 if (ImmFns) { 3752 for (auto &RenderFn : *ImmFns) 3753 RenderFn(CmpMI); 3754 } else { 3755 CmpMI.addUse(RHS.getReg()); 3756 } 3757 3758 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 3759 return &*CmpMI; 3760 } 3761 3762 MachineInstr * 3763 AArch64InstructionSelector::emitTST(const Register &LHS, const Register &RHS, 3764 MachineIRBuilder &MIRBuilder) const { 3765 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3766 unsigned RegSize = MRI.getType(LHS).getSizeInBits(); 3767 bool Is32Bit = (RegSize == 32); 3768 static const unsigned OpcTable[2][2]{{AArch64::ANDSXrr, AArch64::ANDSXri}, 3769 {AArch64::ANDSWrr, AArch64::ANDSWri}}; 3770 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; 3771 3772 // We might be able to fold in an immediate into the TST. 
We need to make sure 3773 // it's a logical immediate though, since ANDS requires that. 3774 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI); 3775 bool IsImmForm = ValAndVReg.hasValue() && 3776 AArch64_AM::isLogicalImmediate(ValAndVReg->Value, RegSize); 3777 unsigned Opc = OpcTable[Is32Bit][IsImmForm]; 3778 auto TstMI = MIRBuilder.buildInstr(Opc, {ZReg}, {LHS}); 3779 3780 if (IsImmForm) 3781 TstMI.addImm( 3782 AArch64_AM::encodeLogicalImmediate(ValAndVReg->Value, RegSize)); 3783 else 3784 TstMI.addUse(RHS); 3785 3786 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 3787 return &*TstMI; 3788 } 3789 3790 std::pair<MachineInstr *, CmpInst::Predicate> 3791 AArch64InstructionSelector::emitIntegerCompare( 3792 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, 3793 MachineIRBuilder &MIRBuilder) const { 3794 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 3795 assert(Predicate.isPredicate() && "Expected predicate?"); 3796 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3797 3798 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate(); 3799 3800 // Fold the compare if possible. 3801 MachineInstr *FoldCmp = 3802 tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder); 3803 if (FoldCmp) 3804 return {FoldCmp, P}; 3805 3806 // Can't fold into a CMN. Just emit a normal compare. 3807 unsigned CmpOpc = 0; 3808 Register ZReg; 3809 3810 LLT CmpTy = MRI.getType(LHS.getReg()); 3811 assert((CmpTy.isScalar() || CmpTy.isPointer()) && 3812 "Expected scalar or pointer"); 3813 if (CmpTy == LLT::scalar(32)) { 3814 CmpOpc = AArch64::SUBSWrr; 3815 ZReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 3816 } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) { 3817 CmpOpc = AArch64::SUBSXrr; 3818 ZReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3819 } else { 3820 return {nullptr, CmpInst::Predicate::BAD_ICMP_PREDICATE}; 3821 } 3822 3823 // Try to match immediate forms. 3824 MachineInstr *ImmedCmp = 3825 tryOptArithImmedIntegerCompare(LHS, RHS, P, MIRBuilder); 3826 if (ImmedCmp) 3827 return {ImmedCmp, P}; 3828 3829 // If we don't have an immediate, we may have a shift which can be folded 3830 // into the compare. 3831 MachineInstr *ShiftedCmp = tryOptArithShiftedCompare(LHS, RHS, MIRBuilder); 3832 if (ShiftedCmp) 3833 return {ShiftedCmp, P}; 3834 3835 auto CmpMI = 3836 MIRBuilder.buildInstr(CmpOpc, {ZReg}, {LHS.getReg(), RHS.getReg()}); 3837 // Make sure that we can constrain the compare that we emitted. 3838 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 3839 return {&*CmpMI, P}; 3840 } 3841 3842 MachineInstr *AArch64InstructionSelector::emitVectorConcat( 3843 Optional<Register> Dst, Register Op1, Register Op2, 3844 MachineIRBuilder &MIRBuilder) const { 3845 // We implement a vector concat by: 3846 // 1. Use scalar_to_vector to insert the lower vector into the larger dest 3847 // 2. Insert the upper vector into the destination's upper element 3848 // TODO: some of this code is common with G_BUILD_VECTOR handling. 
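  // Illustrative sequence (made-up register names) for concatenating two
  // <2 x s32> values %a and %b into a <4 x s32>:
  //
  //   %wa:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %a, dsub  ; widen %a
  //   %wb:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %b, dsub  ; widen %b
  //   %dst       = INSvi64lane %wa, 1, %wb, 0              ; %b -> high half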
3849 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 3850 3851 const LLT Op1Ty = MRI.getType(Op1); 3852 const LLT Op2Ty = MRI.getType(Op2); 3853 3854 if (Op1Ty != Op2Ty) { 3855 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys"); 3856 return nullptr; 3857 } 3858 assert(Op1Ty.isVector() && "Expected a vector for vector concat"); 3859 3860 if (Op1Ty.getSizeInBits() >= 128) { 3861 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors"); 3862 return nullptr; 3863 } 3864 3865 // At the moment we just support 64 bit vector concats. 3866 if (Op1Ty.getSizeInBits() != 64) { 3867 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors"); 3868 return nullptr; 3869 } 3870 3871 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits()); 3872 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI); 3873 const TargetRegisterClass *DstRC = 3874 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2); 3875 3876 MachineInstr *WidenedOp1 = 3877 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder); 3878 MachineInstr *WidenedOp2 = 3879 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder); 3880 if (!WidenedOp1 || !WidenedOp2) { 3881 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value"); 3882 return nullptr; 3883 } 3884 3885 // Now do the insert of the upper element. 3886 unsigned InsertOpc, InsSubRegIdx; 3887 std::tie(InsertOpc, InsSubRegIdx) = 3888 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits()); 3889 3890 if (!Dst) 3891 Dst = MRI.createVirtualRegister(DstRC); 3892 auto InsElt = 3893 MIRBuilder 3894 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()}) 3895 .addImm(1) /* Lane index */ 3896 .addUse(WidenedOp2->getOperand(0).getReg()) 3897 .addImm(0); 3898 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); 3899 return &*InsElt; 3900 } 3901 3902 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant( 3903 MachineInstr &I, MachineRegisterInfo &MRI) const { 3904 assert(I.getOpcode() == TargetOpcode::G_FCONSTANT && 3905 "Expected a G_FCONSTANT!"); 3906 MachineOperand &ImmOp = I.getOperand(1); 3907 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits(); 3908 3909 // Only handle 32 and 64 bit defs for now. 3910 if (DefSize != 32 && DefSize != 64) 3911 return nullptr; 3912 3913 // Don't handle null values using FMOV. 3914 if (ImmOp.getFPImm()->isNullValue()) 3915 return nullptr; 3916 3917 // Get the immediate representation for the FMOV. 3918 const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF(); 3919 int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF) 3920 : AArch64_AM::getFP64Imm(ImmValAPF); 3921 3922 // If this is -1, it means the immediate can't be represented as the requested 3923 // floating point value. Bail. 3924 if (Imm == -1) 3925 return nullptr; 3926 3927 // Update MI to represent the new FMOV instruction, constrain it, and return. 3928 ImmOp.ChangeToImmediate(Imm); 3929 unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi; 3930 I.setDesc(TII.get(MovOpc)); 3931 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3932 return &I; 3933 } 3934 3935 MachineInstr * 3936 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred, 3937 MachineIRBuilder &MIRBuilder) const { 3938 // CSINC increments the result when the predicate is false. Invert it. 
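  // For example, for an 'eq' compare we emit 'csinc wN, wzr, wzr, ne',
  // which is the standard expansion of 'cset wN, eq'.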
3939 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC( 3940 CmpInst::getInversePredicate((CmpInst::Predicate)Pred)); 3941 auto I = 3942 MIRBuilder 3943 .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)}) 3944 .addImm(InvCC); 3945 constrainSelectedInstRegOperands(*I, TII, TRI, RBI); 3946 return &*I; 3947 } 3948 3949 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { 3950 MachineIRBuilder MIB(I); 3951 MachineRegisterInfo &MRI = *MIB.getMRI(); 3952 const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo(); 3953 3954 // We want to recognize this pattern: 3955 // 3956 // $z = G_FCMP pred, $x, $y 3957 // ... 3958 // $w = G_SELECT $z, $a, $b 3959 // 3960 // Where the value of $z is *only* ever used by the G_SELECT (possibly with 3961 // some copies/truncs in between.) 3962 // 3963 // If we see this, then we can emit something like this: 3964 // 3965 // fcmp $x, $y 3966 // fcsel $w, $a, $b, pred 3967 // 3968 // Rather than emitting both of the rather long sequences in the standard 3969 // G_FCMP/G_SELECT select methods. 3970 3971 // First, check if the condition is defined by a compare. 3972 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg()); 3973 while (CondDef) { 3974 // We can only fold if all of the defs have one use. 3975 Register CondDefReg = CondDef->getOperand(0).getReg(); 3976 if (!MRI.hasOneNonDBGUse(CondDefReg)) { 3977 // Unless it's another select. 3978 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) { 3979 if (CondDef == &UI) 3980 continue; 3981 if (UI.getOpcode() != TargetOpcode::G_SELECT) 3982 return false; 3983 } 3984 } 3985 3986 // We can skip over G_TRUNC since the condition is 1-bit. 3987 // Truncating/extending can have no impact on the value. 3988 unsigned Opc = CondDef->getOpcode(); 3989 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC) 3990 break; 3991 3992 // Can't see past copies from physregs. 3993 if (Opc == TargetOpcode::COPY && 3994 Register::isPhysicalRegister(CondDef->getOperand(1).getReg())) 3995 return false; 3996 3997 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg()); 3998 } 3999 4000 // Is the condition defined by a compare? 4001 if (!CondDef) 4002 return false; 4003 4004 unsigned CondOpc = CondDef->getOpcode(); 4005 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) 4006 return false; 4007 4008 AArch64CC::CondCode CondCode; 4009 if (CondOpc == TargetOpcode::G_ICMP) { 4010 MachineInstr *Cmp; 4011 CmpInst::Predicate Pred; 4012 4013 std::tie(Cmp, Pred) = 4014 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), 4015 CondDef->getOperand(1), MIB); 4016 4017 if (!Cmp) { 4018 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); 4019 return false; 4020 } 4021 4022 // Have to collect the CondCode after emitIntegerCompare, since it can 4023 // update the predicate. 4024 CondCode = changeICMPPredToAArch64CC(Pred); 4025 } else { 4026 // Get the condition code for the select. 4027 AArch64CC::CondCode CondCode2; 4028 changeFCMPPredToAArch64CC( 4029 (CmpInst::Predicate)CondDef->getOperand(1).getPredicate(), CondCode, 4030 CondCode2); 4031 4032 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two 4033 // instructions to emit the comparison. 4034 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be 4035 // unnecessary. 4036 if (CondCode2 != AArch64CC::AL) 4037 return false; 4038 4039 // Make sure we'll be able to select the compare. 
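    // FCMPSri/FCMPDri are the compare-against-#0.0 forms and take a single
    // source register, so the second operand is only added for the
    // register-register variants below.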
4040 unsigned CmpOpc = selectFCMPOpc(*CondDef, MRI); 4041 if (!CmpOpc) 4042 return false; 4043 4044 // Emit a new compare. 4045 auto Cmp = MIB.buildInstr(CmpOpc, {}, {CondDef->getOperand(2).getReg()}); 4046 if (CmpOpc != AArch64::FCMPSri && CmpOpc != AArch64::FCMPDri) 4047 Cmp.addUse(CondDef->getOperand(3).getReg()); 4048 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 4049 } 4050 4051 // Emit the select. 4052 unsigned CSelOpc = selectSelectOpc(I, MRI, RBI); 4053 auto CSel = 4054 MIB.buildInstr(CSelOpc, {I.getOperand(0).getReg()}, 4055 {I.getOperand(2).getReg(), I.getOperand(3).getReg()}) 4056 .addImm(CondCode); 4057 constrainSelectedInstRegOperands(*CSel, TII, TRI, RBI); 4058 I.eraseFromParent(); 4059 return true; 4060 } 4061 4062 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( 4063 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, 4064 MachineIRBuilder &MIRBuilder) const { 4065 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && 4066 "Unexpected MachineOperand"); 4067 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4068 // We want to find this sort of thing: 4069 // x = G_SUB 0, y 4070 // G_ICMP z, x 4071 // 4072 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead. 4073 // e.g: 4074 // 4075 // cmn z, y 4076 4077 // Helper lambda to detect the subtract followed by the compare. 4078 // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0. 4079 auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) { 4080 if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB) 4081 return false; 4082 4083 // Need to make sure NZCV is the same at the end of the transformation. 4084 if (CC != AArch64CC::EQ && CC != AArch64CC::NE) 4085 return false; 4086 4087 // We want to match against SUBs. 4088 if (DefMI->getOpcode() != TargetOpcode::G_SUB) 4089 return false; 4090 4091 // Make sure that we're getting 4092 // x = G_SUB 0, y 4093 auto ValAndVReg = 4094 getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI); 4095 if (!ValAndVReg || ValAndVReg->Value != 0) 4096 return false; 4097 4098 // This can safely be represented as a CMN. 4099 return true; 4100 }; 4101 4102 // Check if the RHS or LHS of the G_ICMP is defined by a SUB 4103 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI); 4104 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI); 4105 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate(); 4106 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P); 4107 4108 // Given this: 4109 // 4110 // x = G_SUB 0, y 4111 // G_ICMP x, z 4112 // 4113 // Produce this: 4114 // 4115 // cmn y, z 4116 if (IsCMN(LHSDef, CC)) 4117 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder); 4118 4119 // Same idea here, but with the RHS of the compare instead: 4120 // 4121 // Given this: 4122 // 4123 // x = G_SUB 0, y 4124 // G_ICMP z, x 4125 // 4126 // Produce this: 4127 // 4128 // cmn z, y 4129 if (IsCMN(RHSDef, CC)) 4130 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder); 4131 4132 // Given this: 4133 // 4134 // z = G_AND x, y 4135 // G_ICMP z, 0 4136 // 4137 // Produce this if the compare is signed: 4138 // 4139 // tst x, y 4140 if (!isUnsignedICMPPred(P) && LHSDef && 4141 LHSDef->getOpcode() == TargetOpcode::G_AND) { 4142 // Make sure that the RHS is 0. 
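    // (ANDS sets N and Z from the AND result and zeroes C and V, whereas a
    // SUBS against zero would set C; the isUnsignedICMPPred check above
    // already rejected the predicates that read C, so a zero RHS is the
    // remaining requirement for the TST fold to be sound.)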
    auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI);
    if (!ValAndVReg || ValAndVReg->Value != 0)
      return nullptr;

    return emitTST(LHSDef->getOperand(1).getReg(),
                   LHSDef->getOperand(2).getReg(), MIRBuilder);
  }

  return nullptr;
}

MachineInstr *AArch64InstructionSelector::tryOptArithImmedIntegerCompare(
    MachineOperand &LHS, MachineOperand &RHS, CmpInst::Predicate &P,
    MachineIRBuilder &MIB) const {
  // Attempt to select the immediate form of an integer compare.
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Ty = MRI.getType(LHS.getReg());
  assert(!Ty.isVector() && "Expected scalar or pointer only?");
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) &&
         "Expected 32 bit or 64 bit compare only?");

  // Check if this is a case we can already handle.
  InstructionSelector::ComplexRendererFns ImmFns;
  ImmFns = selectArithImmed(RHS);

  if (!ImmFns) {
    // We didn't get a rendering function, but we may still have a constant.
    auto MaybeImmed = getImmedFromMO(RHS);
    if (!MaybeImmed)
      return nullptr;

    // We have a constant, but it doesn't fit. Try adjusting it by one and
    // updating the predicate if possible.
    uint64_t C = *MaybeImmed;
    CmpInst::Predicate NewP;
    switch (P) {
    default:
      return nullptr;
    case CmpInst::ICMP_SLT:
    case CmpInst::ICMP_SGE:
      // Check for
      //
      // x slt c => x sle c - 1
      // x sge c => x sgt c - 1
      //
      // When c is not the smallest possible negative number.
      if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
          (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
        return nullptr;
      NewP = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
      C -= 1;
      break;
    case CmpInst::ICMP_ULT:
    case CmpInst::ICMP_UGE:
      // Check for
      //
      // x ult c => x ule c - 1
      // x uge c => x ugt c - 1
      //
      // When c is not zero.
      if (C == 0)
        return nullptr;
      NewP = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
      C -= 1;
      break;
    case CmpInst::ICMP_SLE:
    case CmpInst::ICMP_SGT:
      // Check for
      //
      // x sle c => x slt c + 1
      // x sgt c => x sge c + 1
      //
      // When c is not the largest possible signed integer.
      if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
          (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
        return nullptr;
      NewP = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
      C += 1;
      break;
    case CmpInst::ICMP_ULE:
    case CmpInst::ICMP_UGT:
      // Check for
      //
      // x ule c => x ult c + 1
      // x ugt c => x uge c + 1
      //
      // When c is not the largest possible unsigned integer.
      if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
          (Size == 64 && C == UINT64_MAX))
        return nullptr;
      NewP = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
      C += 1;
      break;
    }

    // Check if the new constant is valid.
    if (Size == 32)
      C = static_cast<uint32_t>(C);
    ImmFns = select12BitValueWithLeftShift(C);
    if (!ImmFns)
      return nullptr;
    P = NewP;
  }

  // At this point, we know we can select an immediate form. Go ahead and do
  // that.
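  // Worked example (illustrative): 'icmp ult %x, 0x1001' has no direct
  // 12-bit encoding, but the adjustment above turns it into
  // 'icmp ule %x, 0x1000', and 0x1000 encodes as '#1, lsl #12'.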
4250 Register ZReg; 4251 unsigned Opc; 4252 if (Size == 32) { 4253 ZReg = AArch64::WZR; 4254 Opc = AArch64::SUBSWri; 4255 } else { 4256 ZReg = AArch64::XZR; 4257 Opc = AArch64::SUBSXri; 4258 } 4259 4260 auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()}); 4261 for (auto &RenderFn : *ImmFns) 4262 RenderFn(CmpMI); 4263 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 4264 return &*CmpMI; 4265 } 4266 4267 MachineInstr *AArch64InstructionSelector::tryOptArithShiftedCompare( 4268 MachineOperand &LHS, MachineOperand &RHS, MachineIRBuilder &MIB) const { 4269 // We are looking for the following pattern: 4270 // 4271 // shift = G_SHL/ASHR/LHSR y, c 4272 // ... 4273 // cmp = G_ICMP pred, something, shift 4274 // 4275 // Since we will select the G_ICMP to a SUBS, we can potentially fold the 4276 // shift into the subtract. 4277 static const unsigned OpcTable[2] = {AArch64::SUBSWrs, AArch64::SUBSXrs}; 4278 static const Register ZRegTable[2] = {AArch64::WZR, AArch64::XZR}; 4279 auto ImmFns = selectShiftedRegister(RHS); 4280 if (!ImmFns) 4281 return nullptr; 4282 MachineRegisterInfo &MRI = *MIB.getMRI(); 4283 auto Ty = MRI.getType(LHS.getReg()); 4284 assert(!Ty.isVector() && "Expected scalar or pointer only?"); 4285 unsigned Size = Ty.getSizeInBits(); 4286 bool Idx = (Size == 64); 4287 Register ZReg = ZRegTable[Idx]; 4288 unsigned Opc = OpcTable[Idx]; 4289 auto CmpMI = MIB.buildInstr(Opc, {ZReg}, {LHS.getReg()}); 4290 for (auto &RenderFn : *ImmFns) 4291 RenderFn(CmpMI); 4292 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 4293 return &*CmpMI; 4294 } 4295 4296 bool AArch64InstructionSelector::selectShuffleVector( 4297 MachineInstr &I, MachineRegisterInfo &MRI) const { 4298 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 4299 Register Src1Reg = I.getOperand(1).getReg(); 4300 const LLT Src1Ty = MRI.getType(Src1Reg); 4301 Register Src2Reg = I.getOperand(2).getReg(); 4302 const LLT Src2Ty = MRI.getType(Src2Reg); 4303 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask(); 4304 4305 MachineBasicBlock &MBB = *I.getParent(); 4306 MachineFunction &MF = *MBB.getParent(); 4307 LLVMContext &Ctx = MF.getFunction().getContext(); 4308 4309 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if 4310 // it's originated from a <1 x T> type. Those should have been lowered into 4311 // G_BUILD_VECTOR earlier. 4312 if (!Src1Ty.isVector() || !Src2Ty.isVector()) { 4313 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n"); 4314 return false; 4315 } 4316 4317 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; 4318 4319 SmallVector<Constant *, 64> CstIdxs; 4320 for (int Val : Mask) { 4321 // For now, any undef indexes we'll just assume to be 0. This should be 4322 // optimized in future, e.g. to select DUP etc. 4323 Val = Val < 0 ? 0 : Val; 4324 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { 4325 unsigned Offset = Byte + Val * BytesPerElt; 4326 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset)); 4327 } 4328 } 4329 4330 MachineIRBuilder MIRBuilder(I); 4331 4332 // Use a constant pool to load the index vector for TBL. 4333 Constant *CPVal = ConstantVector::get(CstIdxs); 4334 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder); 4335 if (!IndexLoad) { 4336 LLVM_DEBUG(dbgs() << "Could not load from a constant pool"); 4337 return false; 4338 } 4339 4340 if (DstTy.getSizeInBits() != 128) { 4341 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty"); 4342 // This case can be done with TBL1. 
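    // Sketch of the TBL1 path (illustrative): concatenate the two 64-bit
    // sources into one 128-bit register, widen the 64-bit index vector that
    // was loaded from the constant pool, then
    //
    //   %tbl:fpr128 = TBLv16i8One %concat, %indices
    //   %dst        = COPY %tbl.dsub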
4343 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder); 4344 if (!Concat) { 4345 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1"); 4346 return false; 4347 } 4348 4349 // The constant pool load will be 64 bits, so need to convert to FPR128 reg. 4350 IndexLoad = 4351 emitScalarToVector(64, &AArch64::FPR128RegClass, 4352 IndexLoad->getOperand(0).getReg(), MIRBuilder); 4353 4354 auto TBL1 = MIRBuilder.buildInstr( 4355 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass}, 4356 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()}); 4357 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI); 4358 4359 auto Copy = 4360 MIRBuilder 4361 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 4362 .addReg(TBL1.getReg(0), 0, AArch64::dsub); 4363 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI); 4364 I.eraseFromParent(); 4365 return true; 4366 } 4367 4368 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive 4369 // Q registers for regalloc. 4370 auto RegSeq = MIRBuilder 4371 .buildInstr(TargetOpcode::REG_SEQUENCE, 4372 {&AArch64::QQRegClass}, {Src1Reg}) 4373 .addImm(AArch64::qsub0) 4374 .addUse(Src2Reg) 4375 .addImm(AArch64::qsub1); 4376 4377 auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)}, 4378 {RegSeq, IndexLoad->getOperand(0)}); 4379 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI); 4380 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI); 4381 I.eraseFromParent(); 4382 return true; 4383 } 4384 4385 MachineInstr *AArch64InstructionSelector::emitLaneInsert( 4386 Optional<Register> DstReg, Register SrcReg, Register EltReg, 4387 unsigned LaneIdx, const RegisterBank &RB, 4388 MachineIRBuilder &MIRBuilder) const { 4389 MachineInstr *InsElt = nullptr; 4390 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; 4391 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4392 4393 // Create a register to define with the insert if one wasn't passed in. 4394 if (!DstReg) 4395 DstReg = MRI.createVirtualRegister(DstRC); 4396 4397 unsigned EltSize = MRI.getType(EltReg).getSizeInBits(); 4398 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first; 4399 4400 if (RB.getID() == AArch64::FPRRegBankID) { 4401 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder); 4402 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) 4403 .addImm(LaneIdx) 4404 .addUse(InsSub->getOperand(0).getReg()) 4405 .addImm(0); 4406 } else { 4407 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) 4408 .addImm(LaneIdx) 4409 .addUse(EltReg); 4410 } 4411 4412 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); 4413 return InsElt; 4414 } 4415 4416 bool AArch64InstructionSelector::selectInsertElt( 4417 MachineInstr &I, MachineRegisterInfo &MRI) const { 4418 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT); 4419 4420 // Get information on the destination. 4421 Register DstReg = I.getOperand(0).getReg(); 4422 const LLT DstTy = MRI.getType(DstReg); 4423 unsigned VecSize = DstTy.getSizeInBits(); 4424 4425 // Get information on the element we want to insert into the destination. 4426 Register EltReg = I.getOperand(2).getReg(); 4427 const LLT EltTy = MRI.getType(EltReg); 4428 unsigned EltSize = EltTy.getSizeInBits(); 4429 if (EltSize < 16 || EltSize > 64) 4430 return false; // Don't support all element types yet. 4431 4432 // Find the definition of the index. Bail out if it's not defined by a 4433 // G_CONSTANT. 
4434 Register IdxReg = I.getOperand(3).getReg(); 4435 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI); 4436 if (!VRegAndVal) 4437 return false; 4438 unsigned LaneIdx = VRegAndVal->Value; 4439 4440 // Perform the lane insert. 4441 Register SrcReg = I.getOperand(1).getReg(); 4442 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); 4443 MachineIRBuilder MIRBuilder(I); 4444 4445 if (VecSize < 128) { 4446 // If the vector we're inserting into is smaller than 128 bits, widen it 4447 // to 128 to do the insert. 4448 MachineInstr *ScalarToVec = emitScalarToVector( 4449 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder); 4450 if (!ScalarToVec) 4451 return false; 4452 SrcReg = ScalarToVec->getOperand(0).getReg(); 4453 } 4454 4455 // Create an insert into a new FPR128 register. 4456 // Note that if our vector is already 128 bits, we end up emitting an extra 4457 // register. 4458 MachineInstr *InsMI = 4459 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder); 4460 4461 if (VecSize < 128) { 4462 // If we had to widen to perform the insert, then we have to demote back to 4463 // the original size to get the result we want. 4464 Register DemoteVec = InsMI->getOperand(0).getReg(); 4465 const TargetRegisterClass *RC = 4466 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize); 4467 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { 4468 LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); 4469 return false; 4470 } 4471 unsigned SubReg = 0; 4472 if (!getSubRegForClass(RC, TRI, SubReg)) 4473 return false; 4474 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { 4475 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize 4476 << "\n"); 4477 return false; 4478 } 4479 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 4480 .addReg(DemoteVec, 0, SubReg); 4481 RBI.constrainGenericRegister(DstReg, *RC, MRI); 4482 } else { 4483 // No widening needed. 4484 InsMI->getOperand(0).setReg(DstReg); 4485 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); 4486 } 4487 4488 I.eraseFromParent(); 4489 return true; 4490 } 4491 4492 bool AArch64InstructionSelector::tryOptConstantBuildVec( 4493 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const { 4494 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 4495 assert(DstTy.getSizeInBits() <= 128 && "Unexpected build_vec type!"); 4496 if (DstTy.getSizeInBits() < 32) 4497 return false; 4498 // Check if we're building a constant vector, in which case we want to 4499 // generate a constant pool load instead of a vector insert sequence. 
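  // For example (illustrative), a G_BUILD_VECTOR of four s32 G_CONSTANTs
  // becomes a <4 x i32> ConstantVector in the constant pool and is then
  // materialized with a single ADRP + LDRQui instead of a chain of lane
  // inserts.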
4500 SmallVector<Constant *, 16> Csts; 4501 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) { 4502 // Try to find G_CONSTANT or G_FCONSTANT 4503 auto *OpMI = 4504 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI); 4505 if (OpMI) 4506 Csts.emplace_back( 4507 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm())); 4508 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT, 4509 I.getOperand(Idx).getReg(), MRI))) 4510 Csts.emplace_back( 4511 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm())); 4512 else 4513 return false; 4514 } 4515 Constant *CV = ConstantVector::get(Csts); 4516 MachineIRBuilder MIB(I); 4517 auto *CPLoad = emitLoadFromConstantPool(CV, MIB); 4518 if (!CPLoad) { 4519 LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector"); 4520 return false; 4521 } 4522 MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0)); 4523 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 4524 *MRI.getRegClass(CPLoad->getOperand(0).getReg()), 4525 MRI); 4526 I.eraseFromParent(); 4527 return true; 4528 } 4529 4530 bool AArch64InstructionSelector::selectBuildVector( 4531 MachineInstr &I, MachineRegisterInfo &MRI) const { 4532 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 4533 // Until we port more of the optimized selections, for now just use a vector 4534 // insert sequence. 4535 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 4536 const LLT EltTy = MRI.getType(I.getOperand(1).getReg()); 4537 unsigned EltSize = EltTy.getSizeInBits(); 4538 4539 if (tryOptConstantBuildVec(I, DstTy, MRI)) 4540 return true; 4541 if (EltSize < 16 || EltSize > 64) 4542 return false; // Don't support all element types yet. 4543 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); 4544 MachineIRBuilder MIRBuilder(I); 4545 4546 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; 4547 MachineInstr *ScalarToVec = 4548 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC, 4549 I.getOperand(1).getReg(), MIRBuilder); 4550 if (!ScalarToVec) 4551 return false; 4552 4553 Register DstVec = ScalarToVec->getOperand(0).getReg(); 4554 unsigned DstSize = DstTy.getSizeInBits(); 4555 4556 // Keep track of the last MI we inserted. Later on, we might be able to save 4557 // a copy using it. 4558 MachineInstr *PrevMI = nullptr; 4559 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) { 4560 // Note that if we don't do a subregister copy, we can end up making an 4561 // extra register. 4562 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB, 4563 MIRBuilder); 4564 DstVec = PrevMI->getOperand(0).getReg(); 4565 } 4566 4567 // If DstTy's size in bits is less than 128, then emit a subregister copy 4568 // from DstVec to the last register we've defined. 4569 if (DstSize < 128) { 4570 // Force this to be FPR using the destination vector. 4571 const TargetRegisterClass *RC = 4572 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize); 4573 if (!RC) 4574 return false; 4575 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { 4576 LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); 4577 return false; 4578 } 4579 4580 unsigned SubReg = 0; 4581 if (!getSubRegForClass(RC, TRI, SubReg)) 4582 return false; 4583 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { 4584 LLVM_DEBUG(dbgs() << "Unsupported destination size! 
(" << DstSize 4585 << "\n"); 4586 return false; 4587 } 4588 4589 Register Reg = MRI.createVirtualRegister(RC); 4590 Register DstReg = I.getOperand(0).getReg(); 4591 4592 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 4593 .addReg(DstVec, 0, SubReg); 4594 MachineOperand &RegOp = I.getOperand(1); 4595 RegOp.setReg(Reg); 4596 RBI.constrainGenericRegister(DstReg, *RC, MRI); 4597 } else { 4598 // We don't need a subregister copy. Save a copy by re-using the 4599 // destination register on the final insert. 4600 assert(PrevMI && "PrevMI was null?"); 4601 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg()); 4602 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI); 4603 } 4604 4605 I.eraseFromParent(); 4606 return true; 4607 } 4608 4609 /// Helper function to find an intrinsic ID on an a MachineInstr. Returns the 4610 /// ID if it exists, and 0 otherwise. 4611 static unsigned findIntrinsicID(MachineInstr &I) { 4612 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) { 4613 return Op.isIntrinsicID(); 4614 }); 4615 if (IntrinOp == I.operands_end()) 4616 return 0; 4617 return IntrinOp->getIntrinsicID(); 4618 } 4619 4620 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( 4621 MachineInstr &I, MachineRegisterInfo &MRI) const { 4622 // Find the intrinsic ID. 4623 unsigned IntrinID = findIntrinsicID(I); 4624 if (!IntrinID) 4625 return false; 4626 MachineIRBuilder MIRBuilder(I); 4627 4628 // Select the instruction. 4629 switch (IntrinID) { 4630 default: 4631 return false; 4632 case Intrinsic::trap: 4633 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1); 4634 break; 4635 case Intrinsic::debugtrap: 4636 if (!STI.isTargetWindows()) 4637 return false; 4638 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); 4639 break; 4640 } 4641 4642 I.eraseFromParent(); 4643 return true; 4644 } 4645 4646 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, 4647 MachineRegisterInfo &MRI) { 4648 unsigned IntrinID = findIntrinsicID(I); 4649 if (!IntrinID) 4650 return false; 4651 MachineIRBuilder MIRBuilder(I); 4652 4653 switch (IntrinID) { 4654 default: 4655 break; 4656 case Intrinsic::aarch64_crypto_sha1h: { 4657 Register DstReg = I.getOperand(0).getReg(); 4658 Register SrcReg = I.getOperand(2).getReg(); 4659 4660 // FIXME: Should this be an assert? 4661 if (MRI.getType(DstReg).getSizeInBits() != 32 || 4662 MRI.getType(SrcReg).getSizeInBits() != 32) 4663 return false; 4664 4665 // The operation has to happen on FPRs. Set up some new FPR registers for 4666 // the source and destination if they are on GPRs. 4667 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { 4668 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); 4669 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)}); 4670 4671 // Make sure the copy ends up getting constrained properly. 4672 RBI.constrainGenericRegister(I.getOperand(2).getReg(), 4673 AArch64::GPR32RegClass, MRI); 4674 } 4675 4676 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) 4677 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); 4678 4679 // Actually insert the instruction. 4680 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg}); 4681 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI); 4682 4683 // Did we create a new register for the destination? 4684 if (DstReg != I.getOperand(0).getReg()) { 4685 // Yep. Copy the result of the instruction back into the original 4686 // destination. 
4687 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg}); 4688 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 4689 AArch64::GPR32RegClass, MRI); 4690 } 4691 4692 I.eraseFromParent(); 4693 return true; 4694 } 4695 case Intrinsic::frameaddress: 4696 case Intrinsic::returnaddress: { 4697 MachineFunction &MF = *I.getParent()->getParent(); 4698 MachineFrameInfo &MFI = MF.getFrameInfo(); 4699 4700 unsigned Depth = I.getOperand(2).getImm(); 4701 Register DstReg = I.getOperand(0).getReg(); 4702 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); 4703 4704 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) { 4705 if (MFReturnAddr) { 4706 MIRBuilder.buildCopy({DstReg}, MFReturnAddr); 4707 I.eraseFromParent(); 4708 return true; 4709 } 4710 MFI.setReturnAddressIsTaken(true); 4711 MF.addLiveIn(AArch64::LR, &AArch64::GPR64spRegClass); 4712 // Insert the copy from LR/X30 into the entry block, before it can be 4713 // clobbered by anything. 4714 MachineBasicBlock &EntryBlock = *MF.begin(); 4715 if (!EntryBlock.isLiveIn(AArch64::LR)) 4716 EntryBlock.addLiveIn(AArch64::LR); 4717 MachineIRBuilder EntryBuilder(MF); 4718 EntryBuilder.setInstr(*EntryBlock.begin()); 4719 EntryBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); 4720 MFReturnAddr = DstReg; 4721 I.eraseFromParent(); 4722 return true; 4723 } 4724 4725 MFI.setFrameAddressIsTaken(true); 4726 Register FrameAddr(AArch64::FP); 4727 while (Depth--) { 4728 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); 4729 auto Ldr = 4730 MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}) 4731 .addImm(0); 4732 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI); 4733 FrameAddr = NextFrame; 4734 } 4735 4736 if (IntrinID == Intrinsic::frameaddress) 4737 MIRBuilder.buildCopy({DstReg}, {FrameAddr}); 4738 else { 4739 MFI.setReturnAddressIsTaken(true); 4740 MIRBuilder.buildInstr(AArch64::LDRXui, {DstReg}, {FrameAddr}).addImm(1); 4741 } 4742 4743 I.eraseFromParent(); 4744 return true; 4745 } 4746 } 4747 return false; 4748 } 4749 4750 InstructionSelector::ComplexRendererFns 4751 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { 4752 auto MaybeImmed = getImmedFromMO(Root); 4753 if (MaybeImmed == None || *MaybeImmed > 31) 4754 return None; 4755 uint64_t Enc = (32 - *MaybeImmed) & 0x1f; 4756 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 4757 } 4758 4759 InstructionSelector::ComplexRendererFns 4760 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { 4761 auto MaybeImmed = getImmedFromMO(Root); 4762 if (MaybeImmed == None || *MaybeImmed > 31) 4763 return None; 4764 uint64_t Enc = 31 - *MaybeImmed; 4765 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 4766 } 4767 4768 InstructionSelector::ComplexRendererFns 4769 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { 4770 auto MaybeImmed = getImmedFromMO(Root); 4771 if (MaybeImmed == None || *MaybeImmed > 63) 4772 return None; 4773 uint64_t Enc = (64 - *MaybeImmed) & 0x3f; 4774 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 4775 } 4776 4777 InstructionSelector::ComplexRendererFns 4778 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { 4779 auto MaybeImmed = getImmedFromMO(Root); 4780 if (MaybeImmed == None || *MaybeImmed > 63) 4781 return None; 4782 uint64_t Enc = 63 - *MaybeImmed; 4783 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 4784 } 4785 4786 /// Helper to select an immediate value that 
can be represented as a 12-bit 4787 /// value shifted left by either 0 or 12. If it is possible to do so, return 4788 /// the immediate and shift value. If not, return None. 4789 /// 4790 /// Used by selectArithImmed and selectNegArithImmed. 4791 InstructionSelector::ComplexRendererFns 4792 AArch64InstructionSelector::select12BitValueWithLeftShift( 4793 uint64_t Immed) const { 4794 unsigned ShiftAmt; 4795 if (Immed >> 12 == 0) { 4796 ShiftAmt = 0; 4797 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { 4798 ShiftAmt = 12; 4799 Immed = Immed >> 12; 4800 } else 4801 return None; 4802 4803 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); 4804 return {{ 4805 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); }, 4806 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); }, 4807 }}; 4808 } 4809 4810 /// SelectArithImmed - Select an immediate value that can be represented as 4811 /// a 12-bit value shifted left by either 0 or 12. If so, return true with 4812 /// Val set to the 12-bit value and Shift set to the shifter operand. 4813 InstructionSelector::ComplexRendererFns 4814 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { 4815 // This function is called from the addsub_shifted_imm ComplexPattern, 4816 // which lists [imm] as the list of opcode it's interested in, however 4817 // we still need to check whether the operand is actually an immediate 4818 // here because the ComplexPattern opcode list is only used in 4819 // root-level opcode matching. 4820 auto MaybeImmed = getImmedFromMO(Root); 4821 if (MaybeImmed == None) 4822 return None; 4823 return select12BitValueWithLeftShift(*MaybeImmed); 4824 } 4825 4826 /// SelectNegArithImmed - As above, but negates the value before trying to 4827 /// select it. 4828 InstructionSelector::ComplexRendererFns 4829 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const { 4830 // We need a register here, because we need to know if we have a 64 or 32 4831 // bit immediate. 4832 if (!Root.isReg()) 4833 return None; 4834 auto MaybeImmed = getImmedFromMO(Root); 4835 if (MaybeImmed == None) 4836 return None; 4837 uint64_t Immed = *MaybeImmed; 4838 4839 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" 4840 // have the opposite effect on the C flag, so this pattern mustn't match under 4841 // those circumstances. 4842 if (Immed == 0) 4843 return None; 4844 4845 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on 4846 // the root. 4847 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 4848 if (MRI.getType(Root.getReg()).getSizeInBits() == 32) 4849 Immed = ~((uint32_t)Immed) + 1; 4850 else 4851 Immed = ~Immed + 1ULL; 4852 4853 if (Immed & 0xFFFFFFFFFF000000ULL) 4854 return None; 4855 4856 Immed &= 0xFFFFFFULL; 4857 return select12BitValueWithLeftShift(Immed); 4858 } 4859 4860 /// Return true if it is worth folding MI into an extended register. That is, 4861 /// if it's safe to pull it into the addressing mode of a load or store as a 4862 /// shift. 4863 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( 4864 MachineInstr &MI, const MachineRegisterInfo &MRI) const { 4865 // Always fold if there is one use, or if we're optimizing for size. 4866 Register DefReg = MI.getOperand(0).getReg(); 4867 if (MRI.hasOneNonDBGUse(DefReg) || 4868 MI.getParent()->getParent()->getFunction().hasMinSize()) 4869 return true; 4870 4871 // It's better to avoid folding and recomputing shifts when we don't have a 4872 // fastpath. 
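  // (Roughly: hasLSLFast subtargets can apply a small left-shift to an
  // address operand cheaply, so recomputing the shift at each memory access
  // is acceptable.)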
4873 if (!STI.hasLSLFast()) 4874 return false; 4875 4876 // We have a fastpath, so folding a shift in and potentially computing it 4877 // many times may be beneficial. Check if this is only used in memory ops. 4878 // If it is, then we should fold. 4879 return all_of(MRI.use_nodbg_instructions(DefReg), 4880 [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); 4881 } 4882 4883 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { 4884 switch (Type) { 4885 case AArch64_AM::SXTB: 4886 case AArch64_AM::SXTH: 4887 case AArch64_AM::SXTW: 4888 return true; 4889 default: 4890 return false; 4891 } 4892 } 4893 4894 InstructionSelector::ComplexRendererFns 4895 AArch64InstructionSelector::selectExtendedSHL( 4896 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset, 4897 unsigned SizeInBytes, bool WantsExt) const { 4898 assert(Base.isReg() && "Expected base to be a register operand"); 4899 assert(Offset.isReg() && "Expected offset to be a register operand"); 4900 4901 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 4902 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg()); 4903 if (!OffsetInst) 4904 return None; 4905 4906 unsigned OffsetOpc = OffsetInst->getOpcode(); 4907 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) 4908 return None; 4909 4910 // Make sure that the memory op is a valid size. 4911 int64_t LegalShiftVal = Log2_32(SizeInBytes); 4912 if (LegalShiftVal == 0) 4913 return None; 4914 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) 4915 return None; 4916 4917 // Now, try to find the specific G_CONSTANT. Start by assuming that the 4918 // register we will offset is the LHS, and the register containing the 4919 // constant is the RHS. 4920 Register OffsetReg = OffsetInst->getOperand(1).getReg(); 4921 Register ConstantReg = OffsetInst->getOperand(2).getReg(); 4922 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI); 4923 if (!ValAndVReg) { 4924 // We didn't get a constant on the RHS. If the opcode is a shift, then 4925 // we're done. 4926 if (OffsetOpc == TargetOpcode::G_SHL) 4927 return None; 4928 4929 // If we have a G_MUL, we can use either register. Try looking at the RHS. 4930 std::swap(OffsetReg, ConstantReg); 4931 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI); 4932 if (!ValAndVReg) 4933 return None; 4934 } 4935 4936 // The value must fit into 3 bits, and must be positive. Make sure that is 4937 // true. 4938 int64_t ImmVal = ValAndVReg->Value; 4939 4940 // Since we're going to pull this into a shift, the constant value must be 4941 // a power of 2. If we got a multiply, then we need to check this. 4942 if (OffsetOpc == TargetOpcode::G_MUL) { 4943 if (!isPowerOf2_32(ImmVal)) 4944 return None; 4945 4946 // Got a power of 2. So, the amount we'll shift is the log base-2 of that. 4947 ImmVal = Log2_32(ImmVal); 4948 } 4949 4950 if ((ImmVal & 0x7) != ImmVal) 4951 return None; 4952 4953 // We are only allowed to shift by LegalShiftVal. This shift value is built 4954 // into the instruction, so we can't just use whatever we want. 4955 if (ImmVal != LegalShiftVal) 4956 return None; 4957 4958 unsigned SignExtend = 0; 4959 if (WantsExt) { 4960 // Check if the offset is defined by an extend. 4961 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI); 4962 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true); 4963 if (Ext == AArch64_AM::InvalidShiftExtend) 4964 return None; 4965 4966 SignExtend = isSignExtendShiftType(Ext) ? 
1 : 0; 4967 // We only support SXTW for signed extension here. 4968 if (SignExtend && Ext != AArch64_AM::SXTW) 4969 return None; 4970 4971 // Need a 32-bit wide register here. 4972 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg())); 4973 OffsetReg = ExtInst->getOperand(1).getReg(); 4974 OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB); 4975 } 4976 4977 // We can use the LHS of the GEP as the base, and the LHS of the shift as an 4978 // offset. Signify that we are shifting by setting the shift flag to 1. 4979 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); }, 4980 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); }, 4981 [=](MachineInstrBuilder &MIB) { 4982 // Need to add both immediates here to make sure that they are both 4983 // added to the instruction. 4984 MIB.addImm(SignExtend); 4985 MIB.addImm(1); 4986 }}}; 4987 } 4988 4989 /// This is used for computing addresses like this: 4990 /// 4991 /// ldr x1, [x2, x3, lsl #3] 4992 /// 4993 /// Where x2 is the base register, and x3 is an offset register. The shift-left 4994 /// is a constant value specific to this load instruction. That is, we'll never 4995 /// see anything other than a 3 here (which corresponds to the size of the 4996 /// element being loaded.) 4997 InstructionSelector::ComplexRendererFns 4998 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( 4999 MachineOperand &Root, unsigned SizeInBytes) const { 5000 if (!Root.isReg()) 5001 return None; 5002 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5003 5004 // We want to find something like this: 5005 // 5006 // val = G_CONSTANT LegalShiftVal 5007 // shift = G_SHL off_reg val 5008 // ptr = G_PTR_ADD base_reg shift 5009 // x = G_LOAD ptr 5010 // 5011 // And fold it into this addressing mode: 5012 // 5013 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal] 5014 5015 // Check if we can find the G_PTR_ADD. 5016 MachineInstr *PtrAdd = 5017 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); 5018 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) 5019 return None; 5020 5021 // Now, try to match an opcode which will match our specific offset. 5022 // We want a G_SHL or a G_MUL. 5023 MachineInstr *OffsetInst = 5024 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI); 5025 return selectExtendedSHL(Root, PtrAdd->getOperand(1), 5026 OffsetInst->getOperand(0), SizeInBytes, 5027 /*WantsExt=*/false); 5028 } 5029 5030 /// This is used for computing addresses like this: 5031 /// 5032 /// ldr x1, [x2, x3] 5033 /// 5034 /// Where x2 is the base register, and x3 is an offset register. 5035 /// 5036 /// When possible (or profitable) to fold a G_PTR_ADD into the address calculation, 5037 /// this will do so. Otherwise, it will return None. 5038 InstructionSelector::ComplexRendererFns 5039 AArch64InstructionSelector::selectAddrModeRegisterOffset( 5040 MachineOperand &Root) const { 5041 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5042 5043 // We need a GEP. 5044 MachineInstr *Gep = MRI.getVRegDef(Root.getReg()); 5045 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD) 5046 return None; 5047 5048 // If this is used more than once, let's not bother folding. 5049 // TODO: Check if they are memory ops. If they are, then we can still fold 5050 // without having to recompute anything. 5051 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg())) 5052 return None; 5053 5054 // Base is the GEP's LHS, offset is its RHS. 
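  // For example, for MIR roughly like
  //   %ptr:gpr(p0) = G_PTR_ADD %base, %off
  //   %val:gpr(s64) = G_LOAD %ptr
  // the renderers below allow a load such as "ldr x0, [xBase, xOff]".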
5055 return {{[=](MachineInstrBuilder &MIB) { 5056 MIB.addUse(Gep->getOperand(1).getReg()); 5057 }, 5058 [=](MachineInstrBuilder &MIB) { 5059 MIB.addUse(Gep->getOperand(2).getReg()); 5060 }, 5061 [=](MachineInstrBuilder &MIB) { 5062 // Need to add both immediates here to make sure that they are both 5063 // added to the instruction. 5064 MIB.addImm(0); 5065 MIB.addImm(0); 5066 }}}; 5067 } 5068 5069 /// This is intended to be equivalent to selectAddrModeXRO in 5070 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads. 5071 InstructionSelector::ComplexRendererFns 5072 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, 5073 unsigned SizeInBytes) const { 5074 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5075 5076 // If we have a constant offset, then we probably don't want to match a 5077 // register offset. 5078 if (isBaseWithConstantOffset(Root, MRI)) 5079 return None; 5080 5081 // Try to fold shifts into the addressing mode. 5082 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes); 5083 if (AddrModeFns) 5084 return AddrModeFns; 5085 5086 // If that doesn't work, see if it's possible to fold in registers from 5087 // a GEP. 5088 return selectAddrModeRegisterOffset(Root); 5089 } 5090 5091 /// This is used for computing addresses like this: 5092 /// 5093 /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal] 5094 /// 5095 /// Where we have a 64-bit base register, a 32-bit offset register, and an 5096 /// extend (which may or may not be signed). 5097 InstructionSelector::ComplexRendererFns 5098 AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root, 5099 unsigned SizeInBytes) const { 5100 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5101 5102 MachineInstr *PtrAdd = 5103 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); 5104 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) 5105 return None; 5106 5107 MachineOperand &LHS = PtrAdd->getOperand(1); 5108 MachineOperand &RHS = PtrAdd->getOperand(2); 5109 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI); 5110 5111 // The first case is the same as selectAddrModeXRO, except we need an extend. 5112 // In this case, we try to find a shift and extend, and fold them into the 5113 // addressing mode. 5114 // 5115 // E.g. 5116 // 5117 // off_reg = G_Z/S/ANYEXT ext_reg 5118 // val = G_CONSTANT LegalShiftVal 5119 // shift = G_SHL off_reg val 5120 // ptr = G_PTR_ADD base_reg shift 5121 // x = G_LOAD ptr 5122 // 5123 // In this case we can get a load like this: 5124 // 5125 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal] 5126 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0), 5127 SizeInBytes, /*WantsExt=*/true); 5128 if (ExtendedShl) 5129 return ExtendedShl; 5130 5131 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though. 5132 // 5133 // e.g. 5134 // ldr something, [base_reg, ext_reg, sxtw] 5135 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) 5136 return None; 5137 5138 // Check if this is an extend. We'll get an extend type if it is. 5139 AArch64_AM::ShiftExtendType Ext = 5140 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true); 5141 if (Ext == AArch64_AM::InvalidShiftExtend) 5142 return None; 5143 5144 // Need a 32-bit wide register. 5145 MachineIRBuilder MIB(*PtrAdd); 5146 Register ExtReg = 5147 narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB); 5148 unsigned SignExtend = Ext == AArch64_AM::SXTW; 5149 5150 // Base is LHS, offset is ExtReg. 
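  // E.g. if ext_reg was produced by a G_SEXT from s32, this renders operands
  // for something like "ldr x0, [xBase, wOff, sxtw]", with a shift amount of 0.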
5151 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); }, 5152 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, 5153 [=](MachineInstrBuilder &MIB) { 5154 MIB.addImm(SignExtend); 5155 MIB.addImm(0); 5156 }}}; 5157 } 5158 5159 /// Select a "register plus unscaled signed 9-bit immediate" address. This 5160 /// should only match when there is an offset that is not valid for a scaled 5161 /// immediate addressing mode. The "Size" argument is the size in bytes of the 5162 /// memory reference, which is needed here to know what is valid for a scaled 5163 /// immediate. 5164 InstructionSelector::ComplexRendererFns 5165 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, 5166 unsigned Size) const { 5167 MachineRegisterInfo &MRI = 5168 Root.getParent()->getParent()->getParent()->getRegInfo(); 5169 5170 if (!Root.isReg()) 5171 return None; 5172 5173 if (!isBaseWithConstantOffset(Root, MRI)) 5174 return None; 5175 5176 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); 5177 if (!RootDef) 5178 return None; 5179 5180 MachineOperand &OffImm = RootDef->getOperand(2); 5181 if (!OffImm.isReg()) 5182 return None; 5183 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); 5184 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT) 5185 return None; 5186 int64_t RHSC; 5187 MachineOperand &RHSOp1 = RHS->getOperand(1); 5188 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) 5189 return None; 5190 RHSC = RHSOp1.getCImm()->getSExtValue(); 5191 5192 // If the offset is valid as a scaled immediate, don't match here. 5193 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) 5194 return None; 5195 if (RHSC >= -256 && RHSC < 256) { 5196 MachineOperand &Base = RootDef->getOperand(1); 5197 return {{ 5198 [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, 5199 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, 5200 }}; 5201 } 5202 return None; 5203 } 5204 5205 InstructionSelector::ComplexRendererFns 5206 AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef, 5207 unsigned Size, 5208 MachineRegisterInfo &MRI) const { 5209 if (RootDef.getOpcode() != AArch64::G_ADD_LOW) 5210 return None; 5211 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg()); 5212 if (Adrp.getOpcode() != AArch64::ADRP) 5213 return None; 5214 5215 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG. 5216 // TODO: Need to check GV's offset % size if doing offset folding into globals. 5217 assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global"); 5218 auto GV = Adrp.getOperand(1).getGlobal(); 5219 if (GV->isThreadLocal()) 5220 return None; 5221 5222 auto &MF = *RootDef.getParent()->getParent(); 5223 if (GV->getPointerAlignment(MF.getDataLayout()) < Size) 5224 return None; 5225 5226 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget()); 5227 MachineIRBuilder MIRBuilder(RootDef); 5228 Register AdrpReg = Adrp.getOperand(0).getReg(); 5229 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); }, 5230 [=](MachineInstrBuilder &MIB) { 5231 MIB.addGlobalAddress(GV, /* Offset */ 0, 5232 OpFlags | AArch64II::MO_PAGEOFF | 5233 AArch64II::MO_NC); 5234 }}}; 5235 } 5236 5237 /// Select a "register plus scaled unsigned 12-bit immediate" address. The 5238 /// "Size" argument is the size in bytes of the memory reference, which 5239 /// determines the scale. 
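/// E.g. for a Size of 8, a byte offset of 16 is rendered as the scaled
/// immediate 2, which corresponds to "ldr x0, [x1, #16]".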
5240 InstructionSelector::ComplexRendererFns 5241 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, 5242 unsigned Size) const { 5243 MachineFunction &MF = *Root.getParent()->getParent()->getParent(); 5244 MachineRegisterInfo &MRI = MF.getRegInfo(); 5245 5246 if (!Root.isReg()) 5247 return None; 5248 5249 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); 5250 if (!RootDef) 5251 return None; 5252 5253 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { 5254 return {{ 5255 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, 5256 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, 5257 }}; 5258 } 5259 5260 CodeModel::Model CM = MF.getTarget().getCodeModel(); 5261 // Check if we can fold in the ADD of small code model ADRP + ADD address. 5262 if (CM == CodeModel::Small) { 5263 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI); 5264 if (OpFns) 5265 return OpFns; 5266 } 5267 5268 if (isBaseWithConstantOffset(Root, MRI)) { 5269 MachineOperand &LHS = RootDef->getOperand(1); 5270 MachineOperand &RHS = RootDef->getOperand(2); 5271 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); 5272 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); 5273 if (LHSDef && RHSDef) { 5274 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue(); 5275 unsigned Scale = Log2_32(Size); 5276 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { 5277 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) 5278 return {{ 5279 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); }, 5280 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, 5281 }}; 5282 5283 return {{ 5284 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, 5285 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, 5286 }}; 5287 } 5288 } 5289 } 5290 5291 // Before falling back to our general case, check if the unscaled 5292 // instructions can handle this. If so, that's preferable. 5293 if (selectAddrModeUnscaled(Root, Size).hasValue()) 5294 return None; 5295 5296 return {{ 5297 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, 5298 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, 5299 }}; 5300 } 5301 5302 /// Given a shift instruction, return the correct shift type for that 5303 /// instruction. 5304 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) { 5305 // TODO: Handle AArch64_AM::ROR 5306 switch (MI.getOpcode()) { 5307 default: 5308 return AArch64_AM::InvalidShiftExtend; 5309 case TargetOpcode::G_SHL: 5310 return AArch64_AM::LSL; 5311 case TargetOpcode::G_LSHR: 5312 return AArch64_AM::LSR; 5313 case TargetOpcode::G_ASHR: 5314 return AArch64_AM::ASR; 5315 } 5316 } 5317 5318 /// Select a "shifted register" operand. If the value is not shifted, set the 5319 /// shift operand to a default value of "lsl 0". 5320 /// 5321 /// TODO: Allow shifted register to be rotated in logical instructions. 5322 InstructionSelector::ComplexRendererFns 5323 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const { 5324 if (!Root.isReg()) 5325 return None; 5326 MachineRegisterInfo &MRI = 5327 Root.getParent()->getParent()->getParent()->getRegInfo(); 5328 5329 // Check if the operand is defined by an instruction which corresponds to 5330 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc. 5331 // 5332 // TODO: Handle AArch64_AM::ROR for logical instructions. 
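  // For instance, for MIR along the lines of
  //   %shl:gpr(s64) = G_SHL %x, %cst   ; %cst = G_CONSTANT i64 3
  //   %add:gpr(s64) = G_ADD %y, %shl
  // the shift can be folded so the add is selected as "add x0, xY, xX, lsl #3".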
5333 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg()); 5334 if (!ShiftInst) 5335 return None; 5336 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst); 5337 if (ShType == AArch64_AM::InvalidShiftExtend) 5338 return None; 5339 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI)) 5340 return None; 5341 5342 // Need an immediate on the RHS. 5343 MachineOperand &ShiftRHS = ShiftInst->getOperand(2); 5344 auto Immed = getImmedFromMO(ShiftRHS); 5345 if (!Immed) 5346 return None; 5347 5348 // We have something that we can fold. Fold in the shift's LHS and RHS into 5349 // the instruction. 5350 MachineOperand &ShiftLHS = ShiftInst->getOperand(1); 5351 Register ShiftReg = ShiftLHS.getReg(); 5352 5353 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits(); 5354 unsigned Val = *Immed & (NumBits - 1); 5355 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val); 5356 5357 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); }, 5358 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}}; 5359 } 5360 5361 AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst( 5362 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const { 5363 unsigned Opc = MI.getOpcode(); 5364 5365 // Handle explicit extend instructions first. 5366 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) { 5367 unsigned Size; 5368 if (Opc == TargetOpcode::G_SEXT) 5369 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 5370 else 5371 Size = MI.getOperand(2).getImm(); 5372 assert(Size != 64 && "Extend from 64 bits?"); 5373 switch (Size) { 5374 case 8: 5375 return AArch64_AM::SXTB; 5376 case 16: 5377 return AArch64_AM::SXTH; 5378 case 32: 5379 return AArch64_AM::SXTW; 5380 default: 5381 return AArch64_AM::InvalidShiftExtend; 5382 } 5383 } 5384 5385 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) { 5386 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 5387 assert(Size != 64 && "Extend from 64 bits?"); 5388 switch (Size) { 5389 case 8: 5390 return AArch64_AM::UXTB; 5391 case 16: 5392 return AArch64_AM::UXTH; 5393 case 32: 5394 return AArch64_AM::UXTW; 5395 default: 5396 return AArch64_AM::InvalidShiftExtend; 5397 } 5398 } 5399 5400 // Don't have an explicit extend. Try to handle a G_AND with a constant mask 5401 // on the RHS. 5402 if (Opc != TargetOpcode::G_AND) 5403 return AArch64_AM::InvalidShiftExtend; 5404 5405 Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2)); 5406 if (!MaybeAndMask) 5407 return AArch64_AM::InvalidShiftExtend; 5408 uint64_t AndMask = *MaybeAndMask; 5409 switch (AndMask) { 5410 default: 5411 return AArch64_AM::InvalidShiftExtend; 5412 case 0xFF: 5413 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; 5414 case 0xFFFF: 5415 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; 5416 case 0xFFFFFFFF: 5417 return AArch64_AM::UXTW; 5418 } 5419 } 5420 5421 Register AArch64InstructionSelector::narrowExtendRegIfNeeded( 5422 Register ExtReg, MachineIRBuilder &MIB) const { 5423 MachineRegisterInfo &MRI = *MIB.getMRI(); 5424 if (MRI.getType(ExtReg).getSizeInBits() == 32) 5425 return ExtReg; 5426 5427 // Insert a copy to move ExtReg to GPR32. 5428 Register NarrowReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 5429 auto Copy = MIB.buildCopy({NarrowReg}, {ExtReg}); 5430 5431 // Select the copy into a subregister copy. 
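  // (For a 64-bit ExtReg this effectively becomes a copy from its sub_32
  // subregister, so only the low 32 bits feed the extend.)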
5432 selectCopy(*Copy, TII, MRI, TRI, RBI); 5433 return Copy.getReg(0); 5434 } 5435 5436 Register AArch64InstructionSelector::widenGPRBankRegIfNeeded( 5437 Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const { 5438 assert(WideSize >= 8 && "WideSize is smaller than all possible registers?"); 5439 MachineRegisterInfo &MRI = *MIB.getMRI(); 5440 unsigned NarrowSize = MRI.getType(Reg).getSizeInBits(); 5441 assert(WideSize >= NarrowSize && 5442 "WideSize cannot be smaller than NarrowSize!"); 5443 5444 // If the sizes match, just return the register. 5445 // 5446 // If NarrowSize is an s1, then we can select it to any size, so we'll treat 5447 // it as a don't care. 5448 if (NarrowSize == WideSize || NarrowSize == 1) 5449 return Reg; 5450 5451 // Now check the register classes. 5452 const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI); 5453 const TargetRegisterClass *OrigRC = getMinClassForRegBank(*RB, NarrowSize); 5454 const TargetRegisterClass *WideRC = getMinClassForRegBank(*RB, WideSize); 5455 assert(OrigRC && "Could not determine narrow RC?"); 5456 assert(WideRC && "Could not determine wide RC?"); 5457 5458 // If the sizes differ, but the register classes are the same, there is no 5459 // need to insert a SUBREG_TO_REG. 5460 // 5461 // For example, an s8 that's supposed to be a GPR will be selected to either 5462 // a GPR32 or a GPR64 register. Note that this assumes that the s8 will 5463 // always end up on a GPR32. 5464 if (OrigRC == WideRC) 5465 return Reg; 5466 5467 // We have two different register classes. Insert a SUBREG_TO_REG. 5468 unsigned SubReg = 0; 5469 getSubRegForClass(OrigRC, TRI, SubReg); 5470 assert(SubReg && "Couldn't determine subregister?"); 5471 5472 // Build the SUBREG_TO_REG and return the new, widened register. 5473 auto SubRegToReg = 5474 MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {}) 5475 .addImm(0) 5476 .addUse(Reg) 5477 .addImm(SubReg); 5478 constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI); 5479 return SubRegToReg.getReg(0); 5480 } 5481 5482 /// Select an "extended register" operand. This operand folds in an extend 5483 /// followed by an optional left shift. 5484 InstructionSelector::ComplexRendererFns 5485 AArch64InstructionSelector::selectArithExtendedRegister( 5486 MachineOperand &Root) const { 5487 if (!Root.isReg()) 5488 return None; 5489 MachineRegisterInfo &MRI = 5490 Root.getParent()->getParent()->getParent()->getRegInfo(); 5491 5492 uint64_t ShiftVal = 0; 5493 Register ExtReg; 5494 AArch64_AM::ShiftExtendType Ext; 5495 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI); 5496 if (!RootDef) 5497 return None; 5498 5499 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI)) 5500 return None; 5501 5502 // Check if we can fold a shift and an extend. 5503 if (RootDef->getOpcode() == TargetOpcode::G_SHL) { 5504 // Look for a constant on the RHS of the shift. 5505 MachineOperand &RHS = RootDef->getOperand(2); 5506 Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS); 5507 if (!MaybeShiftVal) 5508 return None; 5509 ShiftVal = *MaybeShiftVal; 5510 if (ShiftVal > 4) 5511 return None; 5512 // Look for a valid extend instruction on the LHS of the shift. 5513 MachineOperand &LHS = RootDef->getOperand(1); 5514 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI); 5515 if (!ExtDef) 5516 return None; 5517 Ext = getExtendTypeForInst(*ExtDef, MRI); 5518 if (Ext == AArch64_AM::InvalidShiftExtend) 5519 return None; 5520 ExtReg = ExtDef->getOperand(1).getReg(); 5521 } else { 5522 // Didn't get a shift. 
Try just folding an extend. 5523 Ext = getExtendTypeForInst(*RootDef, MRI); 5524 if (Ext == AArch64_AM::InvalidShiftExtend) 5525 return None; 5526 ExtReg = RootDef->getOperand(1).getReg(); 5527 5528 // If we have a 32 bit instruction which zeroes out the high half of a 5529 // register, we get an implicit zero extend for free. Check if we have one. 5530 // FIXME: We actually emit the extend right now even though we don't have 5531 // to. 5532 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) { 5533 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg); 5534 if (ExtInst && isDef32(*ExtInst)) 5535 return None; 5536 } 5537 } 5538 5539 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister 5540 // copy. 5541 MachineIRBuilder MIB(*RootDef); 5542 ExtReg = narrowExtendRegIfNeeded(ExtReg, MIB); 5543 5544 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, 5545 [=](MachineInstrBuilder &MIB) { 5546 MIB.addImm(getArithExtendImm(Ext, ShiftVal)); 5547 }}}; 5548 } 5549 5550 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, 5551 const MachineInstr &MI, 5552 int OpIdx) const { 5553 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 5554 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 5555 "Expected G_CONSTANT"); 5556 Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI); 5557 assert(CstVal && "Expected constant value"); 5558 MIB.addImm(CstVal.getValue()); 5559 } 5560 5561 void AArch64InstructionSelector::renderLogicalImm32( 5562 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { 5563 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 5564 "Expected G_CONSTANT"); 5565 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); 5566 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32); 5567 MIB.addImm(Enc); 5568 } 5569 5570 void AArch64InstructionSelector::renderLogicalImm64( 5571 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { 5572 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 5573 "Expected G_CONSTANT"); 5574 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); 5575 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64); 5576 MIB.addImm(Enc); 5577 } 5578 5579 bool AArch64InstructionSelector::isLoadStoreOfNumBytes( 5580 const MachineInstr &MI, unsigned NumBytes) const { 5581 if (!MI.mayLoadOrStore()) 5582 return false; 5583 assert(MI.hasOneMemOperand() && 5584 "Expected load/store to have only one mem op!"); 5585 return (*MI.memoperands_begin())->getSize() == NumBytes; 5586 } 5587 5588 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const { 5589 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 5590 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32) 5591 return false; 5592 5593 // Only return true if we know the operation will zero-out the high half of 5594 // the 64-bit register. Truncates can be subregister copies, which don't 5595 // zero out the high bits. Copies and other copy-like instructions can be 5596 // fed by truncates, or could be lowered as subregister copies. 
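  // E.g. a 32-bit instruction like ADDWrr implicitly zeroes bits [63:32] of
  // the underlying X register, whereas a COPY fed by a G_TRUNC may end up as a
  // subregister copy that leaves those bits alone.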
5597   switch (MI.getOpcode()) {
5598   default:
5599     return true;
5600   case TargetOpcode::COPY:
5601   case TargetOpcode::G_BITCAST:
5602   case TargetOpcode::G_TRUNC:
5603   case TargetOpcode::G_PHI:
5604     return false;
5605   }
5606 }
5607 
5608 
5609 // Perform fixups on the given PHI instruction's operands to force them all
5610 // to be the same as the destination regbank.
5611 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
5612                             const AArch64RegisterBankInfo &RBI) {
5613   assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
5614   Register DstReg = MI.getOperand(0).getReg();
5615   const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
5616   assert(DstRB && "Expected PHI dst to have regbank assigned");
5617   MachineIRBuilder MIB(MI);
5618 
5619   // Go through each operand and ensure it has the same regbank.
5620   for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5621     MachineOperand &MO = MI.getOperand(OpIdx);
5622     if (!MO.isReg())
5623       continue;
5624     Register OpReg = MO.getReg();
5625     const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
5626     if (RB != DstRB) {
5627       // Insert a cross-bank copy.
5628       auto *OpDef = MRI.getVRegDef(OpReg);
5629       const LLT &Ty = MRI.getType(OpReg);
5630       MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
5631       auto Copy = MIB.buildCopy(Ty, OpReg);
5632       MRI.setRegBank(Copy.getReg(0), *DstRB);
5633       MO.setReg(Copy.getReg(0));
5634     }
5635   }
5636 }
5637 
5638 void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
5639   // We're looking for PHIs; build a list so we don't invalidate iterators.
5640   MachineRegisterInfo &MRI = MF.getRegInfo();
5641   SmallVector<MachineInstr *, 32> Phis;
5642   for (auto &BB : MF) {
5643     for (auto &MI : BB) {
5644       if (MI.getOpcode() == TargetOpcode::G_PHI)
5645         Phis.emplace_back(&MI);
5646     }
5647   }
5648 
5649   for (auto *MI : Phis) {
5650     // We need to do some work here if the operand types are < 16 bit and they
5651     // are split across fpr/gpr banks. Since all types <32b on gpr
5652     // end up being assigned gpr32 regclasses, we can end up with PHIs here
5653     // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
5654     // be assigning heterogeneous regbanks to operands if it can avoid it, but
5655     // we still need to be able to deal with it here.
5656     //
5657     // To fix this, if we have a gpr-bank operand < 32b in size and at least
5658     // one other operand is on the fpr bank, then we add cross-bank copies
5659     // to homogenize the operand banks. For simplicity the bank that we choose
5660     // to settle on is whatever bank the def operand has. For example:
5661     //
5662     // %endbb:
5663     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
5664     // =>
5665     // %bb2:
5666     //   ...
5667     //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
5668     //   ...
5669     // %endbb:
5670     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
5671     bool HasGPROp = false, HasFPROp = false;
5672     for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
5673       const auto &MO = MI->getOperand(OpIdx);
5674       if (!MO.isReg())
5675         continue;
5676       const LLT &Ty = MRI.getType(MO.getReg());
5677       if (!Ty.isValid() || !Ty.isScalar())
5678         break;
5679       if (Ty.getSizeInBits() >= 32)
5680         break;
5681       const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
5682       // If for some reason we don't have a regbank yet, don't try anything.
5683       if (!RB)
5684         break;
5685 
5686       if (RB->getID() == AArch64::GPRRegBankID)
5687         HasGPROp = true;
5688       else
5689         HasFPROp = true;
5690     }
5691     // We have heterogeneous regbanks; fix them up.
5692     if (HasGPROp && HasFPROp)
5693       fixupPHIOpBanks(*MI, MRI, RBI);
5694   }
5695 }
5696 
5697 namespace llvm {
5698 InstructionSelector *
5699 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
5700                                  AArch64Subtarget &Subtarget,
5701                                  AArch64RegisterBankInfo &RBI) {
5702   return new AArch64InstructionSelector(TM, Subtarget, RBI);
5703 }
5704 } // end namespace llvm
5705 