//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// AArch64.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "AArch64GlobalISelUtils.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "aarch64-isel"

using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

namespace llvm {
class BlockFrequencyInfo;
class ProfileSummaryInfo;
}

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class AArch64InstructionSelector : public InstructionSelector {
public:
  AArch64InstructionSelector(const AArch64TargetMachine &TM,
                             const AArch64Subtarget &STI,
                             const AArch64RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

  void setupMF(MachineFunction &MF, GISelKnownBits *KB,
               CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI,
               BlockFrequencyInfo *BFI) override {
    InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI);
    MIB.setMF(MF);

    // hasFnAttribute() is expensive to call on every BRCOND selection, so
    // cache it here for each run of the selector.
    ProduceNonFlagSettingCondBr =
        !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
    MFReturnAddr = Register();

    processPHIs(MF);
  }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // A lowering phase that runs before any selection attempts.
  // Returns true if the instruction was modified.
  bool preISelLower(MachineInstr &I);

  // An early selection function that runs before the selectImpl() call.
  bool earlySelect(MachineInstr &I);

  // Do some preprocessing of G_PHIs before we begin selection.
  void processPHIs(MachineFunction &MF);

  bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI);

  /// Eliminate same-sized cross-bank copies into stores before selectImpl().
  bool contractCrossBankCopyIntoStore(MachineInstr &I,
                                      MachineRegisterInfo &MRI);

  bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
                          MachineRegisterInfo &MRI) const;
  bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI) const;

  ///@{
  /// Helper functions for selectCompareBranch.
  bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp,
                                    MachineIRBuilder &MIB) const;
  bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp,
                                    MachineIRBuilder &MIB) const;
  bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert,
                                  MachineBasicBlock *DstMBB,
                                  MachineIRBuilder &MIB) const;
  ///@}

  bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
                           MachineRegisterInfo &MRI);

  bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI);

  // Helper to generate an equivalent of scalar_to_vector into a new register,
  // returned via 'Dst'.
  MachineInstr *emitScalarToVector(unsigned EltSize,
                                   const TargetRegisterClass *DstRC,
                                   Register Scalar,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a lane insert into \p DstReg, or a new vector register if None is
  /// provided.
  ///
  /// The lane inserted into is defined by \p LaneIdx. The vector source
  /// register is given by \p SrcReg. The register containing the element is
  /// given by \p EltReg.
  MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
                               Register EltReg, unsigned LaneIdx,
                               const RegisterBank &RB,
                               MachineIRBuilder &MIRBuilder) const;

  /// Emit a sequence of instructions representing a constant \p CV for a
  /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.)
  ///
  /// \returns the last instruction in the sequence on success, and nullptr
  /// otherwise.
  MachineInstr *emitConstantVector(Register Dst, Constant *CV,
                                   MachineIRBuilder &MIRBuilder,
                                   MachineRegisterInfo &MRI);

  bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI);
  bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
                              MachineRegisterInfo &MRI);
  bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI);

  bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectIntrinsicWithSideEffects(MachineInstr &I,
                                      MachineRegisterInfo &MRI);
  bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI);
  bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI);

  unsigned emitConstantPoolEntry(const Constant *CPVal,
                                 MachineFunction &MF) const;
  MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
                                         MachineIRBuilder &MIRBuilder) const;

  // Emit a vector concat operation.
  MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
                                 Register Op2,
                                 MachineIRBuilder &MIRBuilder) const;

  // Emit an integer compare between LHS and RHS, which checks for Predicate.
  MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                   MachineOperand &Predicate,
                                   MachineIRBuilder &MIRBuilder) const;

  /// Emit a floating point comparison between \p LHS and \p RHS.
  /// \p Pred, if given, is the intended predicate to use.
  MachineInstr *emitFPCompare(Register LHS, Register RHS,
                              MachineIRBuilder &MIRBuilder,
                              Optional<CmpInst::Predicate> = None) const;

  MachineInstr *emitInstr(unsigned Opcode,
                          std::initializer_list<llvm::DstOp> DstOps,
                          std::initializer_list<llvm::SrcOp> SrcOps,
                          MachineIRBuilder &MIRBuilder,
                          const ComplexRendererFns &RenderFns = None) const;
  /// Helper function to emit an add or sub instruction.
  ///
  /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants
  /// listed below, in a specific order.
  ///
  /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
  ///
  /// \code
  ///   const std::array<std::array<unsigned, 2>, 5> Table {
  ///    {{AArch64::ADDXri, AArch64::ADDWri},
  ///     {AArch64::ADDXrs, AArch64::ADDWrs},
  ///     {AArch64::ADDXrr, AArch64::ADDWrr},
  ///     {AArch64::SUBXri, AArch64::SUBWri},
  ///     {AArch64::ADDXrx, AArch64::ADDWrx}}};
  /// \endcode
  ///
  /// Each row in the table corresponds to a different addressing mode. Each
  /// column corresponds to a different register size.
  ///
  /// \attention Rows must be structured as follows:
  ///   - Row 0: The ri opcode variants
  ///   - Row 1: The rs opcode variants
  ///   - Row 2: The rr opcode variants
  ///   - Row 3: The ri opcode variants for negative immediates
  ///   - Row 4: The rx opcode variants
  ///
  /// \attention Columns must be structured as follows:
  ///   - Column 0: The 64-bit opcode variants
  ///   - Column 1: The 32-bit opcode variants
  ///
  /// \p Dst is the destination register of the binop to emit.
  /// \p LHS is the left-hand operand of the binop to emit.
  /// \p RHS is the right-hand operand of the binop to emit.
  MachineInstr *emitAddSub(
      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
      Register Dst, MachineOperand &LHS, MachineOperand &RHS,
      MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
                           AArch64CC::CondCode CC,
                           MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;

  /// Emit a CSet for an integer compare.
  ///
  /// \p DefReg and \p SrcReg are expected to be 32-bit scalar registers.
  MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred,
                                MachineIRBuilder &MIRBuilder,
                                Register SrcReg = AArch64::WZR) const;
  /// Emit a CSet for an FP compare.
  ///
  /// \p Dst is expected to be a 32-bit scalar register.
  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
                                MachineIRBuilder &MIRBuilder) const;

  /// Emit the overflow op for \p Opcode.
  ///
  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
  /// G_USUBO, etc.
  std::pair<MachineInstr *, AArch64CC::CondCode>
  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;

  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
  /// \p IsNegative is true if the test should be "not zero".
  /// This will also optimize the test bit instruction when possible.
  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
                            MachineBasicBlock *DstMBB,
                            MachineIRBuilder &MIB) const;

  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
  MachineInstr *emitCBZ(Register CompareReg, bool IsNegative,
                        MachineBasicBlock *DestMBB,
                        MachineIRBuilder &MIB) const;

  // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td.
  // We use these manually instead of using the importer since it doesn't
  // support SDNodeXForm.
  ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const;
  ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const;

  ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const;
  ComplexRendererFns selectArithImmed(MachineOperand &Root) const;
  ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const;

  ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root,
                                            unsigned Size) const;

  ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 1);
  }
  ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 2);
  }
  ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 4);
  }
  ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 8);
  }
  ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const {
    return selectAddrModeUnscaled(Root, 16);
  }

  /// Helper to try to fold a GISEL_ADD_LOW into an immediate, to be used
  /// from complex pattern matchers like selectAddrModeIndexed().
  ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size,
                                          MachineRegisterInfo &MRI) const;

  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root,
                                           unsigned Size) const;
  template <int Width>
  ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
    return selectAddrModeIndexed(Root, Width / 8);
  }

  bool isWorthFoldingIntoExtendedReg(MachineInstr &MI,
                                     const MachineRegisterInfo &MRI) const;
  ComplexRendererFns
  selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                  unsigned SizeInBytes) const;

  /// Returns a \p ComplexRendererFns which contains a base, offset, and
  /// whether or not a shift + extend should be folded into an addressing
  /// mode. Returns None when this is not profitable or possible.
  ComplexRendererFns
  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
                    MachineOperand &Offset, unsigned SizeInBytes,
                    bool WantsExt) const;
  ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const {
    return selectAddrModeXRO(Root, Width / 8);
  }

  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
                                       unsigned SizeInBytes) const;
  template <int Width>
  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
    return selectAddrModeWRO(Root, Width / 8);
  }

  ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;

  ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
    return selectShiftedRegister(Root);
  }

  ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const {
    // TODO: selectShiftedRegister should allow for rotates on logical shifts.
    // For now, make them the same. The only difference between the two is that
    // logical shifts are allowed to fold in rotates. Otherwise, these are
    // functionally the same.
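    // For example (illustrative of the ISA, not of what is selected here yet):
    // logical ops such as ORR have an "ORR Wd, Wn, Wm, ROR #imm" form, while
    // arithmetic ops like ADD have no ROR shifted-register variant.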
    return selectShiftedRegister(Root);
  }

  /// Given an extend instruction, determine the correct shift-extend type for
  /// that instruction.
  ///
  /// If the instruction is going to be used in a load or store, pass
  /// \p IsLoadStore = true.
  AArch64_AM::ShiftExtendType
  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
                       bool IsLoadStore = false) const;

  /// Move \p Reg to \p RC if \p Reg is not already on \p RC.
  ///
  /// \returns Either \p Reg if no change was necessary, or the new register
  /// created by moving \p Reg.
  ///
  /// Note: This uses emitCopy right now.
  Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC,
                              MachineIRBuilder &MIB) const;

  ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;

  void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
                      int OpIdx = -1) const;
  void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I,
                          int OpIdx = -1) const;
  void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;
  void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI,
                     int OpIdx = -1) const;

  // Materialize a GlobalValue or BlockAddress using a movz+movk sequence.
  void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags);

  // Optimization methods.
  bool tryOptSelect(MachineInstr &MI);
  MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
                                      MachineOperand &Predicate,
                                      MachineIRBuilder &MIRBuilder) const;

  /// Return true if \p MI is a load or store of \p NumBytes bytes.
  bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const;

  /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit
  /// register zeroed out. In other words, the result of MI has been explicitly
  /// zero extended.
  bool isDef32(const MachineInstr &MI) const;

  const AArch64TargetMachine &TM;
  const AArch64Subtarget &STI;
  const AArch64InstrInfo &TII;
  const AArch64RegisterInfo &TRI;
  const AArch64RegisterBankInfo &RBI;

  bool ProduceNonFlagSettingCondBr = false;

  // Some cached values used during selection.
  // We use LR as a live-in register, and we keep track of it here as it can be
  // clobbered by calls.
  Register MFReturnAddr;

  MachineIRBuilder MIB;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

  // We declare the temporaries used by selectImpl() in the class to minimize
  // the cost of constructing placeholder values.
#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

AArch64InstructionSelector::AArch64InstructionSelector(
    const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
    const AArch64RegisterBankInfo &RBI)
    : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
      TRI(*STI.getRegisterInfo()), RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "AArch64GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
static const TargetRegisterClass *
getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
                         const RegisterBankInfo &RBI,
                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::XSeqPairsClassRegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
    if (Ty.getSizeInBits() <= 16)
      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &AArch64::FPR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return &AArch64::FPR128RegClass;
    return nullptr;
  }

  return nullptr;
}

/// Given a register bank, and size in bits, return the smallest register class
/// that can represent that combination.
static const TargetRegisterClass *
getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits,
                      bool GetAllRegSet = false) {
  unsigned RegBankID = RB.getID();

  if (RegBankID == AArch64::GPRRegBankID) {
    if (SizeInBits <= 32)
      return GetAllRegSet ? &AArch64::GPR32allRegClass
                          : &AArch64::GPR32RegClass;
    if (SizeInBits == 64)
      return GetAllRegSet ? &AArch64::GPR64allRegClass
                          : &AArch64::GPR64RegClass;
    if (SizeInBits == 128)
      return &AArch64::XSeqPairsClassRegClass;
  }

  if (RegBankID == AArch64::FPRRegBankID) {
    switch (SizeInBits) {
    default:
      return nullptr;
    case 8:
      return &AArch64::FPR8RegClass;
    case 16:
      return &AArch64::FPR16RegClass;
    case 32:
      return &AArch64::FPR32RegClass;
    case 64:
      return &AArch64::FPR64RegClass;
    case 128:
      return &AArch64::FPR128RegClass;
    }
  }

  return nullptr;
}

/// Returns the correct subregister to use for a given register class.
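/// E.g. FPR32 maps to ssub, while GPR32 maps to sub_32 (see the switch below).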
static bool getSubRegForClass(const TargetRegisterClass *RC,
                              const TargetRegisterInfo &TRI, unsigned &SubReg) {
  switch (TRI.getRegSizeInBits(*RC)) {
  case 8:
    SubReg = AArch64::bsub;
    break;
  case 16:
    SubReg = AArch64::hsub;
    break;
  case 32:
    if (RC != &AArch64::FPR32RegClass)
      SubReg = AArch64::sub_32;
    else
      SubReg = AArch64::ssub;
    break;
  case 64:
    SubReg = AArch64::dsub;
    break;
  default:
    LLVM_DEBUG(
        dbgs()
        << "Couldn't find appropriate subregister for register class.\n");
    return false;
  }

  return true;
}

/// Returns the minimum size the given register bank can hold.
static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
  switch (RB.getID()) {
  case AArch64::GPRRegBankID:
    return 32;
  case AArch64::FPRRegBankID:
    return 8;
  default:
    llvm_unreachable("Tried to get minimum size for unknown register bank.");
  }
}

/// Create a REG_SEQUENCE instruction using the registers in \p Regs.
/// Helper function for functions like createDTuple and createQTuple.
///
/// \p RegClassIDs - The list of register class IDs available for some tuple of
/// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is
/// expected to contain between 2 and 4 tuple classes.
///
/// \p SubRegs - The list of subregister classes associated with each register
/// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0
/// subregister class. The index of each subregister class is expected to
/// correspond with the index of each register class.
///
/// \returns Either the destination register of the REG_SEQUENCE instruction
/// that was created, or the 0th element of \p Regs if \p Regs contains a
/// single element.
static Register createTuple(ArrayRef<Register> Regs,
                            const unsigned RegClassIDs[],
                            const unsigned SubRegs[], MachineIRBuilder &MIB) {
  unsigned NumRegs = Regs.size();
  if (NumRegs == 1)
    return Regs[0];
  assert(NumRegs >= 2 && NumRegs <= 4 &&
         "Only support between 2 and 4 registers in a tuple!");
  const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo();
  auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]);
  auto RegSequence =
      MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {});
  for (unsigned I = 0, E = Regs.size(); I < E; ++I) {
    RegSequence.addUse(Regs[I]);
    RegSequence.addImm(SubRegs[I]);
  }
  return RegSequence.getReg(0);
}

/// Create a tuple of D-registers using the registers in \p Regs.
static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
}

/// Create a tuple of Q-registers using the registers in \p Regs.
static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
  return createTuple(Regs, RegClassIDs, SubRegs, MIB);
}

static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
  auto &MI = *Root.getParent();
  auto &MBB = *MI.getParent();
  auto &MF = *MBB.getParent();
  auto &MRI = MF.getRegInfo();
  uint64_t Immed;
  if (Root.isImm())
    Immed = Root.getImm();
  else if (Root.isCImm())
    Immed = Root.getCImm()->getZExtValue();
  else if (Root.isReg()) {
    auto ValAndVReg =
        getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
    if (!ValAndVReg)
      return None;
    Immed = ValAndVReg->Value.getSExtValue();
  } else
    return None;
  return Immed;
}

/// Check whether \p I is a currently unsupported binary operation:
/// - it has an unsized type
/// - an operand is not a vreg
/// - its operands are not all in the same bank
/// These are checks that should someday live in the verifier, but right now,
/// these are mostly limitations of the aarch64 selector.
static bool unsupportedBinOp(const MachineInstr &I,
                             const AArch64RegisterBankInfo &RBI,
                             const MachineRegisterInfo &MRI,
                             const AArch64RegisterInfo &TRI) {
  LLT Ty = MRI.getType(I.getOperand(0).getReg());
  if (!Ty.isValid()) {
    LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
    return true;
  }

  const RegisterBank *PrevOpBank = nullptr;
  for (auto &MO : I.operands()) {
    // FIXME: Support non-register operands.
    if (!MO.isReg()) {
      LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
      return true;
    }

    // FIXME: Can generic operations have physical register operands? If
    // so, this will need to be taught about that, and we'll need to get the
    // bank out of the minimal class for the register.
    // Either way, this needs to be documented (and possibly verified).
    if (!Register::isVirtualRegister(MO.getReg())) {
      LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
      return true;
    }

    const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
    if (!OpBank) {
      LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
      return true;
    }

    if (PrevOpBank && OpBank != PrevOpBank) {
      LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
      return true;
    }
    PrevOpBank = OpBank;
  }
  return false;
}

/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
/// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
/// and of size \p OpSize.
/// \returns \p GenericOpc if the combination is unsupported.
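/// E.g. (G_SHL, GPRRegBankID, 32) yields AArch64::LSLVWr, and
/// (G_FADD, FPRRegBankID, 64) yields AArch64::FADDDrr.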
static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
                               unsigned OpSize) {
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    if (OpSize == 32) {
      switch (GenericOpc) {
      case TargetOpcode::G_SHL:
        return AArch64::LSLVWr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVWr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVWr;
      default:
        return GenericOpc;
      }
    } else if (OpSize == 64) {
      switch (GenericOpc) {
      case TargetOpcode::G_PTR_ADD:
        return AArch64::ADDXrr;
      case TargetOpcode::G_SHL:
        return AArch64::LSLVXr;
      case TargetOpcode::G_LSHR:
        return AArch64::LSRVXr;
      case TargetOpcode::G_ASHR:
        return AArch64::ASRVXr;
      default:
        return GenericOpc;
      }
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDSrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBSrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULSrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVSrr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_FADD:
        return AArch64::FADDDrr;
      case TargetOpcode::G_FSUB:
        return AArch64::FSUBDrr;
      case TargetOpcode::G_FMUL:
        return AArch64::FMULDrr;
      case TargetOpcode::G_FDIV:
        return AArch64::FDIVDrr;
      case TargetOpcode::G_OR:
        return AArch64::ORRv8i8;
      default:
        return GenericOpc;
      }
    }
    break;
  }
  return GenericOpc;
}

/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
/// appropriate for the (value) register bank \p RegBankID and of memory access
/// size \p OpSize. This returns the variant with the base+unsigned-immediate
/// addressing mode (e.g., LDRXui).
/// \returns \p GenericOpc if the combination is unsupported.
static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
                                    unsigned OpSize) {
  const bool isStore = GenericOpc == TargetOpcode::G_STORE;
  switch (RegBankID) {
  case AArch64::GPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
    case 16:
      return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
    case 32:
      return isStore ? AArch64::STRWui : AArch64::LDRWui;
    case 64:
      return isStore ? AArch64::STRXui : AArch64::LDRXui;
    }
    break;
  case AArch64::FPRRegBankID:
    switch (OpSize) {
    case 8:
      return isStore ? AArch64::STRBui : AArch64::LDRBui;
    case 16:
      return isStore ? AArch64::STRHui : AArch64::LDRHui;
    case 32:
      return isStore ? AArch64::STRSui : AArch64::LDRSui;
    case 64:
      return isStore ? AArch64::STRDui : AArch64::LDRDui;
    }
    break;
  }
  return GenericOpc;
}

#ifndef NDEBUG
/// Helper function that verifies that we have a valid copy at the end of
/// selectCopy. Verifies that the source and dest have the expected sizes and
/// then returns true.
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to setup initial types, the number of
       // bits may not exactly match.
       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
/// E.g. "To = COPY SrcReg:SubReg"
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
                       const RegisterBankInfo &RBI, Register SrcReg,
                       const TargetRegisterClass *To, unsigned SubReg) {
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  assert(SubReg && "Expected a valid subregister");

  MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank. Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination
  // registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't
  // want to verify that the src and dst are the same size, since that's
  // handled by the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert((I.isCopy() ||
            (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
             !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
           "No phys reg on generic operator!");
    bool ValidCopy = true;
#ifndef NDEBUG
    ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
    assert(ValidCopy && "Invalid copy.");
#endif
    (void)KnownValid;
    return ValidCopy;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
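      // E.g. (illustrative) a 32-bit GPR source copied into a 64-bit GPR
      // destination becomes:
      //   %promoted:gpr64 = SUBREG_TO_REG 0, %src, %subreg.sub_32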
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      MachineOperand &RegOp = I.getOperand(1);
      RegOp.setReg(PromoteReg);

      // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
      KnownValid = true;
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (Register::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its uses or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // If this is a GPR ZEXT, we want to just reduce it down into a copy.
  // The sizes will be mismatched with the source < 32b but that's ok.
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  }
  return GenericOpc;
}

MachineInstr *
AArch64InstructionSelector::emitSelect(Register Dst, Register True,
                                       Register False, AArch64CC::CondCode CC,
                                       MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  assert(RBI.getRegBank(False, MRI, TRI)->getID() ==
             RBI.getRegBank(True, MRI, TRI)->getID() &&
         "Expected both select operands to have the same regbank?");
  LLT Ty = MRI.getType(True);
  if (Ty.isVector())
    return nullptr;
  const unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) &&
         "Expected 32 bit or 64 bit select only?");
  const bool Is32Bit = Size == 32;
  if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) {
    unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr;
    auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
    constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI);
    return &*FCSel;
  }

  // By default, we'll try and emit a CSEL.
  unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr;
  bool Optimized = false;
  auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI,
                                 &Optimized](Register &Reg, Register &OtherReg,
                                             bool Invert) {
    if (Optimized)
      return false;

    // Attempt to fold:
    //
    // %sub = G_SUB 0, %x
    // %select = G_SELECT cc, %reg, %sub
    //
    // Into:
    // %select = CSNEG %reg, %x, cc
    Register MatchReg;
    if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %xor = G_XOR %x, -1
    // %select = G_SELECT cc, %reg, %xor
    //
    // Into:
    // %select = CSINV %reg, %x, cc
    if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) {
      Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    // Attempt to fold:
    //
    // %add = G_ADD %x, 1
    // %select = G_SELECT cc, %reg, %add
    //
    // Into:
    // %select = CSINC %reg, %x, cc
    if (mi_match(Reg, MRI,
                 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)),
                          m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) {
      Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
      Reg = MatchReg;
      if (Invert) {
        CC = AArch64CC::getInvertedCondCode(CC);
        std::swap(Reg, OtherReg);
      }
      return true;
    }

    return false;
  };

  // Helper lambda which tries to use CSINC/CSINV for the instruction when its
  // true/false values are constants.
  // FIXME: All of these patterns already exist in tablegen. We should be
  // able to import these.
  auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI,
                          &Optimized]() {
    if (Optimized)
      return false;
    auto TrueCst = getConstantVRegValWithLookThrough(True, MRI);
    auto FalseCst = getConstantVRegValWithLookThrough(False, MRI);
    if (!TrueCst && !FalseCst)
      return false;

    Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR;
    if (TrueCst && FalseCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      int64_t F = FalseCst->Value.getSExtValue();

      if (T == 0 && F == 1) {
        // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = ZReg;
        False = ZReg;
        return true;
      }

      if (T == 0 && F == -1) {
        // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = ZReg;
        False = ZReg;
        return true;
      }
    }

    if (TrueCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      if (T == 1) {
        // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
        return true;
      }

      if (T == -1) {
        // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
        return true;
      }
    }

    if (FalseCst) {
      int64_t F = FalseCst->Value.getSExtValue();
      if (F == 1) {
        // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        False = ZReg;
        return true;
      }

      if (F == -1) {
        // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        False = ZReg;
        return true;
      }
    }
    return false;
  };

  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
  Optimized |= TryOptSelectCst();
  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
  return &*SelectInst;
}

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

/// Return a register which can be used as a bit to test in a TB(N)Z.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
                              MachineRegisterInfo &MRI) {
  assert(Reg.isValid() && "Expected valid register!");
  bool HasZext = false;
  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
    unsigned Opc = MI->getOpcode();

    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      break;

    // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
    //
    // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
    // on the truncated x is the same as the bit number on x.
    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      if (Opc == TargetOpcode::G_ZEXT)
        HasZext = true;

      Register NextReg = MI->getOperand(1).getReg();
      // Did we find something worth folding?
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
        break;

      // NextReg is worth folding. Keep looking.
      Reg = NextReg;
      continue;
    }

    // Attempt to find a suitable operation with a constant on one side.
    Optional<uint64_t> C;
    Register TestReg;
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
    case TargetOpcode::G_XOR: {
      TestReg = MI->getOperand(1).getReg();
      Register ConstantReg = MI->getOperand(2).getReg();
      auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
      if (!VRegAndVal) {
        // AND commutes, check the other side for a constant.
        // FIXME: Can we canonicalize the constant so that it's always on the
        // same side at some point earlier?
        std::swap(ConstantReg, TestReg);
        VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI);
      }
      if (VRegAndVal) {
        if (HasZext)
          C = VRegAndVal->Value.getZExtValue();
        else
          C = VRegAndVal->Value.getSExtValue();
      }
      break;
    }
    case TargetOpcode::G_ASHR:
    case TargetOpcode::G_LSHR:
    case TargetOpcode::G_SHL: {
      TestReg = MI->getOperand(1).getReg();
      auto VRegAndVal =
          getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
      if (VRegAndVal)
        C = VRegAndVal->Value.getSExtValue();
      break;
    }
    }

    // Didn't find a constant or viable register. Bail out of the loop.
    if (!C || !TestReg.isValid())
      break;

    // We found a suitable instruction with a constant. Check to see if we can
    // walk through the instruction.
    Register NextReg;
    unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits();
    switch (Opc) {
    default:
      break;
    case TargetOpcode::G_AND:
      // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set.
      if ((*C >> Bit) & 1)
        NextReg = TestReg;
      break;
    case TargetOpcode::G_SHL:
      // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in
      // the type of the register.
      if (*C <= Bit && (Bit - *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit - *C;
      }
      break;
    case TargetOpcode::G_ASHR:
      // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits
      // in x
      NextReg = TestReg;
      Bit = Bit + *C;
      if (Bit >= TestRegSize)
        Bit = TestRegSize - 1;
      break;
    case TargetOpcode::G_LSHR:
      // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x
      if ((Bit + *C) < TestRegSize) {
        NextReg = TestReg;
        Bit = Bit + *C;
      }
      break;
    case TargetOpcode::G_XOR:
      // We can walk through a G_XOR by inverting whether we use tbz/tbnz when
      // appropriate.
      //
      // e.g. If x' = xor x, c, and the b-th bit is set in c then
      //
      // tbz x', b -> tbnz x, b
      //
      // Because x' only has the b-th bit set if x does not.
      if ((*C >> Bit) & 1)
        Invert = !Invert;
      NextReg = TestReg;
      break;
    }

    // Check if we found anything worth folding.
    if (!NextReg.isValid())
      return Reg;
    Reg = NextReg;
  }

  return Reg;
}

MachineInstr *AArch64InstructionSelector::emitTestBit(
    Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
    MachineIRBuilder &MIB) const {
  assert(TestReg.isValid());
  assert(ProduceNonFlagSettingCondBr &&
         "Cannot emit TB(N)Z with speculation tracking!");
  MachineRegisterInfo &MRI = *MIB.getMRI();

  // Attempt to optimize the test bit by walking over instructions.
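  // E.g. if TestReg is defined by %y = G_SHL %x, 2 and we want bit 3 of %y,
  // getTestBitReg returns %x with Bit updated to 1 (see the G_SHL case above).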
  TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
  LLT Ty = MRI.getType(TestReg);
  unsigned Size = Ty.getSizeInBits();
  assert(!Ty.isVector() && "Expected a scalar!");
  assert(Bit < 64 && "Bit is too large!");

  // When the test register is a 64-bit register, we have to narrow to make
  // TBNZW work.
  bool UseWReg = Bit < 32;
  unsigned NecessarySize = UseWReg ? 32 : 64;
  if (Size != NecessarySize)
    TestReg = moveScalarRegClass(
        TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass,
        MIB);

  static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
                                          {AArch64::TBZW, AArch64::TBNZW}};
  unsigned Opc = OpcTable[UseWReg][IsNegative];
  auto TestBitMI =
      MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
  constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI);
  return &*TestBitMI;
}

bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
    MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB,
    MachineIRBuilder &MIB) const {
  assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?");
  // Given something like this:
  //
  // %x = ...Something...
  // %one = G_CONSTANT i64 1
  // %zero = G_CONSTANT i64 0
  // %and = G_AND %x, %one
  // %cmp = G_ICMP intpred(ne), %and, %zero
  // %cmp_trunc = G_TRUNC %cmp
  // G_BRCOND %cmp_trunc, %bb.3
  //
  // We want to try and fold the AND into the G_BRCOND and produce either a
  // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
  //
  // In this case, we'd get
  //
  // TBNZ %x, %bb.3
  //

  // Check if the AND has a constant on its RHS which we can use as a mask.
  // If it's a power of 2, then it's the same as checking a specific bit.
  // (e.g., ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
  auto MaybeBit = getConstantVRegValWithLookThrough(
      AndInst.getOperand(2).getReg(), *MIB.getMRI());
  if (!MaybeBit)
    return false;

  int32_t Bit = MaybeBit->Value.exactLogBase2();
  if (Bit < 0)
    return false;

  Register TestReg = AndInst.getOperand(1).getReg();

  // Emit a TB(N)Z.
  emitTestBit(TestReg, Bit, Invert, DstMBB, MIB);
  return true;
}

MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg,
                                                  bool IsNegative,
                                                  MachineBasicBlock *DestMBB,
                                                  MachineIRBuilder &MIB) const {
  assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!");
  MachineRegisterInfo &MRI = *MIB.getMRI();
  assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() ==
             AArch64::GPRRegBankID &&
         "Expected GPRs only?");
  auto Ty = MRI.getType(CompareReg);
  unsigned Width = Ty.getSizeInBits();
  assert(!Ty.isVector() && "Expected scalar only?");
  assert(Width <= 64 && "Expected width to be at most 64?");
  static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX},
                                          {AArch64::CBNZW, AArch64::CBNZX}};
  unsigned Opc = OpcTable[IsNegative][Width == 64];
  auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB);
  constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
  return &*BranchMI;
}

bool AArch64InstructionSelector::selectCompareBranchFedByFCmp(
    MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const {
  assert(FCmp.getOpcode() == TargetOpcode::G_FCMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
  // totally clean. Some of them require two branches to implement.
  auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate();
  emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB,
                Pred);
  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2);
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB);
  if (CC2 != AArch64CC::AL)
    MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB);
  I.eraseFromParent();
  return true;
}

bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp(
    MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const {
  assert(ICmp.getOpcode() == TargetOpcode::G_ICMP);
  assert(I.getOpcode() == TargetOpcode::G_BRCOND);
  // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z.
  //
  // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z
  // instructions will not be produced, as they are conditional branch
  // instructions that do not set flags.
  if (!ProduceNonFlagSettingCondBr)
    return false;

  MachineRegisterInfo &MRI = *MIB.getMRI();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
  auto Pred =
      static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate());
  Register LHS = ICmp.getOperand(2).getReg();
  Register RHS = ICmp.getOperand(3).getReg();

  // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that.
  auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
  MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI);

  // When we can emit a TB(N)Z, prefer that.
  //
  // Handle non-commutative condition codes first.
  // Note that we don't want to do this when we have a G_AND because it can
  // become a tst. The tst will make the test bit in the TB(N)Z redundant.
  if (VRegAndVal && !AndInst) {
    int64_t C = VRegAndVal->Value.getSExtValue();

    // When we have a greater-than comparison, we can just test if the msb is
    // zero.
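    // (Signed x > -1 holds exactly when the sign bit of x is clear, so a TBZ
    // on the msb is sufficient.)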
1579 if (C == -1 && Pred == CmpInst::ICMP_SGT) { 1580 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; 1581 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB); 1582 I.eraseFromParent(); 1583 return true; 1584 } 1585 1586 // When we have a less than comparison, we can just test if the msb is not 1587 // zero. 1588 if (C == 0 && Pred == CmpInst::ICMP_SLT) { 1589 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; 1590 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB); 1591 I.eraseFromParent(); 1592 return true; 1593 } 1594 } 1595 1596 // Attempt to handle commutative condition codes. Right now, that's only 1597 // eq/ne. 1598 if (ICmpInst::isEquality(Pred)) { 1599 if (!VRegAndVal) { 1600 std::swap(RHS, LHS); 1601 VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); 1602 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); 1603 } 1604 1605 if (VRegAndVal && VRegAndVal->Value == 0) { 1606 // If there's a G_AND feeding into this branch, try to fold it away by 1607 // emitting a TB(N)Z instead. 1608 // 1609 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be 1610 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding 1611 // would be redundant. 1612 if (AndInst && 1613 tryOptAndIntoCompareBranch( 1614 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) { 1615 I.eraseFromParent(); 1616 return true; 1617 } 1618 1619 // Otherwise, try to emit a CB(N)Z instead. 1620 auto LHSTy = MRI.getType(LHS); 1621 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) { 1622 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB); 1623 I.eraseFromParent(); 1624 return true; 1625 } 1626 } 1627 } 1628 1629 return false; 1630 } 1631 1632 bool AArch64InstructionSelector::selectCompareBranchFedByICmp( 1633 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { 1634 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); 1635 assert(I.getOpcode() == TargetOpcode::G_BRCOND); 1636 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) 1637 return true; 1638 1639 // Couldn't optimize. Emit a compare + a Bcc. 1640 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1641 auto PredOp = ICmp.getOperand(1); 1642 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); 1643 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( 1644 static_cast<CmpInst::Predicate>(PredOp.getPredicate())); 1645 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); 1646 I.eraseFromParent(); 1647 return true; 1648 } 1649 1650 bool AArch64InstructionSelector::selectCompareBranch( 1651 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) { 1652 Register CondReg = I.getOperand(0).getReg(); 1653 MachineInstr *CCMI = MRI.getVRegDef(CondReg); 1654 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) { 1655 CondReg = CCMI->getOperand(1).getReg(); 1656 CCMI = MRI.getVRegDef(CondReg); 1657 } 1658 1659 // Try to select the G_BRCOND using whatever is feeding the condition if 1660 // possible. 1661 unsigned CCMIOpc = CCMI->getOpcode(); 1662 if (CCMIOpc == TargetOpcode::G_FCMP) 1663 return selectCompareBranchFedByFCmp(I, *CCMI, MIB); 1664 if (CCMIOpc == TargetOpcode::G_ICMP) 1665 return selectCompareBranchFedByICmp(I, *CCMI, MIB); 1666 1667 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z 1668 // instructions will not be produced, as they are conditional branch 1669 // instructions that do not set flags. 
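  // When TB(N)Z is allowed, branch directly on bit 0 of the condition
  // register, e.g. G_BRCOND %cond, %bb ==> TBNZ %cond, #0, %bb.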
1670 if (ProduceNonFlagSettingCondBr) { 1671 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true, 1672 I.getOperand(1).getMBB(), MIB); 1673 I.eraseFromParent(); 1674 return true; 1675 } 1676 1677 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. 1678 auto TstMI = 1679 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); 1680 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 1681 auto Bcc = MIB.buildInstr(AArch64::Bcc) 1682 .addImm(AArch64CC::EQ) 1683 .addMBB(I.getOperand(1).getMBB()); 1684 I.eraseFromParent(); 1685 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); 1686 } 1687 1688 /// Returns the element immediate value of a vector shift operand if found. 1689 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR. 1690 static Optional<int64_t> getVectorShiftImm(Register Reg, 1691 MachineRegisterInfo &MRI) { 1692 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand"); 1693 MachineInstr *OpMI = MRI.getVRegDef(Reg); 1694 assert(OpMI && "Expected to find a vreg def for vector shift operand"); 1695 return getAArch64VectorSplatScalar(*OpMI, MRI); 1696 } 1697 1698 /// Matches and returns the shift immediate value for a SHL instruction given 1699 /// a shift operand. 1700 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) { 1701 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI); 1702 if (!ShiftImm) 1703 return None; 1704 // Check the immediate is in range for a SHL. 1705 int64_t Imm = *ShiftImm; 1706 if (Imm < 0) 1707 return None; 1708 switch (SrcTy.getElementType().getSizeInBits()) { 1709 default: 1710 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift"); 1711 return None; 1712 case 8: 1713 if (Imm > 7) 1714 return None; 1715 break; 1716 case 16: 1717 if (Imm > 15) 1718 return None; 1719 break; 1720 case 32: 1721 if (Imm > 31) 1722 return None; 1723 break; 1724 case 64: 1725 if (Imm > 63) 1726 return None; 1727 break; 1728 } 1729 return Imm; 1730 } 1731 1732 bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I, 1733 MachineRegisterInfo &MRI) { 1734 assert(I.getOpcode() == TargetOpcode::G_SHL); 1735 Register DstReg = I.getOperand(0).getReg(); 1736 const LLT Ty = MRI.getType(DstReg); 1737 Register Src1Reg = I.getOperand(1).getReg(); 1738 Register Src2Reg = I.getOperand(2).getReg(); 1739 1740 if (!Ty.isVector()) 1741 return false; 1742 1743 // Check if we have a vector of constants on RHS that we can select as the 1744 // immediate form. 1745 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI); 1746 1747 unsigned Opc = 0; 1748 if (Ty == LLT::fixed_vector(2, 64)) { 1749 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64; 1750 } else if (Ty == LLT::fixed_vector(4, 32)) { 1751 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; 1752 } else if (Ty == LLT::fixed_vector(2, 32)) { 1753 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; 1754 } else if (Ty == LLT::fixed_vector(4, 16)) { 1755 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16; 1756 } else if (Ty == LLT::fixed_vector(8, 16)) { 1757 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; 1758 } else if (Ty == LLT::fixed_vector(16, 8)) { 1759 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; 1760 } else if (Ty == LLT::fixed_vector(8, 8)) { 1761 Opc = ImmVal ? 
AArch64::SHLv8i8_shift : AArch64::USHLv8i8; 1762 } else { 1763 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type"); 1764 return false; 1765 } 1766 1767 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg}); 1768 if (ImmVal) 1769 Shl.addImm(*ImmVal); 1770 else 1771 Shl.addUse(Src2Reg); 1772 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI); 1773 I.eraseFromParent(); 1774 return true; 1775 } 1776 1777 bool AArch64InstructionSelector::selectVectorAshrLshr( 1778 MachineInstr &I, MachineRegisterInfo &MRI) { 1779 assert(I.getOpcode() == TargetOpcode::G_ASHR || 1780 I.getOpcode() == TargetOpcode::G_LSHR); 1781 Register DstReg = I.getOperand(0).getReg(); 1782 const LLT Ty = MRI.getType(DstReg); 1783 Register Src1Reg = I.getOperand(1).getReg(); 1784 Register Src2Reg = I.getOperand(2).getReg(); 1785 1786 if (!Ty.isVector()) 1787 return false; 1788 1789 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; 1790 1791 // We expect the immediate case to be lowered in the PostLegalCombiner to 1792 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. 1793 1794 // There is not a shift right register instruction, but the shift left 1795 // register instruction takes a signed value, where negative numbers specify a 1796 // right shift. 1797 1798 unsigned Opc = 0; 1799 unsigned NegOpc = 0; 1800 const TargetRegisterClass *RC = 1801 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI); 1802 if (Ty == LLT::fixed_vector(2, 64)) { 1803 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; 1804 NegOpc = AArch64::NEGv2i64; 1805 } else if (Ty == LLT::fixed_vector(4, 32)) { 1806 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; 1807 NegOpc = AArch64::NEGv4i32; 1808 } else if (Ty == LLT::fixed_vector(2, 32)) { 1809 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; 1810 NegOpc = AArch64::NEGv2i32; 1811 } else if (Ty == LLT::fixed_vector(4, 16)) { 1812 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16; 1813 NegOpc = AArch64::NEGv4i16; 1814 } else if (Ty == LLT::fixed_vector(8, 16)) { 1815 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; 1816 NegOpc = AArch64::NEGv8i16; 1817 } else if (Ty == LLT::fixed_vector(16, 8)) { 1818 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; 1819 NegOpc = AArch64::NEGv16i8; 1820 } else if (Ty == LLT::fixed_vector(8, 8)) { 1821 Opc = IsASHR ? 
AArch64::SSHLv8i8 : AArch64::USHLv8i8; 1822 NegOpc = AArch64::NEGv8i8; 1823 } else { 1824 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type"); 1825 return false; 1826 } 1827 1828 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg}); 1829 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI); 1830 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg}); 1831 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI); 1832 I.eraseFromParent(); 1833 return true; 1834 } 1835 1836 bool AArch64InstructionSelector::selectVaStartAAPCS( 1837 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 1838 return false; 1839 } 1840 1841 bool AArch64InstructionSelector::selectVaStartDarwin( 1842 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 1843 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); 1844 Register ListReg = I.getOperand(0).getReg(); 1845 1846 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 1847 1848 auto MIB = 1849 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri)) 1850 .addDef(ArgsAddrReg) 1851 .addFrameIndex(FuncInfo->getVarArgsStackIndex()) 1852 .addImm(0) 1853 .addImm(0); 1854 1855 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 1856 1857 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui)) 1858 .addUse(ArgsAddrReg) 1859 .addUse(ListReg) 1860 .addImm(0) 1861 .addMemOperand(*I.memoperands_begin()); 1862 1863 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 1864 I.eraseFromParent(); 1865 return true; 1866 } 1867 1868 void AArch64InstructionSelector::materializeLargeCMVal( 1869 MachineInstr &I, const Value *V, unsigned OpFlags) { 1870 MachineBasicBlock &MBB = *I.getParent(); 1871 MachineFunction &MF = *MBB.getParent(); 1872 MachineRegisterInfo &MRI = MF.getRegInfo(); 1873 1874 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {}); 1875 MovZ->addOperand(MF, I.getOperand(1)); 1876 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | 1877 AArch64II::MO_NC); 1878 MovZ->addOperand(MF, MachineOperand::CreateImm(0)); 1879 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); 1880 1881 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset, 1882 Register ForceDstReg) { 1883 Register DstReg = ForceDstReg 1884 ? 
ForceDstReg 1885 : MRI.createVirtualRegister(&AArch64::GPR64RegClass); 1886 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg); 1887 if (auto *GV = dyn_cast<GlobalValue>(V)) { 1888 MovI->addOperand(MF, MachineOperand::CreateGA( 1889 GV, MovZ->getOperand(1).getOffset(), Flags)); 1890 } else { 1891 MovI->addOperand( 1892 MF, MachineOperand::CreateBA(cast<BlockAddress>(V), 1893 MovZ->getOperand(1).getOffset(), Flags)); 1894 } 1895 MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); 1896 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); 1897 return DstReg; 1898 }; 1899 Register DstReg = BuildMovK(MovZ.getReg(0), 1900 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); 1901 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); 1902 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); 1903 } 1904 1905 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { 1906 MachineBasicBlock &MBB = *I.getParent(); 1907 MachineFunction &MF = *MBB.getParent(); 1908 MachineRegisterInfo &MRI = MF.getRegInfo(); 1909 1910 switch (I.getOpcode()) { 1911 case TargetOpcode::G_SHL: 1912 case TargetOpcode::G_ASHR: 1913 case TargetOpcode::G_LSHR: { 1914 // These shifts are legalized to have 64 bit shift amounts because we want 1915 // to take advantage of the existing imported selection patterns that assume 1916 // the immediates are s64s. However, if the shifted type is 32 bits and for 1917 // some reason we receive input GMIR that has an s64 shift amount that's not 1918 // a G_CONSTANT, insert a truncate so that we can still select the s32 1919 // register-register variant. 1920 Register SrcReg = I.getOperand(1).getReg(); 1921 Register ShiftReg = I.getOperand(2).getReg(); 1922 const LLT ShiftTy = MRI.getType(ShiftReg); 1923 const LLT SrcTy = MRI.getType(SrcReg); 1924 if (SrcTy.isVector()) 1925 return false; 1926 assert(!ShiftTy.isVector() && "unexpected vector shift ty"); 1927 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64) 1928 return false; 1929 auto *AmtMI = MRI.getVRegDef(ShiftReg); 1930 assert(AmtMI && "could not find a vreg definition for shift amount"); 1931 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) { 1932 // Insert a subregister copy to implement a 64->32 trunc 1933 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {}) 1934 .addReg(ShiftReg, 0, AArch64::sub_32); 1935 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); 1936 I.getOperand(2).setReg(Trunc.getReg(0)); 1937 } 1938 return true; 1939 } 1940 case TargetOpcode::G_STORE: { 1941 bool Changed = contractCrossBankCopyIntoStore(I, MRI); 1942 MachineOperand &SrcOp = I.getOperand(0); 1943 if (MRI.getType(SrcOp.getReg()).isPointer()) { 1944 // Allow matching with imported patterns for stores of pointers. Unlike 1945 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy 1946 // and constrain. 1947 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp); 1948 Register NewSrc = Copy.getReg(0); 1949 SrcOp.setReg(NewSrc); 1950 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI); 1951 Changed = true; 1952 } 1953 return Changed; 1954 } 1955 case TargetOpcode::G_PTR_ADD: 1956 return convertPtrAddToAdd(I, MRI); 1957 case TargetOpcode::G_LOAD: { 1958 // For scalar loads of pointers, we try to convert the dest type from p0 1959 // to s64 so that our imported patterns can match. 
Like with the G_PTR_ADD 1960 // conversion, this should be ok because all users should have been 1961 // selected already, so the type doesn't matter for them. 1962 Register DstReg = I.getOperand(0).getReg(); 1963 const LLT DstTy = MRI.getType(DstReg); 1964 if (!DstTy.isPointer()) 1965 return false; 1966 MRI.setType(DstReg, LLT::scalar(64)); 1967 return true; 1968 } 1969 case AArch64::G_DUP: { 1970 // Convert the type from p0 to s64 to help selection. 1971 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 1972 if (!DstTy.getElementType().isPointer()) 1973 return false; 1974 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg()); 1975 MRI.setType(I.getOperand(0).getReg(), 1976 DstTy.changeElementType(LLT::scalar(64))); 1977 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass); 1978 I.getOperand(1).setReg(NewSrc.getReg(0)); 1979 return true; 1980 } 1981 case TargetOpcode::G_UITOFP: 1982 case TargetOpcode::G_SITOFP: { 1983 // If both source and destination regbanks are FPR, then convert the opcode 1984 // to G_SITOF so that the importer can select it to an fpr variant. 1985 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank 1986 // copy. 1987 Register SrcReg = I.getOperand(1).getReg(); 1988 LLT SrcTy = MRI.getType(SrcReg); 1989 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 1990 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits()) 1991 return false; 1992 1993 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { 1994 if (I.getOpcode() == TargetOpcode::G_SITOFP) 1995 I.setDesc(TII.get(AArch64::G_SITOF)); 1996 else 1997 I.setDesc(TII.get(AArch64::G_UITOF)); 1998 return true; 1999 } 2000 return false; 2001 } 2002 default: 2003 return false; 2004 } 2005 } 2006 2007 /// This lowering tries to look for G_PTR_ADD instructions and then converts 2008 /// them to a standard G_ADD with a COPY on the source. 2009 /// 2010 /// The motivation behind this is to expose the add semantics to the imported 2011 /// tablegen patterns. We shouldn't need to check for uses being loads/stores, 2012 /// because the selector works bottom up, uses before defs. By the time we 2013 /// end up trying to select a G_PTR_ADD, we should have already attempted to 2014 /// fold this into addressing modes and were therefore unsuccessful. 2015 bool AArch64InstructionSelector::convertPtrAddToAdd( 2016 MachineInstr &I, MachineRegisterInfo &MRI) { 2017 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); 2018 Register DstReg = I.getOperand(0).getReg(); 2019 Register AddOp1Reg = I.getOperand(1).getReg(); 2020 const LLT PtrTy = MRI.getType(DstReg); 2021 if (PtrTy.getAddressSpace() != 0) 2022 return false; 2023 2024 const LLT CastPtrTy = 2025 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64); 2026 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg); 2027 // Set regbanks on the registers. 
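  // (Vector pointers are assigned to the FPR bank by RegBankSelect, so their
  // <2 x s64> counterpart goes there too; scalar pointers stay on GPR.)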
2028 if (PtrTy.isVector()) 2029 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID)); 2030 else 2031 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); 2032 2033 // Now turn the %dst(p0) = G_PTR_ADD %base, off into: 2034 // %dst(intty) = G_ADD %intbase, off 2035 I.setDesc(TII.get(TargetOpcode::G_ADD)); 2036 MRI.setType(DstReg, CastPtrTy); 2037 I.getOperand(1).setReg(PtrToInt.getReg(0)); 2038 if (!select(*PtrToInt)) { 2039 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"); 2040 return false; 2041 } 2042 2043 // Also take the opportunity here to try to do some optimization. 2044 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. 2045 Register NegatedReg; 2046 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg)))) 2047 return true; 2048 I.getOperand(2).setReg(NegatedReg); 2049 I.setDesc(TII.get(TargetOpcode::G_SUB)); 2050 return true; 2051 } 2052 2053 bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I, 2054 MachineRegisterInfo &MRI) { 2055 // We try to match the immediate variant of LSL, which is actually an alias 2056 // for a special case of UBFM. Otherwise, we fall back to the imported 2057 // selector which will match the register variant. 2058 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op"); 2059 const auto &MO = I.getOperand(2); 2060 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI); 2061 if (!VRegAndVal) 2062 return false; 2063 2064 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2065 if (DstTy.isVector()) 2066 return false; 2067 bool Is64Bit = DstTy.getSizeInBits() == 64; 2068 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO); 2069 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO); 2070 2071 if (!Imm1Fn || !Imm2Fn) 2072 return false; 2073 2074 auto NewI = 2075 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri, 2076 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()}); 2077 2078 for (auto &RenderFn : *Imm1Fn) 2079 RenderFn(NewI); 2080 for (auto &RenderFn : *Imm2Fn) 2081 RenderFn(NewI); 2082 2083 I.eraseFromParent(); 2084 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); 2085 } 2086 2087 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( 2088 MachineInstr &I, MachineRegisterInfo &MRI) { 2089 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE"); 2090 // If we're storing a scalar, it doesn't matter what register bank that 2091 // scalar is on. All that matters is the size. 2092 // 2093 // So, if we see something like this (with a 32-bit scalar as an example): 2094 // 2095 // %x:gpr(s32) = ... something ... 2096 // %y:fpr(s32) = COPY %x:gpr(s32) 2097 // G_STORE %y:fpr(s32) 2098 // 2099 // We can fix this up into something like this: 2100 // 2101 // G_STORE %x:gpr(s32) 2102 // 2103 // And then continue the selection process normally. 2104 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI); 2105 if (!DefDstReg.isValid()) 2106 return false; 2107 LLT DefDstTy = MRI.getType(DefDstReg); 2108 Register StoreSrcReg = I.getOperand(0).getReg(); 2109 LLT StoreSrcTy = MRI.getType(StoreSrcReg); 2110 2111 // If we get something strange like a physical register, then we shouldn't 2112 // go any further. 2113 if (!DefDstTy.isValid()) 2114 return false; 2115 2116 // Are the source and dst types the same size? 
2117 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) 2118 return false; 2119 2120 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == 2121 RBI.getRegBank(DefDstReg, MRI, TRI)) 2122 return false; 2123 2124 // We have a cross-bank copy, which is entering a store. Let's fold it. 2125 I.getOperand(0).setReg(DefDstReg); 2126 return true; 2127 } 2128 2129 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) { 2130 assert(I.getParent() && "Instruction should be in a basic block!"); 2131 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 2132 2133 MachineBasicBlock &MBB = *I.getParent(); 2134 MachineFunction &MF = *MBB.getParent(); 2135 MachineRegisterInfo &MRI = MF.getRegInfo(); 2136 2137 switch (I.getOpcode()) { 2138 case AArch64::G_DUP: { 2139 // Before selecting a DUP instruction, check if it is better selected as a 2140 // MOV or load from a constant pool. 2141 Register Src = I.getOperand(1).getReg(); 2142 auto ValAndVReg = getConstantVRegValWithLookThrough(Src, MRI); 2143 if (!ValAndVReg) 2144 return false; 2145 LLVMContext &Ctx = MF.getFunction().getContext(); 2146 Register Dst = I.getOperand(0).getReg(); 2147 auto *CV = ConstantDataVector::getSplat( 2148 MRI.getType(Dst).getNumElements(), 2149 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()), 2150 ValAndVReg->Value)); 2151 if (!emitConstantVector(Dst, CV, MIB, MRI)) 2152 return false; 2153 I.eraseFromParent(); 2154 return true; 2155 } 2156 case TargetOpcode::G_BR: { 2157 // If the branch jumps to the fallthrough block, don't bother emitting it. 2158 // Only do this for -O0 for a good code size improvement, because when 2159 // optimizations are enabled we want to leave this choice to 2160 // MachineBlockPlacement. 2161 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None; 2162 if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB())) 2163 return false; 2164 I.eraseFromParent(); 2165 return true; 2166 } 2167 case TargetOpcode::G_SHL: 2168 return earlySelectSHL(I, MRI); 2169 case TargetOpcode::G_CONSTANT: { 2170 bool IsZero = false; 2171 if (I.getOperand(1).isCImm()) 2172 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0; 2173 else if (I.getOperand(1).isImm()) 2174 IsZero = I.getOperand(1).getImm() == 0; 2175 2176 if (!IsZero) 2177 return false; 2178 2179 Register DefReg = I.getOperand(0).getReg(); 2180 LLT Ty = MRI.getType(DefReg); 2181 if (Ty.getSizeInBits() == 64) { 2182 I.getOperand(1).ChangeToRegister(AArch64::XZR, false); 2183 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); 2184 } else if (Ty.getSizeInBits() == 32) { 2185 I.getOperand(1).ChangeToRegister(AArch64::WZR, false); 2186 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI); 2187 } else 2188 return false; 2189 2190 I.setDesc(TII.get(TargetOpcode::COPY)); 2191 return true; 2192 } 2193 2194 case TargetOpcode::G_ADD: { 2195 // Check if this is being fed by a G_ICMP on either side. 2196 // 2197 // (cmp pred, x, y) + z 2198 // 2199 // In the above case, when the cmp is true, we increment z by 1. So, we can 2200 // fold the add into the cset for the cmp by using cinc. 2201 // 2202 // FIXME: This would probably be a lot nicer in PostLegalizerLowering. 2203 Register X = I.getOperand(1).getReg(); 2204 2205 // Only handle scalars. Scalar G_ICMP is only legal for s32, so bail out 2206 // early if we see it. 
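    // When the fold applies, e.g. %d = G_ADD (G_ICMP eq, %a, %b), %z becomes
    // a compare followed by a conditional increment of %z (a CINC, i.e. a
    // CSINC with the inverted condition code).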
2207 LLT Ty = MRI.getType(X); 2208 if (Ty.isVector() || Ty.getSizeInBits() != 32) 2209 return false; 2210 2211 Register CmpReg = I.getOperand(2).getReg(); 2212 MachineInstr *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI); 2213 if (!Cmp) { 2214 std::swap(X, CmpReg); 2215 Cmp = getOpcodeDef(TargetOpcode::G_ICMP, CmpReg, MRI); 2216 if (!Cmp) 2217 return false; 2218 } 2219 auto Pred = 2220 static_cast<CmpInst::Predicate>(Cmp->getOperand(1).getPredicate()); 2221 emitIntegerCompare(Cmp->getOperand(2), Cmp->getOperand(3), 2222 Cmp->getOperand(1), MIB); 2223 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB, X); 2224 I.eraseFromParent(); 2225 return true; 2226 } 2227 case TargetOpcode::G_OR: { 2228 // Look for operations that take the lower `Width=Size-ShiftImm` bits of 2229 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via 2230 // shifting and masking that we can replace with a BFI (encoded as a BFM). 2231 Register Dst = I.getOperand(0).getReg(); 2232 LLT Ty = MRI.getType(Dst); 2233 2234 if (!Ty.isScalar()) 2235 return false; 2236 2237 unsigned Size = Ty.getSizeInBits(); 2238 if (Size != 32 && Size != 64) 2239 return false; 2240 2241 Register ShiftSrc; 2242 int64_t ShiftImm; 2243 Register MaskSrc; 2244 int64_t MaskImm; 2245 if (!mi_match( 2246 Dst, MRI, 2247 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))), 2248 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm)))))) 2249 return false; 2250 2251 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm)) 2252 return false; 2253 2254 int64_t Immr = Size - ShiftImm; 2255 int64_t Imms = Size - ShiftImm - 1; 2256 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri; 2257 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB); 2258 I.eraseFromParent(); 2259 return true; 2260 } 2261 default: 2262 return false; 2263 } 2264 } 2265 2266 bool AArch64InstructionSelector::select(MachineInstr &I) { 2267 assert(I.getParent() && "Instruction should be in a basic block!"); 2268 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 2269 2270 MachineBasicBlock &MBB = *I.getParent(); 2271 MachineFunction &MF = *MBB.getParent(); 2272 MachineRegisterInfo &MRI = MF.getRegInfo(); 2273 2274 const AArch64Subtarget *Subtarget = 2275 &static_cast<const AArch64Subtarget &>(MF.getSubtarget()); 2276 if (Subtarget->requiresStrictAlign()) { 2277 // We don't support this feature yet. 2278 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n"); 2279 return false; 2280 } 2281 2282 MIB.setInstrAndDebugLoc(I); 2283 2284 unsigned Opcode = I.getOpcode(); 2285 // G_PHI requires same handling as PHI 2286 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { 2287 // Certain non-generic instructions also need some special handling. 
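    // E.g. LOAD_STACK_GUARD just needs its operands constrained, and
    // PHI/G_PHI need a register class assigned to their def before being
    // selected as PHI.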
2288 2289 if (Opcode == TargetOpcode::LOAD_STACK_GUARD) 2290 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2291 2292 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { 2293 const Register DefReg = I.getOperand(0).getReg(); 2294 const LLT DefTy = MRI.getType(DefReg); 2295 2296 const RegClassOrRegBank &RegClassOrBank = 2297 MRI.getRegClassOrRegBank(DefReg); 2298 2299 const TargetRegisterClass *DefRC 2300 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); 2301 if (!DefRC) { 2302 if (!DefTy.isValid()) { 2303 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); 2304 return false; 2305 } 2306 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); 2307 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI); 2308 if (!DefRC) { 2309 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); 2310 return false; 2311 } 2312 } 2313 2314 I.setDesc(TII.get(TargetOpcode::PHI)); 2315 2316 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); 2317 } 2318 2319 if (I.isCopy()) 2320 return selectCopy(I, TII, MRI, TRI, RBI); 2321 2322 return true; 2323 } 2324 2325 2326 if (I.getNumOperands() != I.getNumExplicitOperands()) { 2327 LLVM_DEBUG( 2328 dbgs() << "Generic instruction has unexpected implicit operands\n"); 2329 return false; 2330 } 2331 2332 // Try to do some lowering before we start instruction selecting. These 2333 // lowerings are purely transformations on the input G_MIR and so selection 2334 // must continue after any modification of the instruction. 2335 if (preISelLower(I)) { 2336 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it. 2337 } 2338 2339 // There may be patterns where the importer can't deal with them optimally, 2340 // but does select it to a suboptimal sequence so our custom C++ selection 2341 // code later never has a chance to work on it. Therefore, we have an early 2342 // selection attempt here to give priority to certain selection routines 2343 // over the imported ones. 2344 if (earlySelect(I)) 2345 return true; 2346 2347 if (selectImpl(I, *CoverageInfo)) 2348 return true; 2349 2350 LLT Ty = 2351 I.getOperand(0).isReg() ? 
MRI.getType(I.getOperand(0).getReg()) : LLT{}; 2352 2353 switch (Opcode) { 2354 case TargetOpcode::G_SBFX: 2355 case TargetOpcode::G_UBFX: { 2356 static const unsigned OpcTable[2][2] = { 2357 {AArch64::UBFMWri, AArch64::UBFMXri}, 2358 {AArch64::SBFMWri, AArch64::SBFMXri}}; 2359 bool IsSigned = Opcode == TargetOpcode::G_SBFX; 2360 unsigned Size = Ty.getSizeInBits(); 2361 unsigned Opc = OpcTable[IsSigned][Size == 64]; 2362 auto Cst1 = 2363 getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI); 2364 assert(Cst1 && "Should have gotten a constant for src 1?"); 2365 auto Cst2 = 2366 getConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI); 2367 assert(Cst2 && "Should have gotten a constant for src 2?"); 2368 auto LSB = Cst1->Value.getZExtValue(); 2369 auto Width = Cst2->Value.getZExtValue(); 2370 auto BitfieldInst = 2371 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)}) 2372 .addImm(LSB) 2373 .addImm(LSB + Width - 1); 2374 I.eraseFromParent(); 2375 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI); 2376 } 2377 case TargetOpcode::G_BRCOND: 2378 return selectCompareBranch(I, MF, MRI); 2379 2380 case TargetOpcode::G_BRINDIRECT: { 2381 I.setDesc(TII.get(AArch64::BR)); 2382 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2383 } 2384 2385 case TargetOpcode::G_BRJT: 2386 return selectBrJT(I, MRI); 2387 2388 case AArch64::G_ADD_LOW: { 2389 // This op may have been separated from it's ADRP companion by the localizer 2390 // or some other code motion pass. Given that many CPUs will try to 2391 // macro fuse these operations anyway, select this into a MOVaddr pseudo 2392 // which will later be expanded into an ADRP+ADD pair after scheduling. 2393 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg()); 2394 if (BaseMI->getOpcode() != AArch64::ADRP) { 2395 I.setDesc(TII.get(AArch64::ADDXri)); 2396 I.addOperand(MachineOperand::CreateImm(0)); 2397 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2398 } 2399 assert(TM.getCodeModel() == CodeModel::Small && 2400 "Expected small code model"); 2401 auto Op1 = BaseMI->getOperand(1); 2402 auto Op2 = I.getOperand(2); 2403 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {}) 2404 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(), 2405 Op1.getTargetFlags()) 2406 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(), 2407 Op2.getTargetFlags()); 2408 I.eraseFromParent(); 2409 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI); 2410 } 2411 2412 case TargetOpcode::G_BSWAP: { 2413 // Handle vector types for G_BSWAP directly. 2414 Register DstReg = I.getOperand(0).getReg(); 2415 LLT DstTy = MRI.getType(DstReg); 2416 2417 // We should only get vector types here; everything else is handled by the 2418 // importer right now. 2419 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) { 2420 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n"); 2421 return false; 2422 } 2423 2424 // Only handle 4 and 2 element vectors for now. 2425 // TODO: 16-bit elements. 2426 unsigned NumElts = DstTy.getNumElements(); 2427 if (NumElts != 4 && NumElts != 2) { 2428 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n"); 2429 return false; 2430 } 2431 2432 // Choose the correct opcode for the supported types. Right now, that's 2433 // v2s32, v4s32, and v2s64. 2434 unsigned Opc = 0; 2435 unsigned EltSize = DstTy.getElementType().getSizeInBits(); 2436 if (EltSize == 32) 2437 Opc = (DstTy.getNumElements() == 2) ? 
AArch64::REV32v8i8 2438 : AArch64::REV32v16i8; 2439 else if (EltSize == 64) 2440 Opc = AArch64::REV64v16i8; 2441 2442 // We should always get something by the time we get here... 2443 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?"); 2444 2445 I.setDesc(TII.get(Opc)); 2446 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2447 } 2448 2449 case TargetOpcode::G_FCONSTANT: 2450 case TargetOpcode::G_CONSTANT: { 2451 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; 2452 2453 const LLT s8 = LLT::scalar(8); 2454 const LLT s16 = LLT::scalar(16); 2455 const LLT s32 = LLT::scalar(32); 2456 const LLT s64 = LLT::scalar(64); 2457 const LLT s128 = LLT::scalar(128); 2458 const LLT p0 = LLT::pointer(0, 64); 2459 2460 const Register DefReg = I.getOperand(0).getReg(); 2461 const LLT DefTy = MRI.getType(DefReg); 2462 const unsigned DefSize = DefTy.getSizeInBits(); 2463 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2464 2465 // FIXME: Redundant check, but even less readable when factored out. 2466 if (isFP) { 2467 if (Ty != s32 && Ty != s64 && Ty != s128) { 2468 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty 2469 << " constant, expected: " << s32 << " or " << s64 2470 << " or " << s128 << '\n'); 2471 return false; 2472 } 2473 2474 if (RB.getID() != AArch64::FPRRegBankID) { 2475 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty 2476 << " constant on bank: " << RB 2477 << ", expected: FPR\n"); 2478 return false; 2479 } 2480 2481 // The case when we have 0.0 is covered by tablegen. Reject it here so we 2482 // can be sure tablegen works correctly and isn't rescued by this code. 2483 // 0.0 is not covered by tablegen for FP128. So we will handle this 2484 // scenario in the code here. 2485 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0)) 2486 return false; 2487 } else { 2488 // s32 and s64 are covered by tablegen. 2489 if (Ty != p0 && Ty != s8 && Ty != s16) { 2490 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty 2491 << " constant, expected: " << s32 << ", " << s64 2492 << ", or " << p0 << '\n'); 2493 return false; 2494 } 2495 2496 if (RB.getID() != AArch64::GPRRegBankID) { 2497 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty 2498 << " constant on bank: " << RB 2499 << ", expected: GPR\n"); 2500 return false; 2501 } 2502 } 2503 2504 // We allow G_CONSTANT of types < 32b. 2505 const unsigned MovOpc = 2506 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm; 2507 2508 if (isFP) { 2509 // Either emit a FMOV, or emit a copy to emit a normal mov. 2510 const TargetRegisterClass &GPRRC = 2511 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass; 2512 const TargetRegisterClass &FPRRC = 2513 DefSize == 32 ? AArch64::FPR32RegClass 2514 : (DefSize == 64 ? AArch64::FPR64RegClass 2515 : AArch64::FPR128RegClass); 2516 2517 // For 64b values, emit a constant pool load instead. 2518 // For s32, use a cp load if we have optsize/minsize. 2519 if (DefSize == 64 || DefSize == 128 || 2520 (DefSize == 32 && shouldOptForSize(&MF))) { 2521 auto *FPImm = I.getOperand(1).getFPImm(); 2522 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB); 2523 if (!LoadMI) { 2524 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n"); 2525 return false; 2526 } 2527 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()}); 2528 I.eraseFromParent(); 2529 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI); 2530 } 2531 2532 // Nope. Emit a copy and use a normal mov instead. 
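      // E.g. a 32-bit G_FCONSTANT is selected as:
      //   %tmp:gpr32 = MOVi32imm <bit pattern>
      //   %dst:fpr32 = COPY %tmp:gpr32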
2533 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC); 2534 MachineOperand &RegOp = I.getOperand(0); 2535 RegOp.setReg(DefGPRReg); 2536 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 2537 MIB.buildCopy({DefReg}, {DefGPRReg}); 2538 2539 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) { 2540 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n"); 2541 return false; 2542 } 2543 2544 MachineOperand &ImmOp = I.getOperand(1); 2545 // FIXME: Is going through int64_t always correct? 2546 ImmOp.ChangeToImmediate( 2547 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); 2548 } else if (I.getOperand(1).isCImm()) { 2549 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue(); 2550 I.getOperand(1).ChangeToImmediate(Val); 2551 } else if (I.getOperand(1).isImm()) { 2552 uint64_t Val = I.getOperand(1).getImm(); 2553 I.getOperand(1).ChangeToImmediate(Val); 2554 } 2555 2556 I.setDesc(TII.get(MovOpc)); 2557 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2558 return true; 2559 } 2560 case TargetOpcode::G_EXTRACT: { 2561 Register DstReg = I.getOperand(0).getReg(); 2562 Register SrcReg = I.getOperand(1).getReg(); 2563 LLT SrcTy = MRI.getType(SrcReg); 2564 LLT DstTy = MRI.getType(DstReg); 2565 (void)DstTy; 2566 unsigned SrcSize = SrcTy.getSizeInBits(); 2567 2568 if (SrcTy.getSizeInBits() > 64) { 2569 // This should be an extract of an s128, which is like a vector extract. 2570 if (SrcTy.getSizeInBits() != 128) 2571 return false; 2572 // Only support extracting 64 bits from an s128 at the moment. 2573 if (DstTy.getSizeInBits() != 64) 2574 return false; 2575 2576 unsigned Offset = I.getOperand(2).getImm(); 2577 if (Offset % 64 != 0) 2578 return false; 2579 2580 // Check we have the right regbank always. 2581 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 2582 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2583 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!"); 2584 2585 if (SrcRB.getID() == AArch64::GPRRegBankID) { 2586 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 2587 .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64); 2588 I.eraseFromParent(); 2589 return true; 2590 } 2591 2592 // Emit the same code as a vector extract. 2593 // Offset must be a multiple of 64. 2594 unsigned LaneIdx = Offset / 64; 2595 MachineInstr *Extract = emitExtractVectorElt( 2596 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); 2597 if (!Extract) 2598 return false; 2599 I.eraseFromParent(); 2600 return true; 2601 } 2602 2603 I.setDesc(TII.get(SrcSize == 64 ? 
AArch64::UBFMXri : AArch64::UBFMWri)); 2604 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + 2605 Ty.getSizeInBits() - 1); 2606 2607 if (SrcSize < 64) { 2608 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && 2609 "unexpected G_EXTRACT types"); 2610 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2611 } 2612 2613 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 2614 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 2615 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 2616 .addReg(DstReg, 0, AArch64::sub_32); 2617 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 2618 AArch64::GPR32RegClass, MRI); 2619 I.getOperand(0).setReg(DstReg); 2620 2621 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2622 } 2623 2624 case TargetOpcode::G_INSERT: { 2625 LLT SrcTy = MRI.getType(I.getOperand(2).getReg()); 2626 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2627 unsigned DstSize = DstTy.getSizeInBits(); 2628 // Larger inserts are vectors, same-size ones should be something else by 2629 // now (split up or turned into COPYs). 2630 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) 2631 return false; 2632 2633 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri)); 2634 unsigned LSB = I.getOperand(3).getImm(); 2635 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); 2636 I.getOperand(3).setImm((DstSize - LSB) % DstSize); 2637 MachineInstrBuilder(MF, I).addImm(Width - 1); 2638 2639 if (DstSize < 64) { 2640 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && 2641 "unexpected G_INSERT types"); 2642 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2643 } 2644 2645 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 2646 BuildMI(MBB, I.getIterator(), I.getDebugLoc(), 2647 TII.get(AArch64::SUBREG_TO_REG)) 2648 .addDef(SrcReg) 2649 .addImm(0) 2650 .addUse(I.getOperand(2).getReg()) 2651 .addImm(AArch64::sub_32); 2652 RBI.constrainGenericRegister(I.getOperand(2).getReg(), 2653 AArch64::GPR32RegClass, MRI); 2654 I.getOperand(2).setReg(SrcReg); 2655 2656 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2657 } 2658 case TargetOpcode::G_FRAME_INDEX: { 2659 // allocas and G_FRAME_INDEX are only supported in addrspace(0). 2660 if (Ty != LLT::pointer(0, 64)) { 2661 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty 2662 << ", expected: " << LLT::pointer(0, 64) << '\n'); 2663 return false; 2664 } 2665 I.setDesc(TII.get(AArch64::ADDXri)); 2666 2667 // MOs for a #0 shifted immediate. 2668 I.addOperand(MachineOperand::CreateImm(0)); 2669 I.addOperand(MachineOperand::CreateImm(0)); 2670 2671 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2672 } 2673 2674 case TargetOpcode::G_GLOBAL_VALUE: { 2675 auto GV = I.getOperand(1).getGlobal(); 2676 if (GV->isThreadLocal()) 2677 return selectTLSGlobalValue(I, MRI); 2678 2679 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); 2680 if (OpFlags & AArch64II::MO_GOT) { 2681 I.setDesc(TII.get(AArch64::LOADgot)); 2682 I.getOperand(1).setTargetFlags(OpFlags); 2683 } else if (TM.getCodeModel() == CodeModel::Large) { 2684 // Materialize the global using movz/movk instructions. 
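      // E.g. the 64-bit address of @g is built as a MOVZXi of the low 16-bit
      // chunk (G0) followed by MOVKXi instructions inserting the G1, G2 and
      // G3 chunks at shifts 16, 32 and 48.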
2685 materializeLargeCMVal(I, GV, OpFlags); 2686 I.eraseFromParent(); 2687 return true; 2688 } else if (TM.getCodeModel() == CodeModel::Tiny) { 2689 I.setDesc(TII.get(AArch64::ADR)); 2690 I.getOperand(1).setTargetFlags(OpFlags); 2691 } else { 2692 I.setDesc(TII.get(AArch64::MOVaddr)); 2693 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); 2694 MachineInstrBuilder MIB(MF, I); 2695 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(), 2696 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 2697 } 2698 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2699 } 2700 2701 case TargetOpcode::G_ZEXTLOAD: 2702 case TargetOpcode::G_LOAD: 2703 case TargetOpcode::G_STORE: { 2704 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; 2705 LLT PtrTy = MRI.getType(I.getOperand(1).getReg()); 2706 2707 if (PtrTy != LLT::pointer(0, 64)) { 2708 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy 2709 << ", expected: " << LLT::pointer(0, 64) << '\n'); 2710 return false; 2711 } 2712 2713 auto &MemOp = **I.memoperands_begin(); 2714 uint64_t MemSizeInBytes = MemOp.getSize(); 2715 unsigned MemSizeInBits = MemSizeInBytes * 8; 2716 AtomicOrdering Order = MemOp.getSuccessOrdering(); 2717 2718 // Need special instructions for atomics that affect ordering. 2719 if (Order != AtomicOrdering::NotAtomic && 2720 Order != AtomicOrdering::Unordered && 2721 Order != AtomicOrdering::Monotonic) { 2722 assert(I.getOpcode() != TargetOpcode::G_ZEXTLOAD); 2723 if (MemSizeInBytes > 64) 2724 return false; 2725 2726 if (I.getOpcode() == TargetOpcode::G_LOAD) { 2727 static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH, 2728 AArch64::LDARW, AArch64::LDARX}; 2729 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)])); 2730 } else { 2731 static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH, 2732 AArch64::STLRW, AArch64::STLRX}; 2733 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)])); 2734 } 2735 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2736 return true; 2737 } 2738 2739 #ifndef NDEBUG 2740 const Register PtrReg = I.getOperand(1).getReg(); 2741 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); 2742 // Sanity-check the pointer register. 2743 assert(PtrRB.getID() == AArch64::GPRRegBankID && 2744 "Load/Store pointer operand isn't a GPR"); 2745 assert(MRI.getType(PtrReg).isPointer() && 2746 "Load/Store pointer operand isn't a pointer"); 2747 #endif 2748 2749 const Register ValReg = I.getOperand(0).getReg(); 2750 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); 2751 2752 // Helper lambda for partially selecting I. Either returns the original 2753 // instruction with an updated opcode, or a new instruction. 2754 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { 2755 bool IsStore = I.getOpcode() == TargetOpcode::G_STORE; 2756 const unsigned NewOpc = 2757 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); 2758 if (NewOpc == I.getOpcode()) 2759 return nullptr; 2760 // Check if we can fold anything into the addressing mode. 2761 auto AddrModeFns = 2762 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); 2763 if (!AddrModeFns) { 2764 // Can't fold anything. Use the original instruction. 2765 I.setDesc(TII.get(NewOpc)); 2766 I.addOperand(MachineOperand::CreateImm(0)); 2767 return &I; 2768 } 2769 2770 // Folded something. Create a new instruction and return it. 2771 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); 2772 IsStore ? 
NewInst.addUse(ValReg) : NewInst.addDef(ValReg); 2773 NewInst.cloneMemRefs(I); 2774 for (auto &Fn : *AddrModeFns) 2775 Fn(NewInst); 2776 I.eraseFromParent(); 2777 return &*NewInst; 2778 }; 2779 2780 MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); 2781 if (!LoadStore) 2782 return false; 2783 2784 // If we're storing a 0, use WZR/XZR. 2785 if (Opcode == TargetOpcode::G_STORE) { 2786 auto CVal = getConstantVRegValWithLookThrough( 2787 LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true, 2788 /*HandleFConstants = */ false); 2789 if (CVal && CVal->Value == 0) { 2790 switch (LoadStore->getOpcode()) { 2791 case AArch64::STRWui: 2792 case AArch64::STRHHui: 2793 case AArch64::STRBBui: 2794 LoadStore->getOperand(0).setReg(AArch64::WZR); 2795 break; 2796 case AArch64::STRXui: 2797 LoadStore->getOperand(0).setReg(AArch64::XZR); 2798 break; 2799 } 2800 } 2801 } 2802 2803 if (IsZExtLoad) { 2804 // The zextload from a smaller type to i32 should be handled by the 2805 // importer. 2806 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) 2807 return false; 2808 // If we have a ZEXTLOAD then change the load's type to be a narrower reg 2809 // and zero_extend with SUBREG_TO_REG. 2810 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2811 Register DstReg = LoadStore->getOperand(0).getReg(); 2812 LoadStore->getOperand(0).setReg(LdReg); 2813 2814 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator())); 2815 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) 2816 .addImm(0) 2817 .addUse(LdReg) 2818 .addImm(AArch64::sub_32); 2819 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); 2820 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, 2821 MRI); 2822 } 2823 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); 2824 } 2825 2826 case TargetOpcode::G_SMULH: 2827 case TargetOpcode::G_UMULH: { 2828 // Reject the various things we don't support yet. 2829 if (unsupportedBinOp(I, RBI, MRI, TRI)) 2830 return false; 2831 2832 const Register DefReg = I.getOperand(0).getReg(); 2833 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2834 2835 if (RB.getID() != AArch64::GPRRegBankID) { 2836 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n"); 2837 return false; 2838 } 2839 2840 if (Ty != LLT::scalar(64)) { 2841 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty 2842 << ", expected: " << LLT::scalar(64) << '\n'); 2843 return false; 2844 } 2845 2846 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr 2847 : AArch64::UMULHrr; 2848 I.setDesc(TII.get(NewOpc)); 2849 2850 // Now that we selected an opcode, we need to constrain the register 2851 // operands to use appropriate classes. 2852 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2853 } 2854 case TargetOpcode::G_LSHR: 2855 case TargetOpcode::G_ASHR: 2856 if (MRI.getType(I.getOperand(0).getReg()).isVector()) 2857 return selectVectorAshrLshr(I, MRI); 2858 LLVM_FALLTHROUGH; 2859 case TargetOpcode::G_SHL: 2860 if (Opcode == TargetOpcode::G_SHL && 2861 MRI.getType(I.getOperand(0).getReg()).isVector()) 2862 return selectVectorSHL(I, MRI); 2863 LLVM_FALLTHROUGH; 2864 case TargetOpcode::G_FADD: 2865 case TargetOpcode::G_FSUB: 2866 case TargetOpcode::G_FMUL: 2867 case TargetOpcode::G_FDIV: 2868 case TargetOpcode::G_OR: { 2869 // Reject the various things we don't support yet. 
2870 if (unsupportedBinOp(I, RBI, MRI, TRI)) 2871 return false; 2872 2873 const unsigned OpSize = Ty.getSizeInBits(); 2874 2875 const Register DefReg = I.getOperand(0).getReg(); 2876 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2877 2878 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize); 2879 if (NewOpc == I.getOpcode()) 2880 return false; 2881 2882 I.setDesc(TII.get(NewOpc)); 2883 // FIXME: Should the type be always reset in setDesc? 2884 2885 // Now that we selected an opcode, we need to constrain the register 2886 // operands to use appropriate classes. 2887 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2888 } 2889 2890 case TargetOpcode::G_PTR_ADD: { 2891 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB); 2892 I.eraseFromParent(); 2893 return true; 2894 } 2895 case TargetOpcode::G_SADDO: 2896 case TargetOpcode::G_UADDO: 2897 case TargetOpcode::G_SSUBO: 2898 case TargetOpcode::G_USUBO: { 2899 // Emit the operation and get the correct condition code. 2900 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(), 2901 I.getOperand(2), I.getOperand(3), MIB); 2902 2903 // Now, put the overflow result in the register given by the first operand 2904 // to the overflow op. CSINC increments the result when the predicate is 2905 // false, so to get the increment when it's true, we need to use the 2906 // inverse. In this case, we want to increment when carry is set. 2907 Register ZReg = AArch64::WZR; 2908 auto CsetMI = MIB.buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, 2909 {ZReg, ZReg}) 2910 .addImm(getInvertedCondCode(OpAndCC.second)); 2911 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI); 2912 I.eraseFromParent(); 2913 return true; 2914 } 2915 2916 case TargetOpcode::G_PTRMASK: { 2917 Register MaskReg = I.getOperand(2).getReg(); 2918 Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI); 2919 // TODO: Implement arbitrary cases 2920 if (!MaskVal || !isShiftedMask_64(*MaskVal)) 2921 return false; 2922 2923 uint64_t Mask = *MaskVal; 2924 I.setDesc(TII.get(AArch64::ANDXri)); 2925 I.getOperand(2).ChangeToImmediate( 2926 AArch64_AM::encodeLogicalImmediate(Mask, 64)); 2927 2928 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2929 } 2930 case TargetOpcode::G_PTRTOINT: 2931 case TargetOpcode::G_TRUNC: { 2932 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2933 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 2934 2935 const Register DstReg = I.getOperand(0).getReg(); 2936 const Register SrcReg = I.getOperand(1).getReg(); 2937 2938 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2939 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 2940 2941 if (DstRB.getID() != SrcRB.getID()) { 2942 LLVM_DEBUG( 2943 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"); 2944 return false; 2945 } 2946 2947 if (DstRB.getID() == AArch64::GPRRegBankID) { 2948 const TargetRegisterClass *DstRC = 2949 getRegClassForTypeOnBank(DstTy, DstRB, RBI); 2950 if (!DstRC) 2951 return false; 2952 2953 const TargetRegisterClass *SrcRC = 2954 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI); 2955 if (!SrcRC) 2956 return false; 2957 2958 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 2959 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 2960 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"); 2961 return false; 2962 } 2963 2964 if (DstRC == SrcRC) { 2965 // Nothing to be done 2966 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == 
LLT::scalar(32) && 2967 SrcTy == LLT::scalar(64)) { 2968 llvm_unreachable("TableGen can import this case"); 2969 return false; 2970 } else if (DstRC == &AArch64::GPR32RegClass && 2971 SrcRC == &AArch64::GPR64RegClass) { 2972 I.getOperand(1).setSubReg(AArch64::sub_32); 2973 } else { 2974 LLVM_DEBUG( 2975 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"); 2976 return false; 2977 } 2978 2979 I.setDesc(TII.get(TargetOpcode::COPY)); 2980 return true; 2981 } else if (DstRB.getID() == AArch64::FPRRegBankID) { 2982 if (DstTy == LLT::fixed_vector(4, 16) && 2983 SrcTy == LLT::fixed_vector(4, 32)) { 2984 I.setDesc(TII.get(AArch64::XTNv4i16)); 2985 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2986 return true; 2987 } 2988 2989 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { 2990 MachineInstr *Extract = emitExtractVectorElt( 2991 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB); 2992 if (!Extract) 2993 return false; 2994 I.eraseFromParent(); 2995 return true; 2996 } 2997 2998 // We might have a vector G_PTRTOINT, in which case just emit a COPY. 2999 if (Opcode == TargetOpcode::G_PTRTOINT) { 3000 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector"); 3001 I.setDesc(TII.get(TargetOpcode::COPY)); 3002 return true; 3003 } 3004 } 3005 3006 return false; 3007 } 3008 3009 case TargetOpcode::G_ANYEXT: { 3010 const Register DstReg = I.getOperand(0).getReg(); 3011 const Register SrcReg = I.getOperand(1).getReg(); 3012 3013 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI); 3014 if (RBDst.getID() != AArch64::GPRRegBankID) { 3015 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst 3016 << ", expected: GPR\n"); 3017 return false; 3018 } 3019 3020 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI); 3021 if (RBSrc.getID() != AArch64::GPRRegBankID) { 3022 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc 3023 << ", expected: GPR\n"); 3024 return false; 3025 } 3026 3027 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); 3028 3029 if (DstSize == 0) { 3030 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n"); 3031 return false; 3032 } 3033 3034 if (DstSize != 64 && DstSize > 32) { 3035 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize 3036 << ", expected: 32 or 64\n"); 3037 return false; 3038 } 3039 // At this point G_ANYEXT is just like a plain COPY, but we need 3040 // to explicitly form the 64-bit value if any. 3041 if (DstSize > 32) { 3042 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass); 3043 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) 3044 .addDef(ExtSrc) 3045 .addImm(0) 3046 .addUse(SrcReg) 3047 .addImm(AArch64::sub_32); 3048 I.getOperand(1).setReg(ExtSrc); 3049 } 3050 return selectCopy(I, TII, MRI, TRI, RBI); 3051 } 3052 3053 case TargetOpcode::G_ZEXT: 3054 case TargetOpcode::G_SEXT_INREG: 3055 case TargetOpcode::G_SEXT: { 3056 unsigned Opcode = I.getOpcode(); 3057 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT; 3058 const Register DefReg = I.getOperand(0).getReg(); 3059 Register SrcReg = I.getOperand(1).getReg(); 3060 const LLT DstTy = MRI.getType(DefReg); 3061 const LLT SrcTy = MRI.getType(SrcReg); 3062 unsigned DstSize = DstTy.getSizeInBits(); 3063 unsigned SrcSize = SrcTy.getSizeInBits(); 3064 3065 // SEXT_INREG has the same src reg size as dst, the size of the value to be 3066 // extended is encoded in the imm. 
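    // E.g. %d:gpr(s64) = G_SEXT_INREG %s:gpr(s64), 8 sign-extends the low 8
    // bits of %s, so the effective source size here is 8 rather than 64.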
3067 if (Opcode == TargetOpcode::G_SEXT_INREG) 3068 SrcSize = I.getOperand(2).getImm(); 3069 3070 if (DstTy.isVector()) 3071 return false; // Should be handled by imported patterns. 3072 3073 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == 3074 AArch64::GPRRegBankID && 3075 "Unexpected ext regbank"); 3076 3077 MachineInstr *ExtI; 3078 3079 // First check if we're extending the result of a load which has a dest type 3080 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest 3081 // GPR register on AArch64 and all loads which are smaller automatically 3082 // zero-extend the upper bits. E.g. 3083 // %v(s8) = G_LOAD %p, :: (load 1) 3084 // %v2(s32) = G_ZEXT %v(s8) 3085 if (!IsSigned) { 3086 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); 3087 bool IsGPR = 3088 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID; 3089 if (LoadMI && IsGPR) { 3090 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); 3091 unsigned BytesLoaded = MemOp->getSize(); 3092 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) 3093 return selectCopy(I, TII, MRI, TRI, RBI); 3094 } 3095 3096 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs) 3097 // + SUBREG_TO_REG. 3098 // 3099 // If we are zero extending from 32 bits to 64 bits, it's possible that 3100 // the instruction implicitly does the zero extend for us. In that case, 3101 // we only need the SUBREG_TO_REG. 3102 if (IsGPR && SrcSize == 32 && DstSize == 64) { 3103 // Unlike with the G_LOAD case, we don't want to look through copies 3104 // here. (See isDef32.) 3105 MachineInstr *Def = MRI.getVRegDef(SrcReg); 3106 Register SubregToRegSrc = SrcReg; 3107 3108 // Does the instruction implicitly zero extend? 3109 if (!Def || !isDef32(*Def)) { 3110 // No. Zero out using an OR. 3111 Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 3112 const Register ZReg = AArch64::WZR; 3113 MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0); 3114 SubregToRegSrc = OrDst; 3115 } 3116 3117 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) 3118 .addImm(0) 3119 .addUse(SubregToRegSrc) 3120 .addImm(AArch64::sub_32); 3121 3122 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, 3123 MRI)) { 3124 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n"); 3125 return false; 3126 } 3127 3128 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, 3129 MRI)) { 3130 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n"); 3131 return false; 3132 } 3133 3134 I.eraseFromParent(); 3135 return true; 3136 } 3137 } 3138 3139 if (DstSize == 64) { 3140 if (Opcode != TargetOpcode::G_SEXT_INREG) { 3141 // FIXME: Can we avoid manually doing this? 3142 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, 3143 MRI)) { 3144 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) 3145 << " operand\n"); 3146 return false; 3147 } 3148 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, 3149 {&AArch64::GPR64RegClass}, {}) 3150 .addImm(0) 3151 .addUse(SrcReg) 3152 .addImm(AArch64::sub_32) 3153 .getReg(0); 3154 } 3155 3156 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, 3157 {DefReg}, {SrcReg}) 3158 .addImm(0) 3159 .addImm(SrcSize - 1); 3160 } else if (DstSize <= 32) { 3161 ExtI = MIB.buildInstr(IsSigned ? 
AArch64::SBFMWri : AArch64::UBFMWri, 3162 {DefReg}, {SrcReg}) 3163 .addImm(0) 3164 .addImm(SrcSize - 1); 3165 } else { 3166 return false; 3167 } 3168 3169 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 3170 I.eraseFromParent(); 3171 return true; 3172 } 3173 3174 case TargetOpcode::G_SITOFP: 3175 case TargetOpcode::G_UITOFP: 3176 case TargetOpcode::G_FPTOSI: 3177 case TargetOpcode::G_FPTOUI: { 3178 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), 3179 SrcTy = MRI.getType(I.getOperand(1).getReg()); 3180 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy); 3181 if (NewOpc == Opcode) 3182 return false; 3183 3184 I.setDesc(TII.get(NewOpc)); 3185 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3186 3187 return true; 3188 } 3189 3190 case TargetOpcode::G_FREEZE: 3191 return selectCopy(I, TII, MRI, TRI, RBI); 3192 3193 case TargetOpcode::G_INTTOPTR: 3194 // The importer is currently unable to import pointer types since they 3195 // didn't exist in SelectionDAG. 3196 return selectCopy(I, TII, MRI, TRI, RBI); 3197 3198 case TargetOpcode::G_BITCAST: 3199 // Imported SelectionDAG rules can handle every bitcast except those that 3200 // bitcast from a type to the same type. Ideally, these shouldn't occur 3201 // but we might not run an optimizer that deletes them. The other exception 3202 // is bitcasts involving pointer types, as SelectionDAG has no knowledge 3203 // of them. 3204 return selectCopy(I, TII, MRI, TRI, RBI); 3205 3206 case TargetOpcode::G_SELECT: { 3207 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) { 3208 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty 3209 << ", expected: " << LLT::scalar(1) << '\n'); 3210 return false; 3211 } 3212 3213 const Register CondReg = I.getOperand(1).getReg(); 3214 const Register TReg = I.getOperand(2).getReg(); 3215 const Register FReg = I.getOperand(3).getReg(); 3216 3217 if (tryOptSelect(I)) 3218 return true; 3219 3220 // Make sure to use an unused vreg instead of wzr, so that the peephole 3221 // optimizations will be able to optimize these. 3222 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 3223 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) 3224 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 3225 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 3226 if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB)) 3227 return false; 3228 I.eraseFromParent(); 3229 return true; 3230 } 3231 case TargetOpcode::G_ICMP: { 3232 if (Ty.isVector()) 3233 return selectVectorICmp(I, MRI); 3234 3235 if (Ty != LLT::scalar(32)) { 3236 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty 3237 << ", expected: " << LLT::scalar(32) << '\n'); 3238 return false; 3239 } 3240 3241 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); 3242 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), 3243 MIB); 3244 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIB); 3245 I.eraseFromParent(); 3246 return true; 3247 } 3248 3249 case TargetOpcode::G_FCMP: { 3250 CmpInst::Predicate Pred = 3251 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); 3252 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB, 3253 Pred) || 3254 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB)) 3255 return false; 3256 I.eraseFromParent(); 3257 return true; 3258 } 3259 case TargetOpcode::G_VASTART: 3260 return STI.isTargetDarwin() ? 
selectVaStartDarwin(I, MF, MRI) 3261 : selectVaStartAAPCS(I, MF, MRI); 3262 case TargetOpcode::G_INTRINSIC: 3263 return selectIntrinsic(I, MRI); 3264 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 3265 return selectIntrinsicWithSideEffects(I, MRI); 3266 case TargetOpcode::G_IMPLICIT_DEF: { 3267 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); 3268 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 3269 const Register DstReg = I.getOperand(0).getReg(); 3270 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 3271 const TargetRegisterClass *DstRC = 3272 getRegClassForTypeOnBank(DstTy, DstRB, RBI); 3273 RBI.constrainGenericRegister(DstReg, *DstRC, MRI); 3274 return true; 3275 } 3276 case TargetOpcode::G_BLOCK_ADDR: { 3277 if (TM.getCodeModel() == CodeModel::Large) { 3278 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0); 3279 I.eraseFromParent(); 3280 return true; 3281 } else { 3282 I.setDesc(TII.get(AArch64::MOVaddrBA)); 3283 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA), 3284 I.getOperand(0).getReg()) 3285 .addBlockAddress(I.getOperand(1).getBlockAddress(), 3286 /* Offset */ 0, AArch64II::MO_PAGE) 3287 .addBlockAddress( 3288 I.getOperand(1).getBlockAddress(), /* Offset */ 0, 3289 AArch64II::MO_NC | AArch64II::MO_PAGEOFF); 3290 I.eraseFromParent(); 3291 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); 3292 } 3293 } 3294 case AArch64::G_DUP: { 3295 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by 3296 // imported patterns. Do it manually here. Avoiding generating s16 gpr is 3297 // difficult because at RBS we may end up pessimizing the fpr case if we 3298 // decided to add an anyextend to fix this. Manual selection is the most 3299 // robust solution for now. 3300 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != 3301 AArch64::GPRRegBankID) 3302 return false; // We expect the fpr regbank case to be imported. 
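// E.g. (illustrative) %d:fpr(<8 x s8>) = G_DUP %scalar:gpr(s8) becomes
// DUPv8i8gpr below; only 8- and 16-bit element vectors are handled here.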
3303 LLT VecTy = MRI.getType(I.getOperand(0).getReg()); 3304 if (VecTy == LLT::fixed_vector(8, 8)) 3305 I.setDesc(TII.get(AArch64::DUPv8i8gpr)); 3306 else if (VecTy == LLT::fixed_vector(16, 8)) 3307 I.setDesc(TII.get(AArch64::DUPv16i8gpr)); 3308 else if (VecTy == LLT::fixed_vector(4, 16)) 3309 I.setDesc(TII.get(AArch64::DUPv4i16gpr)); 3310 else if (VecTy == LLT::fixed_vector(8, 16)) 3311 I.setDesc(TII.get(AArch64::DUPv8i16gpr)); 3312 else 3313 return false; 3314 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3315 } 3316 case TargetOpcode::G_INTRINSIC_TRUNC: 3317 return selectIntrinsicTrunc(I, MRI); 3318 case TargetOpcode::G_INTRINSIC_ROUND: 3319 return selectIntrinsicRound(I, MRI); 3320 case TargetOpcode::G_BUILD_VECTOR: 3321 return selectBuildVector(I, MRI); 3322 case TargetOpcode::G_MERGE_VALUES: 3323 return selectMergeValues(I, MRI); 3324 case TargetOpcode::G_UNMERGE_VALUES: 3325 return selectUnmergeValues(I, MRI); 3326 case TargetOpcode::G_SHUFFLE_VECTOR: 3327 return selectShuffleVector(I, MRI); 3328 case TargetOpcode::G_EXTRACT_VECTOR_ELT: 3329 return selectExtractElt(I, MRI); 3330 case TargetOpcode::G_INSERT_VECTOR_ELT: 3331 return selectInsertElt(I, MRI); 3332 case TargetOpcode::G_CONCAT_VECTORS: 3333 return selectConcatVectors(I, MRI); 3334 case TargetOpcode::G_JUMP_TABLE: 3335 return selectJumpTable(I, MRI); 3336 case TargetOpcode::G_VECREDUCE_FADD: 3337 case TargetOpcode::G_VECREDUCE_ADD: 3338 return selectReduction(I, MRI); 3339 } 3340 3341 return false; 3342 } 3343 3344 bool AArch64InstructionSelector::selectReduction(MachineInstr &I, 3345 MachineRegisterInfo &MRI) { 3346 Register VecReg = I.getOperand(1).getReg(); 3347 LLT VecTy = MRI.getType(VecReg); 3348 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) { 3349 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit 3350 // a subregister copy afterwards. 
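// E.g. (illustrative) %d:fpr(s32) = G_VECREDUCE_ADD %v:fpr(<2 x s32>) is
// selected to an ADDPv2i32 defining an FPR64, followed by a COPY of its
// ssub subregister into the destination.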
3351 if (VecTy == LLT::fixed_vector(2, 32)) { 3352 Register DstReg = I.getOperand(0).getReg(); 3353 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass}, 3354 {VecReg, VecReg}); 3355 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 3356 .addReg(AddP.getReg(0), 0, AArch64::ssub) 3357 .getReg(0); 3358 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI); 3359 I.eraseFromParent(); 3360 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI); 3361 } 3362 3363 unsigned Opc = 0; 3364 if (VecTy == LLT::fixed_vector(16, 8)) 3365 Opc = AArch64::ADDVv16i8v; 3366 else if (VecTy == LLT::fixed_vector(8, 16)) 3367 Opc = AArch64::ADDVv8i16v; 3368 else if (VecTy == LLT::fixed_vector(4, 32)) 3369 Opc = AArch64::ADDVv4i32v; 3370 else if (VecTy == LLT::fixed_vector(2, 64)) 3371 Opc = AArch64::ADDPv2i64p; 3372 else { 3373 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction"); 3374 return false; 3375 } 3376 I.setDesc(TII.get(Opc)); 3377 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3378 } 3379 3380 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) { 3381 unsigned Opc = 0; 3382 if (VecTy == LLT::fixed_vector(2, 32)) 3383 Opc = AArch64::FADDPv2i32p; 3384 else if (VecTy == LLT::fixed_vector(2, 64)) 3385 Opc = AArch64::FADDPv2i64p; 3386 else { 3387 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction"); 3388 return false; 3389 } 3390 I.setDesc(TII.get(Opc)); 3391 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3392 } 3393 return false; 3394 } 3395 3396 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, 3397 MachineRegisterInfo &MRI) { 3398 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT"); 3399 Register JTAddr = I.getOperand(0).getReg(); 3400 unsigned JTI = I.getOperand(1).getIndex(); 3401 Register Index = I.getOperand(2).getReg(); 3402 3403 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3404 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); 3405 3406 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr); 3407 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32, 3408 {TargetReg, ScratchReg}, {JTAddr, Index}) 3409 .addJumpTableIndex(JTI); 3410 // Build the indirect branch. 3411 MIB.buildInstr(AArch64::BR, {}, {TargetReg}); 3412 I.eraseFromParent(); 3413 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI); 3414 } 3415 3416 bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I, 3417 MachineRegisterInfo &MRI) { 3418 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table"); 3419 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!"); 3420 3421 Register DstReg = I.getOperand(0).getReg(); 3422 unsigned JTI = I.getOperand(1).getIndex(); 3423 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later. 
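// E.g. the expansion ends up looking something like (label illustrative):
//   adrp x8, .LJTI0_0
//   add  x8, x8, :lo12:.LJTI0_0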
3424 auto MovMI = 3425 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {}) 3426 .addJumpTableIndex(JTI, AArch64II::MO_PAGE) 3427 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF); 3428 I.eraseFromParent(); 3429 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); 3430 } 3431 3432 bool AArch64InstructionSelector::selectTLSGlobalValue( 3433 MachineInstr &I, MachineRegisterInfo &MRI) { 3434 if (!STI.isTargetMachO()) 3435 return false; 3436 MachineFunction &MF = *I.getParent()->getParent(); 3437 MF.getFrameInfo().setAdjustsStack(true); 3438 3439 const auto &GlobalOp = I.getOperand(1); 3440 assert(GlobalOp.getOffset() == 0 && 3441 "Shouldn't have an offset on TLS globals!"); 3442 const GlobalValue &GV = *GlobalOp.getGlobal(); 3443 3444 auto LoadGOT = 3445 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {}) 3446 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); 3447 3448 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, 3449 {LoadGOT.getReg(0)}) 3450 .addImm(0); 3451 3452 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0)); 3453 // TLS calls preserve all registers except those that absolutely must be 3454 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be 3455 // silly). 3456 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load}) 3457 .addUse(AArch64::X0, RegState::Implicit) 3458 .addDef(AArch64::X0, RegState::Implicit) 3459 .addRegMask(TRI.getTLSCallPreservedMask()); 3460 3461 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0)); 3462 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass, 3463 MRI); 3464 I.eraseFromParent(); 3465 return true; 3466 } 3467 3468 bool AArch64InstructionSelector::selectIntrinsicTrunc( 3469 MachineInstr &I, MachineRegisterInfo &MRI) const { 3470 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); 3471 3472 // Select the correct opcode. 3473 unsigned Opc = 0; 3474 if (!SrcTy.isVector()) { 3475 switch (SrcTy.getSizeInBits()) { 3476 default: 3477 case 16: 3478 Opc = AArch64::FRINTZHr; 3479 break; 3480 case 32: 3481 Opc = AArch64::FRINTZSr; 3482 break; 3483 case 64: 3484 Opc = AArch64::FRINTZDr; 3485 break; 3486 } 3487 } else { 3488 unsigned NumElts = SrcTy.getNumElements(); 3489 switch (SrcTy.getElementType().getSizeInBits()) { 3490 default: 3491 break; 3492 case 16: 3493 if (NumElts == 4) 3494 Opc = AArch64::FRINTZv4f16; 3495 else if (NumElts == 8) 3496 Opc = AArch64::FRINTZv8f16; 3497 break; 3498 case 32: 3499 if (NumElts == 2) 3500 Opc = AArch64::FRINTZv2f32; 3501 else if (NumElts == 4) 3502 Opc = AArch64::FRINTZv4f32; 3503 break; 3504 case 64: 3505 if (NumElts == 2) 3506 Opc = AArch64::FRINTZv2f64; 3507 break; 3508 } 3509 } 3510 3511 if (!Opc) { 3512 // Didn't get an opcode above, bail. 3513 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n"); 3514 return false; 3515 } 3516 3517 // Legalization would have set us up perfectly for this; we just need to 3518 // set the opcode and move on. 3519 I.setDesc(TII.get(Opc)); 3520 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3521 } 3522 3523 bool AArch64InstructionSelector::selectIntrinsicRound( 3524 MachineInstr &I, MachineRegisterInfo &MRI) const { 3525 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); 3526 3527 // Select the correct opcode. 
3528 unsigned Opc = 0; 3529 if (!SrcTy.isVector()) { 3530 switch (SrcTy.getSizeInBits()) { 3531 default: 3532 case 16: 3533 Opc = AArch64::FRINTAHr; 3534 break; 3535 case 32: 3536 Opc = AArch64::FRINTASr; 3537 break; 3538 case 64: 3539 Opc = AArch64::FRINTADr; 3540 break; 3541 } 3542 } else { 3543 unsigned NumElts = SrcTy.getNumElements(); 3544 switch (SrcTy.getElementType().getSizeInBits()) { 3545 default: 3546 break; 3547 case 16: 3548 if (NumElts == 4) 3549 Opc = AArch64::FRINTAv4f16; 3550 else if (NumElts == 8) 3551 Opc = AArch64::FRINTAv8f16; 3552 break; 3553 case 32: 3554 if (NumElts == 2) 3555 Opc = AArch64::FRINTAv2f32; 3556 else if (NumElts == 4) 3557 Opc = AArch64::FRINTAv4f32; 3558 break; 3559 case 64: 3560 if (NumElts == 2) 3561 Opc = AArch64::FRINTAv2f64; 3562 break; 3563 } 3564 } 3565 3566 if (!Opc) { 3567 // Didn't get an opcode above, bail. 3568 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n"); 3569 return false; 3570 } 3571 3572 // Legalization would have set us up perfectly for this; we just need to 3573 // set the opcode and move on. 3574 I.setDesc(TII.get(Opc)); 3575 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3576 } 3577 3578 bool AArch64InstructionSelector::selectVectorICmp( 3579 MachineInstr &I, MachineRegisterInfo &MRI) { 3580 Register DstReg = I.getOperand(0).getReg(); 3581 LLT DstTy = MRI.getType(DstReg); 3582 Register SrcReg = I.getOperand(2).getReg(); 3583 Register Src2Reg = I.getOperand(3).getReg(); 3584 LLT SrcTy = MRI.getType(SrcReg); 3585 3586 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); 3587 unsigned NumElts = DstTy.getNumElements(); 3588 3589 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b 3590 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16 3591 // Third index is cc opcode: 3592 // 0 == eq 3593 // 1 == ugt 3594 // 2 == uge 3595 // 3 == ult 3596 // 4 == ule 3597 // 5 == sgt 3598 // 6 == sge 3599 // 7 == slt 3600 // 8 == sle 3601 // ne is done by negating 'eq' result. 3602 3603 // This table below assumes that for some comparisons the operands will be 3604 // commuted. 3605 // ult op == commute + ugt op 3606 // ule op == commute + uge op 3607 // slt op == commute + sgt op 3608 // sle op == commute + sge op 3609 unsigned PredIdx = 0; 3610 bool SwapOperands = false; 3611 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); 3612 switch (Pred) { 3613 case CmpInst::ICMP_NE: 3614 case CmpInst::ICMP_EQ: 3615 PredIdx = 0; 3616 break; 3617 case CmpInst::ICMP_UGT: 3618 PredIdx = 1; 3619 break; 3620 case CmpInst::ICMP_UGE: 3621 PredIdx = 2; 3622 break; 3623 case CmpInst::ICMP_ULT: 3624 PredIdx = 3; 3625 SwapOperands = true; 3626 break; 3627 case CmpInst::ICMP_ULE: 3628 PredIdx = 4; 3629 SwapOperands = true; 3630 break; 3631 case CmpInst::ICMP_SGT: 3632 PredIdx = 5; 3633 break; 3634 case CmpInst::ICMP_SGE: 3635 PredIdx = 6; 3636 break; 3637 case CmpInst::ICMP_SLT: 3638 PredIdx = 7; 3639 SwapOperands = true; 3640 break; 3641 case CmpInst::ICMP_SLE: 3642 PredIdx = 8; 3643 SwapOperands = true; 3644 break; 3645 default: 3646 llvm_unreachable("Unhandled icmp predicate"); 3647 return false; 3648 } 3649 3650 // This table obviously should be tablegen'd when we have our GISel native 3651 // tablegen selector. 
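// Example lookup (illustrative): a <4 x s32> sgt compare uses
// OpcTable[2][1][5] == AArch64::CMGTv4i32, since EltIdx = Log2(32 / 8) = 2,
// NumEltsIdx = Log2(4 / 2) = 1 and PredIdx = 5.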
3652 3653 static const unsigned OpcTable[4][4][9] = { 3654 { 3655 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3656 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3657 0 /* invalid */}, 3658 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3659 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3660 0 /* invalid */}, 3661 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, 3662 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, 3663 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, 3664 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, 3665 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, 3666 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8} 3667 }, 3668 { 3669 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3670 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3671 0 /* invalid */}, 3672 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, 3673 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, 3674 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, 3675 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, 3676 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, 3677 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, 3678 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3679 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3680 0 /* invalid */} 3681 }, 3682 { 3683 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, 3684 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, 3685 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, 3686 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, 3687 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, 3688 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, 3689 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3690 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3691 0 /* invalid */}, 3692 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3693 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3694 0 /* invalid */} 3695 }, 3696 { 3697 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, 3698 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64, 3699 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, 3700 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3701 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3702 0 /* invalid */}, 3703 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3704 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3705 0 /* invalid */}, 3706 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3707 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3708 0 /* invalid */} 3709 }, 3710 }; 3711 unsigned EltIdx = Log2_32(SrcEltSize / 8); 3712 unsigned NumEltsIdx = Log2_32(NumElts / 2); 3713 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx]; 3714 if (!Opc) { 3715 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode"); 3716 return false; 3717 } 3718 3719 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI); 3720 const TargetRegisterClass *SrcRC = 3721 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true); 3722 if (!SrcRC) { 3723 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); 3724 
return false; 3725 } 3726 3727 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0; 3728 if (SrcTy.getSizeInBits() == 128) 3729 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0; 3730 3731 if (SwapOperands) 3732 std::swap(SrcReg, Src2Reg); 3733 3734 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg}); 3735 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 3736 3737 // Invert if we had a 'ne' cc. 3738 if (NotOpc) { 3739 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp}); 3740 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 3741 } else { 3742 MIB.buildCopy(DstReg, Cmp.getReg(0)); 3743 } 3744 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI); 3745 I.eraseFromParent(); 3746 return true; 3747 } 3748 3749 MachineInstr *AArch64InstructionSelector::emitScalarToVector( 3750 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar, 3751 MachineIRBuilder &MIRBuilder) const { 3752 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {}); 3753 3754 auto BuildFn = [&](unsigned SubregIndex) { 3755 auto Ins = 3756 MIRBuilder 3757 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar}) 3758 .addImm(SubregIndex); 3759 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI); 3760 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI); 3761 return &*Ins; 3762 }; 3763 3764 switch (EltSize) { 3765 case 16: 3766 return BuildFn(AArch64::hsub); 3767 case 32: 3768 return BuildFn(AArch64::ssub); 3769 case 64: 3770 return BuildFn(AArch64::dsub); 3771 default: 3772 return nullptr; 3773 } 3774 } 3775 3776 bool AArch64InstructionSelector::selectMergeValues( 3777 MachineInstr &I, MachineRegisterInfo &MRI) { 3778 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode"); 3779 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 3780 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 3781 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation"); 3782 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); 3783 3784 if (I.getNumOperands() != 3) 3785 return false; 3786 3787 // Merging 2 s64s into an s128. 
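// E.g. (illustrative) %d(s128) = G_MERGE_VALUES %lo(s64), %hi(s64) becomes
// an IMPLICIT_DEF plus two lane inserts: %lo into lane 0 and %hi into
// lane 1.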
3788 if (DstTy == LLT::scalar(128)) { 3789 if (SrcTy.getSizeInBits() != 64) 3790 return false; 3791 Register DstReg = I.getOperand(0).getReg(); 3792 Register Src1Reg = I.getOperand(1).getReg(); 3793 Register Src2Reg = I.getOperand(2).getReg(); 3794 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {}); 3795 MachineInstr *InsMI = 3796 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB); 3797 if (!InsMI) 3798 return false; 3799 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(), 3800 Src2Reg, /* LaneIdx */ 1, RB, MIB); 3801 if (!Ins2MI) 3802 return false; 3803 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); 3804 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI); 3805 I.eraseFromParent(); 3806 return true; 3807 } 3808 3809 if (RB.getID() != AArch64::GPRRegBankID) 3810 return false; 3811 3812 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) 3813 return false; 3814 3815 auto *DstRC = &AArch64::GPR64RegClass; 3816 Register SubToRegDef = MRI.createVirtualRegister(DstRC); 3817 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 3818 TII.get(TargetOpcode::SUBREG_TO_REG)) 3819 .addDef(SubToRegDef) 3820 .addImm(0) 3821 .addUse(I.getOperand(1).getReg()) 3822 .addImm(AArch64::sub_32); 3823 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC); 3824 // Need to anyext the second scalar before we can use bfm 3825 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 3826 TII.get(TargetOpcode::SUBREG_TO_REG)) 3827 .addDef(SubToRegDef2) 3828 .addImm(0) 3829 .addUse(I.getOperand(2).getReg()) 3830 .addImm(AArch64::sub_32); 3831 MachineInstr &BFM = 3832 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri)) 3833 .addDef(I.getOperand(0).getReg()) 3834 .addUse(SubToRegDef) 3835 .addUse(SubToRegDef2) 3836 .addImm(32) 3837 .addImm(31); 3838 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI); 3839 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI); 3840 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI); 3841 I.eraseFromParent(); 3842 return true; 3843 } 3844 3845 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, 3846 const unsigned EltSize) { 3847 // Choose a lane copy opcode and subregister based off of the size of the 3848 // vector's elements. 3849 switch (EltSize) { 3850 case 16: 3851 CopyOpc = AArch64::CPYi16; 3852 ExtractSubReg = AArch64::hsub; 3853 break; 3854 case 32: 3855 CopyOpc = AArch64::CPYi32; 3856 ExtractSubReg = AArch64::ssub; 3857 break; 3858 case 64: 3859 CopyOpc = AArch64::CPYi64; 3860 ExtractSubReg = AArch64::dsub; 3861 break; 3862 default: 3863 // Unknown size, bail out. 
3864 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n"); 3865 return false; 3866 } 3867 return true; 3868 } 3869 3870 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt( 3871 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, 3872 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const { 3873 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 3874 unsigned CopyOpc = 0; 3875 unsigned ExtractSubReg = 0; 3876 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) { 3877 LLVM_DEBUG( 3878 dbgs() << "Couldn't determine lane copy opcode for instruction.\n"); 3879 return nullptr; 3880 } 3881 3882 const TargetRegisterClass *DstRC = 3883 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true); 3884 if (!DstRC) { 3885 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n"); 3886 return nullptr; 3887 } 3888 3889 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI); 3890 const LLT &VecTy = MRI.getType(VecReg); 3891 const TargetRegisterClass *VecRC = 3892 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true); 3893 if (!VecRC) { 3894 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); 3895 return nullptr; 3896 } 3897 3898 // The register that we're going to copy into. 3899 Register InsertReg = VecReg; 3900 if (!DstReg) 3901 DstReg = MRI.createVirtualRegister(DstRC); 3902 // If the lane index is 0, we just use a subregister COPY. 3903 if (LaneIdx == 0) { 3904 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {}) 3905 .addReg(VecReg, 0, ExtractSubReg); 3906 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 3907 return &*Copy; 3908 } 3909 3910 // Lane copies require 128-bit wide registers. If we're dealing with an 3911 // unpacked vector, then we need to move up to that width. Insert an implicit 3912 // def and a subregister insert to get us there. 3913 if (VecTy.getSizeInBits() != 128) { 3914 MachineInstr *ScalarToVector = emitScalarToVector( 3915 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder); 3916 if (!ScalarToVector) 3917 return nullptr; 3918 InsertReg = ScalarToVector->getOperand(0).getReg(); 3919 } 3920 3921 MachineInstr *LaneCopyMI = 3922 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx); 3923 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); 3924 3925 // Make sure that we actually constrain the initial copy. 3926 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 3927 return LaneCopyMI; 3928 } 3929 3930 bool AArch64InstructionSelector::selectExtractElt( 3931 MachineInstr &I, MachineRegisterInfo &MRI) { 3932 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && 3933 "unexpected opcode!"); 3934 Register DstReg = I.getOperand(0).getReg(); 3935 const LLT NarrowTy = MRI.getType(DstReg); 3936 const Register SrcReg = I.getOperand(1).getReg(); 3937 const LLT WideTy = MRI.getType(SrcReg); 3938 (void)WideTy; 3939 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && 3940 "source register size too small!"); 3941 assert(!NarrowTy.isVector() && "cannot extract vector into vector!"); 3942 3943 // Need the lane index to determine the correct copy opcode. 3944 MachineOperand &LaneIdxOp = I.getOperand(2); 3945 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?"); 3946 3947 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { 3948 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n"); 3949 return false; 3950 } 3951 3952 // Find the index to extract from. 
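// The index has to be a compile-time constant (fed by a G_CONSTANT, possibly
// through copies); the lane copy instructions only take immediate lanes, so
// bail out otherwise.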
3953 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI); 3954 if (!VRegAndVal) 3955 return false; 3956 unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); 3957 3958 3959 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 3960 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg, 3961 LaneIdx, MIB); 3962 if (!Extract) 3963 return false; 3964 3965 I.eraseFromParent(); 3966 return true; 3967 } 3968 3969 bool AArch64InstructionSelector::selectSplitVectorUnmerge( 3970 MachineInstr &I, MachineRegisterInfo &MRI) { 3971 unsigned NumElts = I.getNumOperands() - 1; 3972 Register SrcReg = I.getOperand(NumElts).getReg(); 3973 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 3974 const LLT SrcTy = MRI.getType(SrcReg); 3975 3976 assert(NarrowTy.isVector() && "Expected an unmerge into vectors"); 3977 if (SrcTy.getSizeInBits() > 128) { 3978 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge"); 3979 return false; 3980 } 3981 3982 // We implement a split vector operation by treating the sub-vectors as 3983 // scalars and extracting them. 3984 const RegisterBank &DstRB = 3985 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI); 3986 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) { 3987 Register Dst = I.getOperand(OpIdx).getReg(); 3988 MachineInstr *Extract = 3989 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB); 3990 if (!Extract) 3991 return false; 3992 } 3993 I.eraseFromParent(); 3994 return true; 3995 } 3996 3997 bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I, 3998 MachineRegisterInfo &MRI) { 3999 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 4000 "unexpected opcode"); 4001 4002 // TODO: Handle unmerging into GPRs and from scalars to scalars. 4003 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() != 4004 AArch64::FPRRegBankID || 4005 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != 4006 AArch64::FPRRegBankID) { 4007 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " 4008 "currently unsupported.\n"); 4009 return false; 4010 } 4011 4012 // The last operand is the vector source register, and every other operand is 4013 // a register to unpack into. 4014 unsigned NumElts = I.getNumOperands() - 1; 4015 Register SrcReg = I.getOperand(NumElts).getReg(); 4016 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 4017 const LLT WideTy = MRI.getType(SrcReg); 4018 (void)WideTy; 4019 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) && 4020 "can only unmerge from vector or s128 types!"); 4021 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && 4022 "source register size too small!"); 4023 4024 if (!NarrowTy.isScalar()) 4025 return selectSplitVectorUnmerge(I, MRI); 4026 4027 // Choose a lane copy opcode and subregister based off of the size of the 4028 // vector's elements. 4029 unsigned CopyOpc = 0; 4030 unsigned ExtractSubReg = 0; 4031 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) 4032 return false; 4033 4034 // Set up for the lane copies. 4035 MachineBasicBlock &MBB = *I.getParent(); 4036 4037 // Stores the registers we'll be copying from. 4038 SmallVector<Register, 4> InsertRegs; 4039 4040 // We'll use the first register twice, so we only need NumElts-1 registers. 4041 unsigned NumInsertRegs = NumElts - 1; 4042 4043 // If our elements fit into exactly 128 bits, then we can copy from the source 4044 // directly. 
Otherwise, we need to do a bit of setup with some subregister 4045 // inserts. 4046 if (NarrowTy.getSizeInBits() * NumElts == 128) { 4047 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg); 4048 } else { 4049 // No. We have to perform subregister inserts. For each insert, create an 4050 // implicit def and a subregister insert, and save the register we create. 4051 const TargetRegisterClass *RC = 4052 getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI), 4053 WideTy.getScalarSizeInBits() * NumElts); 4054 unsigned SubReg = 0; 4055 bool Found = getSubRegForClass(RC, TRI, SubReg); 4056 (void)Found; 4057 assert(Found && "expected to find last operand's subreg idx"); 4058 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) { 4059 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); 4060 MachineInstr &ImpDefMI = 4061 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF), 4062 ImpDefReg); 4063 4064 // Now, create the subregister insert from SrcReg. 4065 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); 4066 MachineInstr &InsMI = 4067 *BuildMI(MBB, I, I.getDebugLoc(), 4068 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg) 4069 .addUse(ImpDefReg) 4070 .addUse(SrcReg) 4071 .addImm(SubReg); 4072 4073 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI); 4074 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI); 4075 4076 // Save the register so that we can copy from it after. 4077 InsertRegs.push_back(InsertReg); 4078 } 4079 } 4080 4081 // Now that we've created any necessary subregister inserts, we can 4082 // create the copies. 4083 // 4084 // Perform the first copy separately as a subregister copy. 4085 Register CopyTo = I.getOperand(0).getReg(); 4086 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {}) 4087 .addReg(InsertRegs[0], 0, ExtractSubReg); 4088 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI); 4089 4090 // Now, perform the remaining copies as vector lane copies. 4091 unsigned LaneIdx = 1; 4092 for (Register InsReg : InsertRegs) { 4093 Register CopyTo = I.getOperand(LaneIdx).getReg(); 4094 MachineInstr &CopyInst = 4095 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo) 4096 .addUse(InsReg) 4097 .addImm(LaneIdx); 4098 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI); 4099 ++LaneIdx; 4100 } 4101 4102 // Separately constrain the first copy's destination. Because of the 4103 // limitation in constrainOperandRegClass, we can't guarantee that this will 4104 // actually be constrained. So, do it ourselves using the second operand.
4105 const TargetRegisterClass *RC = 4106 MRI.getRegClassOrNull(I.getOperand(1).getReg()); 4107 if (!RC) { 4108 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n"); 4109 return false; 4110 } 4111 4112 RBI.constrainGenericRegister(CopyTo, *RC, MRI); 4113 I.eraseFromParent(); 4114 return true; 4115 } 4116 4117 bool AArch64InstructionSelector::selectConcatVectors( 4118 MachineInstr &I, MachineRegisterInfo &MRI) { 4119 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && 4120 "Unexpected opcode"); 4121 Register Dst = I.getOperand(0).getReg(); 4122 Register Op1 = I.getOperand(1).getReg(); 4123 Register Op2 = I.getOperand(2).getReg(); 4124 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB); 4125 if (!ConcatMI) 4126 return false; 4127 I.eraseFromParent(); 4128 return true; 4129 } 4130 4131 unsigned 4132 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal, 4133 MachineFunction &MF) const { 4134 Type *CPTy = CPVal->getType(); 4135 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy); 4136 4137 MachineConstantPool *MCP = MF.getConstantPool(); 4138 return MCP->getConstantPoolIndex(CPVal, Alignment); 4139 } 4140 4141 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool( 4142 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const { 4143 auto &MF = MIRBuilder.getMF(); 4144 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF); 4145 4146 auto Adrp = 4147 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {}) 4148 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE); 4149 4150 MachineInstr *LoadMI = nullptr; 4151 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF); 4152 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType()); 4153 switch (Size) { 4154 case 16: 4155 LoadMI = 4156 &*MIRBuilder 4157 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp}) 4158 .addConstantPoolIndex(CPIdx, 0, 4159 AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 4160 break; 4161 case 8: 4162 LoadMI = 4163 &*MIRBuilder 4164 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp}) 4165 .addConstantPoolIndex(CPIdx, 0, 4166 AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 4167 break; 4168 case 4: 4169 LoadMI = 4170 &*MIRBuilder 4171 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp}) 4172 .addConstantPoolIndex(CPIdx, 0, 4173 AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 4174 break; 4175 default: 4176 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type " 4177 << *CPVal->getType()); 4178 return nullptr; 4179 } 4180 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo, 4181 MachineMemOperand::MOLoad, 4182 Size, Align(Size))); 4183 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI); 4184 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI); 4185 return LoadMI; 4186 } 4187 4188 /// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given 4189 /// size and RB. 
4190 static std::pair<unsigned, unsigned> 4191 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { 4192 unsigned Opc, SubregIdx; 4193 if (RB.getID() == AArch64::GPRRegBankID) { 4194 if (EltSize == 16) { 4195 Opc = AArch64::INSvi16gpr; 4196 SubregIdx = AArch64::ssub; 4197 } else if (EltSize == 32) { 4198 Opc = AArch64::INSvi32gpr; 4199 SubregIdx = AArch64::ssub; 4200 } else if (EltSize == 64) { 4201 Opc = AArch64::INSvi64gpr; 4202 SubregIdx = AArch64::dsub; 4203 } else { 4204 llvm_unreachable("invalid elt size!"); 4205 } 4206 } else { 4207 if (EltSize == 8) { 4208 Opc = AArch64::INSvi8lane; 4209 SubregIdx = AArch64::bsub; 4210 } else if (EltSize == 16) { 4211 Opc = AArch64::INSvi16lane; 4212 SubregIdx = AArch64::hsub; 4213 } else if (EltSize == 32) { 4214 Opc = AArch64::INSvi32lane; 4215 SubregIdx = AArch64::ssub; 4216 } else if (EltSize == 64) { 4217 Opc = AArch64::INSvi64lane; 4218 SubregIdx = AArch64::dsub; 4219 } else { 4220 llvm_unreachable("invalid elt size!"); 4221 } 4222 } 4223 return std::make_pair(Opc, SubregIdx); 4224 } 4225 4226 MachineInstr *AArch64InstructionSelector::emitInstr( 4227 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, 4228 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, 4229 const ComplexRendererFns &RenderFns) const { 4230 assert(Opcode && "Expected an opcode?"); 4231 assert(!isPreISelGenericOpcode(Opcode) && 4232 "Function should only be used to produce selected instructions!"); 4233 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps); 4234 if (RenderFns) 4235 for (auto &Fn : *RenderFns) 4236 Fn(MI); 4237 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); 4238 return &*MI; 4239 } 4240 4241 MachineInstr *AArch64InstructionSelector::emitAddSub( 4242 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, 4243 Register Dst, MachineOperand &LHS, MachineOperand &RHS, 4244 MachineIRBuilder &MIRBuilder) const { 4245 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4246 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); 4247 auto Ty = MRI.getType(LHS.getReg()); 4248 assert(!Ty.isVector() && "Expected a scalar or pointer?"); 4249 unsigned Size = Ty.getSizeInBits(); 4250 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only"); 4251 bool Is32Bit = Size == 32; 4252 4253 // INSTRri form with positive arithmetic immediate. 4254 if (auto Fns = selectArithImmed(RHS)) 4255 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS}, 4256 MIRBuilder, Fns); 4257 4258 // INSTRri form with negative arithmetic immediate. 4259 if (auto Fns = selectNegArithImmed(RHS)) 4260 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS}, 4261 MIRBuilder, Fns); 4262 4263 // INSTRrx form. 4264 if (auto Fns = selectArithExtendedRegister(RHS)) 4265 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS}, 4266 MIRBuilder, Fns); 4267 4268 // INSTRrs form. 
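// (Shifted-register operand, e.g. "add x0, x1, x2, lsl #2"; registers
// illustrative.)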
4269 if (auto Fns = selectShiftedRegister(RHS)) 4270 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS}, 4271 MIRBuilder, Fns); 4272 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS}, 4273 MIRBuilder); 4274 } 4275 4276 MachineInstr * 4277 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, 4278 MachineOperand &RHS, 4279 MachineIRBuilder &MIRBuilder) const { 4280 const std::array<std::array<unsigned, 2>, 5> OpcTable{ 4281 {{AArch64::ADDXri, AArch64::ADDWri}, 4282 {AArch64::ADDXrs, AArch64::ADDWrs}, 4283 {AArch64::ADDXrr, AArch64::ADDWrr}, 4284 {AArch64::SUBXri, AArch64::SUBWri}, 4285 {AArch64::ADDXrx, AArch64::ADDWrx}}}; 4286 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder); 4287 } 4288 4289 MachineInstr * 4290 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, 4291 MachineOperand &RHS, 4292 MachineIRBuilder &MIRBuilder) const { 4293 const std::array<std::array<unsigned, 2>, 5> OpcTable{ 4294 {{AArch64::ADDSXri, AArch64::ADDSWri}, 4295 {AArch64::ADDSXrs, AArch64::ADDSWrs}, 4296 {AArch64::ADDSXrr, AArch64::ADDSWrr}, 4297 {AArch64::SUBSXri, AArch64::SUBSWri}, 4298 {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; 4299 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); 4300 } 4301 4302 MachineInstr * 4303 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, 4304 MachineOperand &RHS, 4305 MachineIRBuilder &MIRBuilder) const { 4306 const std::array<std::array<unsigned, 2>, 5> OpcTable{ 4307 {{AArch64::SUBSXri, AArch64::SUBSWri}, 4308 {AArch64::SUBSXrs, AArch64::SUBSWrs}, 4309 {AArch64::SUBSXrr, AArch64::SUBSWrr}, 4310 {AArch64::ADDSXri, AArch64::ADDSWri}, 4311 {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; 4312 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); 4313 } 4314 4315 MachineInstr * 4316 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, 4317 MachineIRBuilder &MIRBuilder) const { 4318 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4319 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); 4320 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; 4321 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); 4322 } 4323 4324 MachineInstr * 4325 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS, 4326 MachineIRBuilder &MIRBuilder) const { 4327 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); 4328 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4329 LLT Ty = MRI.getType(LHS.getReg()); 4330 unsigned RegSize = Ty.getSizeInBits(); 4331 bool Is32Bit = (RegSize == 32); 4332 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri}, 4333 {AArch64::ANDSXrs, AArch64::ANDSWrs}, 4334 {AArch64::ANDSXrr, AArch64::ANDSWrr}}; 4335 // ANDS needs a logical immediate for its immediate form. Check if we can 4336 // fold one in. 
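// E.g. (illustrative) a test of x & 0xff can fold the mask into ANDSWri
// (the "tst w0, #0xff" form) instead of materializing 0xff in a register.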
4337 if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) { 4338 int64_t Imm = ValAndVReg->Value.getSExtValue(); 4339 4340 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) { 4341 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS}); 4342 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); 4343 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 4344 return &*TstMI; 4345 } 4346 } 4347 4348 if (auto Fns = selectLogicalShiftedRegister(RHS)) 4349 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns); 4350 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder); 4351 } 4352 4353 MachineInstr *AArch64InstructionSelector::emitIntegerCompare( 4354 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, 4355 MachineIRBuilder &MIRBuilder) const { 4356 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 4357 assert(Predicate.isPredicate() && "Expected predicate?"); 4358 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4359 LLT CmpTy = MRI.getType(LHS.getReg()); 4360 assert(!CmpTy.isVector() && "Expected scalar or pointer"); 4361 unsigned Size = CmpTy.getSizeInBits(); 4362 (void)Size; 4363 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?"); 4364 // Fold the compare into a cmn or tst if possible. 4365 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder)) 4366 return FoldCmp; 4367 auto Dst = MRI.cloneVirtualRegister(LHS.getReg()); 4368 return emitSUBS(Dst, LHS, RHS, MIRBuilder); 4369 } 4370 4371 MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( 4372 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const { 4373 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4374 #ifndef NDEBUG 4375 LLT Ty = MRI.getType(Dst); 4376 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && 4377 "Expected a 32-bit scalar register?"); 4378 #endif 4379 const Register ZeroReg = AArch64::WZR; 4380 auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) { 4381 auto CSet = 4382 MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg}) 4383 .addImm(getInvertedCondCode(CC)); 4384 constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI); 4385 return &*CSet; 4386 }; 4387 4388 AArch64CC::CondCode CC1, CC2; 4389 changeFCMPPredToAArch64CC(Pred, CC1, CC2); 4390 if (CC2 == AArch64CC::AL) 4391 return EmitCSet(Dst, CC1); 4392 4393 const TargetRegisterClass *RC = &AArch64::GPR32RegClass; 4394 Register Def1Reg = MRI.createVirtualRegister(RC); 4395 Register Def2Reg = MRI.createVirtualRegister(RC); 4396 EmitCSet(Def1Reg, CC1); 4397 EmitCSet(Def2Reg, CC2); 4398 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg}); 4399 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI); 4400 return &*OrMI; 4401 } 4402 4403 MachineInstr * 4404 AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS, 4405 MachineIRBuilder &MIRBuilder, 4406 Optional<CmpInst::Predicate> Pred) const { 4407 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4408 LLT Ty = MRI.getType(LHS); 4409 if (Ty.isVector()) 4410 return nullptr; 4411 unsigned OpSize = Ty.getSizeInBits(); 4412 if (OpSize != 32 && OpSize != 64) 4413 return nullptr; 4414 4415 // If this is a compare against +0.0, then we don't have 4416 // to explicitly materialize a constant. 
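// E.g. G_FCMP oeq, %x(s32), +0.0 can use the single-operand FCMPSri form
// ("fcmp s0, #0.0"), register names illustrative.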
4417 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI); 4418 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); 4419 4420 auto IsEqualityPred = [](CmpInst::Predicate P) { 4421 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE || 4422 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE; 4423 }; 4424 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) { 4425 // Try commutating the operands. 4426 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI); 4427 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) { 4428 ShouldUseImm = true; 4429 std::swap(LHS, RHS); 4430 } 4431 } 4432 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, 4433 {AArch64::FCMPSri, AArch64::FCMPDri}}; 4434 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64]; 4435 4436 // Partially build the compare. Decide if we need to add a use for the 4437 // third operand based off whether or not we're comparing against 0.0. 4438 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS); 4439 if (!ShouldUseImm) 4440 CmpMI.addUse(RHS); 4441 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 4442 return &*CmpMI; 4443 } 4444 4445 MachineInstr *AArch64InstructionSelector::emitVectorConcat( 4446 Optional<Register> Dst, Register Op1, Register Op2, 4447 MachineIRBuilder &MIRBuilder) const { 4448 // We implement a vector concat by: 4449 // 1. Use scalar_to_vector to insert the lower vector into the larger dest 4450 // 2. Insert the upper vector into the destination's upper element 4451 // TODO: some of this code is common with G_BUILD_VECTOR handling. 4452 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4453 4454 const LLT Op1Ty = MRI.getType(Op1); 4455 const LLT Op2Ty = MRI.getType(Op2); 4456 4457 if (Op1Ty != Op2Ty) { 4458 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys"); 4459 return nullptr; 4460 } 4461 assert(Op1Ty.isVector() && "Expected a vector for vector concat"); 4462 4463 if (Op1Ty.getSizeInBits() >= 128) { 4464 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors"); 4465 return nullptr; 4466 } 4467 4468 // At the moment we just support 64 bit vector concats. 4469 if (Op1Ty.getSizeInBits() != 64) { 4470 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors"); 4471 return nullptr; 4472 } 4473 4474 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits()); 4475 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI); 4476 const TargetRegisterClass *DstRC = 4477 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2); 4478 4479 MachineInstr *WidenedOp1 = 4480 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder); 4481 MachineInstr *WidenedOp2 = 4482 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder); 4483 if (!WidenedOp1 || !WidenedOp2) { 4484 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value"); 4485 return nullptr; 4486 } 4487 4488 // Now do the insert of the upper element. 
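// E.g. (illustrative) for two <2 x s32> sources this is an INSvi64lane
// moving lane 0 of the widened Op2 into lane 1 of the widened Op1, i.e.
// "mov v0.d[1], v1.d[0]".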
4489 unsigned InsertOpc, InsSubRegIdx; 4490 std::tie(InsertOpc, InsSubRegIdx) = 4491 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits()); 4492 4493 if (!Dst) 4494 Dst = MRI.createVirtualRegister(DstRC); 4495 auto InsElt = 4496 MIRBuilder 4497 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()}) 4498 .addImm(1) /* Lane index */ 4499 .addUse(WidenedOp2->getOperand(0).getReg()) 4500 .addImm(0); 4501 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); 4502 return &*InsElt; 4503 } 4504 4505 MachineInstr * 4506 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred, 4507 MachineIRBuilder &MIRBuilder, 4508 Register SrcReg) const { 4509 // CSINC increments the result when the predicate is false. Invert it. 4510 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC( 4511 CmpInst::getInversePredicate((CmpInst::Predicate)Pred)); 4512 auto I = MIRBuilder.buildInstr(AArch64::CSINCWr, {DefReg}, {SrcReg, SrcReg}) 4513 .addImm(InvCC); 4514 constrainSelectedInstRegOperands(*I, TII, TRI, RBI); 4515 return &*I; 4516 } 4517 4518 std::pair<MachineInstr *, AArch64CC::CondCode> 4519 AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, 4520 MachineOperand &LHS, 4521 MachineOperand &RHS, 4522 MachineIRBuilder &MIRBuilder) const { 4523 switch (Opcode) { 4524 default: 4525 llvm_unreachable("Unexpected opcode!"); 4526 case TargetOpcode::G_SADDO: 4527 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); 4528 case TargetOpcode::G_UADDO: 4529 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS); 4530 case TargetOpcode::G_SSUBO: 4531 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); 4532 case TargetOpcode::G_USUBO: 4533 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO); 4534 } 4535 } 4536 4537 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) { 4538 MachineRegisterInfo &MRI = *MIB.getMRI(); 4539 // We want to recognize this pattern: 4540 // 4541 // $z = G_FCMP pred, $x, $y 4542 // ... 4543 // $w = G_SELECT $z, $a, $b 4544 // 4545 // Where the value of $z is *only* ever used by the G_SELECT (possibly with 4546 // some copies/truncs in between.) 4547 // 4548 // If we see this, then we can emit something like this: 4549 // 4550 // fcmp $x, $y 4551 // fcsel $w, $a, $b, pred 4552 // 4553 // Rather than emitting both of the rather long sequences in the standard 4554 // G_FCMP/G_SELECT select methods. 4555 4556 // First, check if the condition is defined by a compare. 4557 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg()); 4558 while (CondDef) { 4559 // We can only fold if all of the defs have one use. 4560 Register CondDefReg = CondDef->getOperand(0).getReg(); 4561 if (!MRI.hasOneNonDBGUse(CondDefReg)) { 4562 // Unless it's another select. 4563 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) { 4564 if (CondDef == &UI) 4565 continue; 4566 if (UI.getOpcode() != TargetOpcode::G_SELECT) 4567 return false; 4568 } 4569 } 4570 4571 // We can skip over G_TRUNC since the condition is 1-bit. 4572 // Truncating/extending can have no impact on the value. 4573 unsigned Opc = CondDef->getOpcode(); 4574 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC) 4575 break; 4576 4577 // Can't see past copies from physregs. 
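// (A physical register doesn't have a single vreg def we can keep walking
// through, so give up on the fold in that case.)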
4578 if (Opc == TargetOpcode::COPY && 4579 Register::isPhysicalRegister(CondDef->getOperand(1).getReg())) 4580 return false; 4581 4582 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg()); 4583 } 4584 4585 // Is the condition defined by a compare? 4586 if (!CondDef) 4587 return false; 4588 4589 unsigned CondOpc = CondDef->getOpcode(); 4590 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) 4591 return false; 4592 4593 AArch64CC::CondCode CondCode; 4594 if (CondOpc == TargetOpcode::G_ICMP) { 4595 auto Pred = 4596 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); 4597 CondCode = changeICMPPredToAArch64CC(Pred); 4598 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), 4599 CondDef->getOperand(1), MIB); 4600 } else { 4601 // Get the condition code for the select. 4602 auto Pred = 4603 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); 4604 AArch64CC::CondCode CondCode2; 4605 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2); 4606 4607 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two 4608 // instructions to emit the comparison. 4609 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be 4610 // unnecessary. 4611 if (CondCode2 != AArch64CC::AL) 4612 return false; 4613 4614 if (!emitFPCompare(CondDef->getOperand(2).getReg(), 4615 CondDef->getOperand(3).getReg(), MIB)) { 4616 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); 4617 return false; 4618 } 4619 } 4620 4621 // Emit the select. 4622 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(), 4623 I.getOperand(3).getReg(), CondCode, MIB); 4624 I.eraseFromParent(); 4625 return true; 4626 } 4627 4628 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( 4629 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, 4630 MachineIRBuilder &MIRBuilder) const { 4631 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && 4632 "Unexpected MachineOperand"); 4633 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4634 // We want to find this sort of thing: 4635 // x = G_SUB 0, y 4636 // G_ICMP z, x 4637 // 4638 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead. 4639 // e.g: 4640 // 4641 // cmn z, y 4642 4643 // Check if the RHS or LHS of the G_ICMP is defined by a SUB 4644 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI); 4645 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI); 4646 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate()); 4647 // Given this: 4648 // 4649 // x = G_SUB 0, y 4650 // G_ICMP x, z 4651 // 4652 // Produce this: 4653 // 4654 // cmn y, z 4655 if (isCMN(LHSDef, P, MRI)) 4656 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder); 4657 4658 // Same idea here, but with the RHS of the compare instead: 4659 // 4660 // Given this: 4661 // 4662 // x = G_SUB 0, y 4663 // G_ICMP z, x 4664 // 4665 // Produce this: 4666 // 4667 // cmn z, y 4668 if (isCMN(RHSDef, P, MRI)) 4669 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder); 4670 4671 // Given this: 4672 // 4673 // z = G_AND x, y 4674 // G_ICMP z, 0 4675 // 4676 // Produce this if the compare is signed: 4677 // 4678 // tst x, y 4679 if (!CmpInst::isUnsigned(P) && LHSDef && 4680 LHSDef->getOpcode() == TargetOpcode::G_AND) { 4681 // Make sure that the RHS is 0. 
4682 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI); 4683 if (!ValAndVReg || ValAndVReg->Value != 0) 4684 return nullptr; 4685 4686 return emitTST(LHSDef->getOperand(1), 4687 LHSDef->getOperand(2), MIRBuilder); 4688 } 4689 4690 return nullptr; 4691 } 4692 4693 bool AArch64InstructionSelector::selectShuffleVector( 4694 MachineInstr &I, MachineRegisterInfo &MRI) { 4695 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 4696 Register Src1Reg = I.getOperand(1).getReg(); 4697 const LLT Src1Ty = MRI.getType(Src1Reg); 4698 Register Src2Reg = I.getOperand(2).getReg(); 4699 const LLT Src2Ty = MRI.getType(Src2Reg); 4700 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask(); 4701 4702 MachineBasicBlock &MBB = *I.getParent(); 4703 MachineFunction &MF = *MBB.getParent(); 4704 LLVMContext &Ctx = MF.getFunction().getContext(); 4705 4706 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if 4707 // it's originated from a <1 x T> type. Those should have been lowered into 4708 // G_BUILD_VECTOR earlier. 4709 if (!Src1Ty.isVector() || !Src2Ty.isVector()) { 4710 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n"); 4711 return false; 4712 } 4713 4714 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; 4715 4716 SmallVector<Constant *, 64> CstIdxs; 4717 for (int Val : Mask) { 4718 // For now, any undef indexes we'll just assume to be 0. This should be 4719 // optimized in future, e.g. to select DUP etc. 4720 Val = Val < 0 ? 0 : Val; 4721 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { 4722 unsigned Offset = Byte + Val * BytesPerElt; 4723 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset)); 4724 } 4725 } 4726 4727 // Use a constant pool to load the index vector for TBL. 4728 Constant *CPVal = ConstantVector::get(CstIdxs); 4729 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB); 4730 if (!IndexLoad) { 4731 LLVM_DEBUG(dbgs() << "Could not load from a constant pool"); 4732 return false; 4733 } 4734 4735 if (DstTy.getSizeInBits() != 128) { 4736 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty"); 4737 // This case can be done with TBL1. 4738 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB); 4739 if (!Concat) { 4740 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1"); 4741 return false; 4742 } 4743 4744 // The constant pool load will be 64 bits, so need to convert to FPR128 reg. 4745 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass, 4746 IndexLoad->getOperand(0).getReg(), MIB); 4747 4748 auto TBL1 = MIB.buildInstr( 4749 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass}, 4750 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()}); 4751 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI); 4752 4753 auto Copy = 4754 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 4755 .addReg(TBL1.getReg(0), 0, AArch64::dsub); 4756 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI); 4757 I.eraseFromParent(); 4758 return true; 4759 } 4760 4761 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive 4762 // Q registers for regalloc. 
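// The final lookup is a two-register-table TBL, e.g.
// "tbl v0.16b, { v1.16b, v2.16b }, v3.16b" (registers illustrative).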
4763 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg}; 4764 auto RegSeq = createQTuple(Regs, MIB); 4765 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)}, 4766 {RegSeq, IndexLoad->getOperand(0)}); 4767 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI); 4768 I.eraseFromParent(); 4769 return true; 4770 } 4771 4772 MachineInstr *AArch64InstructionSelector::emitLaneInsert( 4773 Optional<Register> DstReg, Register SrcReg, Register EltReg, 4774 unsigned LaneIdx, const RegisterBank &RB, 4775 MachineIRBuilder &MIRBuilder) const { 4776 MachineInstr *InsElt = nullptr; 4777 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; 4778 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4779 4780 // Create a register to define with the insert if one wasn't passed in. 4781 if (!DstReg) 4782 DstReg = MRI.createVirtualRegister(DstRC); 4783 4784 unsigned EltSize = MRI.getType(EltReg).getSizeInBits(); 4785 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first; 4786 4787 if (RB.getID() == AArch64::FPRRegBankID) { 4788 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder); 4789 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) 4790 .addImm(LaneIdx) 4791 .addUse(InsSub->getOperand(0).getReg()) 4792 .addImm(0); 4793 } else { 4794 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) 4795 .addImm(LaneIdx) 4796 .addUse(EltReg); 4797 } 4798 4799 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); 4800 return InsElt; 4801 } 4802 4803 bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I, 4804 MachineRegisterInfo &MRI) { 4805 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT); 4806 4807 // Get information on the destination. 4808 Register DstReg = I.getOperand(0).getReg(); 4809 const LLT DstTy = MRI.getType(DstReg); 4810 unsigned VecSize = DstTy.getSizeInBits(); 4811 4812 // Get information on the element we want to insert into the destination. 4813 Register EltReg = I.getOperand(2).getReg(); 4814 const LLT EltTy = MRI.getType(EltReg); 4815 unsigned EltSize = EltTy.getSizeInBits(); 4816 if (EltSize < 16 || EltSize > 64) 4817 return false; // Don't support all element types yet. 4818 4819 // Find the definition of the index. Bail out if it's not defined by a 4820 // G_CONSTANT. 4821 Register IdxReg = I.getOperand(3).getReg(); 4822 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI); 4823 if (!VRegAndVal) 4824 return false; 4825 unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); 4826 4827 // Perform the lane insert. 4828 Register SrcReg = I.getOperand(1).getReg(); 4829 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); 4830 4831 if (VecSize < 128) { 4832 // If the vector we're inserting into is smaller than 128 bits, widen it 4833 // to 128 to do the insert. 4834 MachineInstr *ScalarToVec = 4835 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB); 4836 if (!ScalarToVec) 4837 return false; 4838 SrcReg = ScalarToVec->getOperand(0).getReg(); 4839 } 4840 4841 // Create an insert into a new FPR128 register. 4842 // Note that if our vector is already 128 bits, we end up emitting an extra 4843 // register. 4844 MachineInstr *InsMI = 4845 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB); 4846 4847 if (VecSize < 128) { 4848 // If we had to widen to perform the insert, then we have to demote back to 4849 // the original size to get the result we want. 
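    // E.g. for a 64-bit destination this amounts to (illustrative):
    //
    //   %wide:fpr128 = INSvi16lane ...   ; insert performed at 128 bits
    //   %dst:fpr64 = COPY %wide.dsub     ; demote with a subregister copy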
4850 Register DemoteVec = InsMI->getOperand(0).getReg(); 4851 const TargetRegisterClass *RC = 4852 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize); 4853 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { 4854 LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); 4855 return false; 4856 } 4857 unsigned SubReg = 0; 4858 if (!getSubRegForClass(RC, TRI, SubReg)) 4859 return false; 4860 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { 4861 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize 4862 << "\n"); 4863 return false; 4864 } 4865 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 4866 .addReg(DemoteVec, 0, SubReg); 4867 RBI.constrainGenericRegister(DstReg, *RC, MRI); 4868 } else { 4869 // No widening needed. 4870 InsMI->getOperand(0).setReg(DstReg); 4871 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); 4872 } 4873 4874 I.eraseFromParent(); 4875 return true; 4876 } 4877 4878 MachineInstr * 4879 AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, 4880 MachineIRBuilder &MIRBuilder, 4881 MachineRegisterInfo &MRI) { 4882 LLT DstTy = MRI.getType(Dst); 4883 unsigned DstSize = DstTy.getSizeInBits(); 4884 if (CV->isNullValue()) { 4885 if (DstSize == 128) { 4886 auto Mov = 4887 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0); 4888 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); 4889 return &*Mov; 4890 } 4891 4892 if (DstSize == 64) { 4893 auto Mov = 4894 MIRBuilder 4895 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) 4896 .addImm(0); 4897 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {}) 4898 .addReg(Mov.getReg(0), 0, AArch64::dsub); 4899 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI); 4900 return &*Copy; 4901 } 4902 } 4903 4904 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder); 4905 if (!CPLoad) { 4906 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!"); 4907 return nullptr; 4908 } 4909 4910 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0)); 4911 RBI.constrainGenericRegister( 4912 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI); 4913 return &*Copy; 4914 } 4915 4916 bool AArch64InstructionSelector::tryOptConstantBuildVec( 4917 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) { 4918 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 4919 unsigned DstSize = DstTy.getSizeInBits(); 4920 assert(DstSize <= 128 && "Unexpected build_vec type!"); 4921 if (DstSize < 32) 4922 return false; 4923 // Check if we're building a constant vector, in which case we want to 4924 // generate a constant pool load instead of a vector insert sequence. 
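  // E.g. a G_BUILD_VECTOR whose sources are all G_CONSTANT/G_FCONSTANT can
  // usually become a single constant pool load (or a MOVI for an all-zeros
  // vector) instead of one lane insert per element.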
4925 SmallVector<Constant *, 16> Csts; 4926 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) { 4927 // Try to find G_CONSTANT or G_FCONSTANT 4928 auto *OpMI = 4929 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI); 4930 if (OpMI) 4931 Csts.emplace_back( 4932 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm())); 4933 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT, 4934 I.getOperand(Idx).getReg(), MRI))) 4935 Csts.emplace_back( 4936 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm())); 4937 else 4938 return false; 4939 } 4940 Constant *CV = ConstantVector::get(Csts); 4941 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI)) 4942 return false; 4943 I.eraseFromParent(); 4944 return true; 4945 } 4946 4947 bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I, 4948 MachineRegisterInfo &MRI) { 4949 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 4950 // Until we port more of the optimized selections, for now just use a vector 4951 // insert sequence. 4952 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 4953 const LLT EltTy = MRI.getType(I.getOperand(1).getReg()); 4954 unsigned EltSize = EltTy.getSizeInBits(); 4955 4956 if (tryOptConstantBuildVec(I, DstTy, MRI)) 4957 return true; 4958 if (EltSize < 16 || EltSize > 64) 4959 return false; // Don't support all element types yet. 4960 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); 4961 4962 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; 4963 MachineInstr *ScalarToVec = 4964 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC, 4965 I.getOperand(1).getReg(), MIB); 4966 if (!ScalarToVec) 4967 return false; 4968 4969 Register DstVec = ScalarToVec->getOperand(0).getReg(); 4970 unsigned DstSize = DstTy.getSizeInBits(); 4971 4972 // Keep track of the last MI we inserted. Later on, we might be able to save 4973 // a copy using it. 4974 MachineInstr *PrevMI = nullptr; 4975 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) { 4976 // Note that if we don't do a subregister copy, we can end up making an 4977 // extra register. 4978 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB, 4979 MIB); 4980 DstVec = PrevMI->getOperand(0).getReg(); 4981 } 4982 4983 // If DstTy's size in bits is less than 128, then emit a subregister copy 4984 // from DstVec to the last register we've defined. 4985 if (DstSize < 128) { 4986 // Force this to be FPR using the destination vector. 4987 const TargetRegisterClass *RC = 4988 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize); 4989 if (!RC) 4990 return false; 4991 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { 4992 LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); 4993 return false; 4994 } 4995 4996 unsigned SubReg = 0; 4997 if (!getSubRegForClass(RC, TRI, SubReg)) 4998 return false; 4999 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { 5000 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize 5001 << "\n"); 5002 return false; 5003 } 5004 5005 Register Reg = MRI.createVirtualRegister(RC); 5006 Register DstReg = I.getOperand(0).getReg(); 5007 5008 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg); 5009 MachineOperand &RegOp = I.getOperand(1); 5010 RegOp.setReg(Reg); 5011 RBI.constrainGenericRegister(DstReg, *RC, MRI); 5012 } else { 5013 // We don't need a subregister copy. Save a copy by re-using the 5014 // destination register on the final insert. 
5015 assert(PrevMI && "PrevMI was null?"); 5016 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg()); 5017 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI); 5018 } 5019 5020 I.eraseFromParent(); 5021 return true; 5022 } 5023 5024 /// Helper function to find an intrinsic ID on an a MachineInstr. Returns the 5025 /// ID if it exists, and 0 otherwise. 5026 static unsigned findIntrinsicID(MachineInstr &I) { 5027 auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) { 5028 return Op.isIntrinsicID(); 5029 }); 5030 if (IntrinOp == I.operands_end()) 5031 return 0; 5032 return IntrinOp->getIntrinsicID(); 5033 } 5034 5035 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( 5036 MachineInstr &I, MachineRegisterInfo &MRI) { 5037 // Find the intrinsic ID. 5038 unsigned IntrinID = findIntrinsicID(I); 5039 if (!IntrinID) 5040 return false; 5041 5042 // Select the instruction. 5043 switch (IntrinID) { 5044 default: 5045 return false; 5046 case Intrinsic::aarch64_ldxp: 5047 case Intrinsic::aarch64_ldaxp: { 5048 auto NewI = MIB.buildInstr( 5049 IntrinID == Intrinsic::aarch64_ldxp ? AArch64::LDXPX : AArch64::LDAXPX, 5050 {I.getOperand(0).getReg(), I.getOperand(1).getReg()}, 5051 {I.getOperand(3)}); 5052 NewI.cloneMemRefs(I); 5053 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); 5054 break; 5055 } 5056 case Intrinsic::trap: 5057 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1); 5058 break; 5059 case Intrinsic::debugtrap: 5060 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); 5061 break; 5062 case Intrinsic::ubsantrap: 5063 MIB.buildInstr(AArch64::BRK, {}, {}) 5064 .addImm(I.getOperand(1).getImm() | ('U' << 8)); 5065 break; 5066 case Intrinsic::aarch64_neon_st2: { 5067 Register Src1 = I.getOperand(1).getReg(); 5068 Register Src2 = I.getOperand(2).getReg(); 5069 Register Ptr = I.getOperand(3).getReg(); 5070 LLT Ty = MRI.getType(Src1); 5071 const LLT S8 = LLT::scalar(8); 5072 const LLT S16 = LLT::scalar(16); 5073 const LLT S32 = LLT::scalar(32); 5074 const LLT S64 = LLT::scalar(64); 5075 const LLT P0 = LLT::pointer(0, 64); 5076 unsigned Opc; 5077 if (Ty == LLT::fixed_vector(8, S8)) 5078 Opc = AArch64::ST2Twov8b; 5079 else if (Ty == LLT::fixed_vector(16, S8)) 5080 Opc = AArch64::ST2Twov16b; 5081 else if (Ty == LLT::fixed_vector(4, S16)) 5082 Opc = AArch64::ST2Twov4h; 5083 else if (Ty == LLT::fixed_vector(8, S16)) 5084 Opc = AArch64::ST2Twov8h; 5085 else if (Ty == LLT::fixed_vector(2, S32)) 5086 Opc = AArch64::ST2Twov2s; 5087 else if (Ty == LLT::fixed_vector(4, S32)) 5088 Opc = AArch64::ST2Twov4s; 5089 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) 5090 Opc = AArch64::ST2Twov2d; 5091 else if (Ty == S64 || Ty == P0) 5092 Opc = AArch64::ST1Twov1d; 5093 else 5094 llvm_unreachable("Unexpected type for st2!"); 5095 SmallVector<Register, 2> Regs = {Src1, Src2}; 5096 Register Tuple = Ty.getSizeInBits() == 128 ? 
createQTuple(Regs, MIB) 5097 : createDTuple(Regs, MIB); 5098 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr}); 5099 Store.cloneMemRefs(I); 5100 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI); 5101 break; 5102 } 5103 } 5104 5105 I.eraseFromParent(); 5106 return true; 5107 } 5108 5109 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, 5110 MachineRegisterInfo &MRI) { 5111 unsigned IntrinID = findIntrinsicID(I); 5112 if (!IntrinID) 5113 return false; 5114 5115 switch (IntrinID) { 5116 default: 5117 break; 5118 case Intrinsic::aarch64_crypto_sha1h: { 5119 Register DstReg = I.getOperand(0).getReg(); 5120 Register SrcReg = I.getOperand(2).getReg(); 5121 5122 // FIXME: Should this be an assert? 5123 if (MRI.getType(DstReg).getSizeInBits() != 32 || 5124 MRI.getType(SrcReg).getSizeInBits() != 32) 5125 return false; 5126 5127 // The operation has to happen on FPRs. Set up some new FPR registers for 5128 // the source and destination if they are on GPRs. 5129 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { 5130 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); 5131 MIB.buildCopy({SrcReg}, {I.getOperand(2)}); 5132 5133 // Make sure the copy ends up getting constrained properly. 5134 RBI.constrainGenericRegister(I.getOperand(2).getReg(), 5135 AArch64::GPR32RegClass, MRI); 5136 } 5137 5138 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) 5139 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); 5140 5141 // Actually insert the instruction. 5142 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg}); 5143 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI); 5144 5145 // Did we create a new register for the destination? 5146 if (DstReg != I.getOperand(0).getReg()) { 5147 // Yep. Copy the result of the instruction back into the original 5148 // destination. 5149 MIB.buildCopy({I.getOperand(0)}, {DstReg}); 5150 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 5151 AArch64::GPR32RegClass, MRI); 5152 } 5153 5154 I.eraseFromParent(); 5155 return true; 5156 } 5157 case Intrinsic::frameaddress: 5158 case Intrinsic::returnaddress: { 5159 MachineFunction &MF = *I.getParent()->getParent(); 5160 MachineFrameInfo &MFI = MF.getFrameInfo(); 5161 5162 unsigned Depth = I.getOperand(2).getImm(); 5163 Register DstReg = I.getOperand(0).getReg(); 5164 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); 5165 5166 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) { 5167 if (!MFReturnAddr) { 5168 // Insert the copy from LR/X30 into the entry block, before it can be 5169 // clobbered by anything. 
5170 MFI.setReturnAddressIsTaken(true); 5171 MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR, 5172 AArch64::GPR64RegClass); 5173 } 5174 5175 if (STI.hasPAuth()) { 5176 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr}); 5177 } else { 5178 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr}); 5179 MIB.buildInstr(AArch64::XPACLRI); 5180 MIB.buildCopy({DstReg}, {Register(AArch64::LR)}); 5181 } 5182 5183 I.eraseFromParent(); 5184 return true; 5185 } 5186 5187 MFI.setFrameAddressIsTaken(true); 5188 Register FrameAddr(AArch64::FP); 5189 while (Depth--) { 5190 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); 5191 auto Ldr = 5192 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0); 5193 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI); 5194 FrameAddr = NextFrame; 5195 } 5196 5197 if (IntrinID == Intrinsic::frameaddress) 5198 MIB.buildCopy({DstReg}, {FrameAddr}); 5199 else { 5200 MFI.setReturnAddressIsTaken(true); 5201 5202 if (STI.hasPAuth()) { 5203 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 5204 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1); 5205 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg}); 5206 } else { 5207 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}) 5208 .addImm(1); 5209 MIB.buildInstr(AArch64::XPACLRI); 5210 MIB.buildCopy({DstReg}, {Register(AArch64::LR)}); 5211 } 5212 } 5213 5214 I.eraseFromParent(); 5215 return true; 5216 } 5217 case Intrinsic::swift_async_context_addr: 5218 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()}, 5219 {Register(AArch64::FP)}) 5220 .addImm(8) 5221 .addImm(0); 5222 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI); 5223 5224 MF->getFrameInfo().setFrameAddressIsTaken(true); 5225 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); 5226 I.eraseFromParent(); 5227 return true; 5228 } 5229 return false; 5230 } 5231 5232 InstructionSelector::ComplexRendererFns 5233 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { 5234 auto MaybeImmed = getImmedFromMO(Root); 5235 if (MaybeImmed == None || *MaybeImmed > 31) 5236 return None; 5237 uint64_t Enc = (32 - *MaybeImmed) & 0x1f; 5238 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 5239 } 5240 5241 InstructionSelector::ComplexRendererFns 5242 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { 5243 auto MaybeImmed = getImmedFromMO(Root); 5244 if (MaybeImmed == None || *MaybeImmed > 31) 5245 return None; 5246 uint64_t Enc = 31 - *MaybeImmed; 5247 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 5248 } 5249 5250 InstructionSelector::ComplexRendererFns 5251 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { 5252 auto MaybeImmed = getImmedFromMO(Root); 5253 if (MaybeImmed == None || *MaybeImmed > 63) 5254 return None; 5255 uint64_t Enc = (64 - *MaybeImmed) & 0x3f; 5256 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 5257 } 5258 5259 InstructionSelector::ComplexRendererFns 5260 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { 5261 auto MaybeImmed = getImmedFromMO(Root); 5262 if (MaybeImmed == None || *MaybeImmed > 63) 5263 return None; 5264 uint64_t Enc = 63 - *MaybeImmed; 5265 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 5266 } 5267 5268 /// Helper to select an immediate value that can be represented as a 12-bit 5269 /// value shifted left by either 0 or 12. 
If it is possible to do so, return 5270 /// the immediate and shift value. If not, return None. 5271 /// 5272 /// Used by selectArithImmed and selectNegArithImmed. 5273 InstructionSelector::ComplexRendererFns 5274 AArch64InstructionSelector::select12BitValueWithLeftShift( 5275 uint64_t Immed) const { 5276 unsigned ShiftAmt; 5277 if (Immed >> 12 == 0) { 5278 ShiftAmt = 0; 5279 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { 5280 ShiftAmt = 12; 5281 Immed = Immed >> 12; 5282 } else 5283 return None; 5284 5285 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); 5286 return {{ 5287 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); }, 5288 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); }, 5289 }}; 5290 } 5291 5292 /// SelectArithImmed - Select an immediate value that can be represented as 5293 /// a 12-bit value shifted left by either 0 or 12. If so, return true with 5294 /// Val set to the 12-bit value and Shift set to the shifter operand. 5295 InstructionSelector::ComplexRendererFns 5296 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { 5297 // This function is called from the addsub_shifted_imm ComplexPattern, 5298 // which lists [imm] as the list of opcode it's interested in, however 5299 // we still need to check whether the operand is actually an immediate 5300 // here because the ComplexPattern opcode list is only used in 5301 // root-level opcode matching. 5302 auto MaybeImmed = getImmedFromMO(Root); 5303 if (MaybeImmed == None) 5304 return None; 5305 return select12BitValueWithLeftShift(*MaybeImmed); 5306 } 5307 5308 /// SelectNegArithImmed - As above, but negates the value before trying to 5309 /// select it. 5310 InstructionSelector::ComplexRendererFns 5311 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const { 5312 // We need a register here, because we need to know if we have a 64 or 32 5313 // bit immediate. 5314 if (!Root.isReg()) 5315 return None; 5316 auto MaybeImmed = getImmedFromMO(Root); 5317 if (MaybeImmed == None) 5318 return None; 5319 uint64_t Immed = *MaybeImmed; 5320 5321 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" 5322 // have the opposite effect on the C flag, so this pattern mustn't match under 5323 // those circumstances. 5324 if (Immed == 0) 5325 return None; 5326 5327 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on 5328 // the root. 5329 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5330 if (MRI.getType(Root.getReg()).getSizeInBits() == 32) 5331 Immed = ~((uint32_t)Immed) + 1; 5332 else 5333 Immed = ~Immed + 1ULL; 5334 5335 if (Immed & 0xFFFFFFFFFF000000ULL) 5336 return None; 5337 5338 Immed &= 0xFFFFFFULL; 5339 return select12BitValueWithLeftShift(Immed); 5340 } 5341 5342 /// Return true if it is worth folding MI into an extended register. That is, 5343 /// if it's safe to pull it into the addressing mode of a load or store as a 5344 /// shift. 5345 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( 5346 MachineInstr &MI, const MachineRegisterInfo &MRI) const { 5347 // Always fold if there is one use, or if we're optimizing for size. 5348 Register DefReg = MI.getOperand(0).getReg(); 5349 if (MRI.hasOneNonDBGUse(DefReg) || 5350 MI.getParent()->getParent()->getFunction().hasOptSize()) 5351 return true; 5352 5353 // It's better to avoid folding and recomputing shifts when we don't have a 5354 // fastpath. 
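  // (LSLFast is meant to mark subtargets where a small address-mode shift is
  // essentially free, so repeating the shift in several memory operations is
  // acceptable there.)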
5355 if (!STI.hasLSLFast()) 5356 return false; 5357 5358 // We have a fastpath, so folding a shift in and potentially computing it 5359 // many times may be beneficial. Check if this is only used in memory ops. 5360 // If it is, then we should fold. 5361 return all_of(MRI.use_nodbg_instructions(DefReg), 5362 [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); 5363 } 5364 5365 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { 5366 switch (Type) { 5367 case AArch64_AM::SXTB: 5368 case AArch64_AM::SXTH: 5369 case AArch64_AM::SXTW: 5370 return true; 5371 default: 5372 return false; 5373 } 5374 } 5375 5376 InstructionSelector::ComplexRendererFns 5377 AArch64InstructionSelector::selectExtendedSHL( 5378 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset, 5379 unsigned SizeInBytes, bool WantsExt) const { 5380 assert(Base.isReg() && "Expected base to be a register operand"); 5381 assert(Offset.isReg() && "Expected offset to be a register operand"); 5382 5383 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5384 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg()); 5385 if (!OffsetInst) 5386 return None; 5387 5388 unsigned OffsetOpc = OffsetInst->getOpcode(); 5389 bool LookedThroughZExt = false; 5390 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) { 5391 // Try to look through a ZEXT. 5392 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt) 5393 return None; 5394 5395 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg()); 5396 OffsetOpc = OffsetInst->getOpcode(); 5397 LookedThroughZExt = true; 5398 5399 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) 5400 return None; 5401 } 5402 // Make sure that the memory op is a valid size. 5403 int64_t LegalShiftVal = Log2_32(SizeInBytes); 5404 if (LegalShiftVal == 0) 5405 return None; 5406 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) 5407 return None; 5408 5409 // Now, try to find the specific G_CONSTANT. Start by assuming that the 5410 // register we will offset is the LHS, and the register containing the 5411 // constant is the RHS. 5412 Register OffsetReg = OffsetInst->getOperand(1).getReg(); 5413 Register ConstantReg = OffsetInst->getOperand(2).getReg(); 5414 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI); 5415 if (!ValAndVReg) { 5416 // We didn't get a constant on the RHS. If the opcode is a shift, then 5417 // we're done. 5418 if (OffsetOpc == TargetOpcode::G_SHL) 5419 return None; 5420 5421 // If we have a G_MUL, we can use either register. Try looking at the RHS. 5422 std::swap(OffsetReg, ConstantReg); 5423 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI); 5424 if (!ValAndVReg) 5425 return None; 5426 } 5427 5428 // The value must fit into 3 bits, and must be positive. Make sure that is 5429 // true. 5430 int64_t ImmVal = ValAndVReg->Value.getSExtValue(); 5431 5432 // Since we're going to pull this into a shift, the constant value must be 5433 // a power of 2. If we got a multiply, then we need to check this. 5434 if (OffsetOpc == TargetOpcode::G_MUL) { 5435 if (!isPowerOf2_32(ImmVal)) 5436 return None; 5437 5438 // Got a power of 2. So, the amount we'll shift is the log base-2 of that. 5439 ImmVal = Log2_32(ImmVal); 5440 } 5441 5442 if ((ImmVal & 0x7) != ImmVal) 5443 return None; 5444 5445 // We are only allowed to shift by LegalShiftVal. This shift value is built 5446 // into the instruction, so we can't just use whatever we want. 
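  // E.g. for an 8-byte access LegalShiftVal is 3, so only a G_SHL by 3 (or a
  // G_MUL by 8) can be folded into the "lsl #3" of the addressing mode.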
5447 if (ImmVal != LegalShiftVal) 5448 return None; 5449 5450 unsigned SignExtend = 0; 5451 if (WantsExt) { 5452 // Check if the offset is defined by an extend, unless we looked through a 5453 // G_ZEXT earlier. 5454 if (!LookedThroughZExt) { 5455 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI); 5456 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true); 5457 if (Ext == AArch64_AM::InvalidShiftExtend) 5458 return None; 5459 5460 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0; 5461 // We only support SXTW for signed extension here. 5462 if (SignExtend && Ext != AArch64_AM::SXTW) 5463 return None; 5464 OffsetReg = ExtInst->getOperand(1).getReg(); 5465 } 5466 5467 // Need a 32-bit wide register here. 5468 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg())); 5469 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB); 5470 } 5471 5472 // We can use the LHS of the GEP as the base, and the LHS of the shift as an 5473 // offset. Signify that we are shifting by setting the shift flag to 1. 5474 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); }, 5475 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); }, 5476 [=](MachineInstrBuilder &MIB) { 5477 // Need to add both immediates here to make sure that they are both 5478 // added to the instruction. 5479 MIB.addImm(SignExtend); 5480 MIB.addImm(1); 5481 }}}; 5482 } 5483 5484 /// This is used for computing addresses like this: 5485 /// 5486 /// ldr x1, [x2, x3, lsl #3] 5487 /// 5488 /// Where x2 is the base register, and x3 is an offset register. The shift-left 5489 /// is a constant value specific to this load instruction. That is, we'll never 5490 /// see anything other than a 3 here (which corresponds to the size of the 5491 /// element being loaded.) 5492 InstructionSelector::ComplexRendererFns 5493 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( 5494 MachineOperand &Root, unsigned SizeInBytes) const { 5495 if (!Root.isReg()) 5496 return None; 5497 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5498 5499 // We want to find something like this: 5500 // 5501 // val = G_CONSTANT LegalShiftVal 5502 // shift = G_SHL off_reg val 5503 // ptr = G_PTR_ADD base_reg shift 5504 // x = G_LOAD ptr 5505 // 5506 // And fold it into this addressing mode: 5507 // 5508 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal] 5509 5510 // Check if we can find the G_PTR_ADD. 5511 MachineInstr *PtrAdd = 5512 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); 5513 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) 5514 return None; 5515 5516 // Now, try to match an opcode which will match our specific offset. 5517 // We want a G_SHL or a G_MUL. 5518 MachineInstr *OffsetInst = 5519 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI); 5520 return selectExtendedSHL(Root, PtrAdd->getOperand(1), 5521 OffsetInst->getOperand(0), SizeInBytes, 5522 /*WantsExt=*/false); 5523 } 5524 5525 /// This is used for computing addresses like this: 5526 /// 5527 /// ldr x1, [x2, x3] 5528 /// 5529 /// Where x2 is the base register, and x3 is an offset register. 5530 /// 5531 /// When possible (or profitable) to fold a G_PTR_ADD into the address calculation, 5532 /// this will do so. Otherwise, it will return None. 5533 InstructionSelector::ComplexRendererFns 5534 AArch64InstructionSelector::selectAddrModeRegisterOffset( 5535 MachineOperand &Root) const { 5536 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5537 5538 // We need a GEP. 
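  // That is, something like (illustrative):
  //
  //   %ptr:gpr(p0) = G_PTR_ADD %base, %offset
  //   %val:gpr(s64) = G_LOAD %ptr
  //
  // which can be selected as "ldr x0, [x_base, x_offset]".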
5539 MachineInstr *Gep = MRI.getVRegDef(Root.getReg()); 5540 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD) 5541 return None; 5542 5543 // If this is used more than once, let's not bother folding. 5544 // TODO: Check if they are memory ops. If they are, then we can still fold 5545 // without having to recompute anything. 5546 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg())) 5547 return None; 5548 5549 // Base is the GEP's LHS, offset is its RHS. 5550 return {{[=](MachineInstrBuilder &MIB) { 5551 MIB.addUse(Gep->getOperand(1).getReg()); 5552 }, 5553 [=](MachineInstrBuilder &MIB) { 5554 MIB.addUse(Gep->getOperand(2).getReg()); 5555 }, 5556 [=](MachineInstrBuilder &MIB) { 5557 // Need to add both immediates here to make sure that they are both 5558 // added to the instruction. 5559 MIB.addImm(0); 5560 MIB.addImm(0); 5561 }}}; 5562 } 5563 5564 /// This is intended to be equivalent to selectAddrModeXRO in 5565 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads. 5566 InstructionSelector::ComplexRendererFns 5567 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, 5568 unsigned SizeInBytes) const { 5569 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5570 if (!Root.isReg()) 5571 return None; 5572 MachineInstr *PtrAdd = 5573 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); 5574 if (!PtrAdd) 5575 return None; 5576 5577 // Check for an immediates which cannot be encoded in the [base + imm] 5578 // addressing mode, and can't be encoded in an add/sub. If this happens, we'll 5579 // end up with code like: 5580 // 5581 // mov x0, wide 5582 // add x1 base, x0 5583 // ldr x2, [x1, x0] 5584 // 5585 // In this situation, we can use the [base, xreg] addressing mode to save an 5586 // add/sub: 5587 // 5588 // mov x0, wide 5589 // ldr x2, [base, x0] 5590 auto ValAndVReg = 5591 getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI); 5592 if (ValAndVReg) { 5593 unsigned Scale = Log2_32(SizeInBytes); 5594 int64_t ImmOff = ValAndVReg->Value.getSExtValue(); 5595 5596 // Skip immediates that can be selected in the load/store addresing 5597 // mode. 5598 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 && 5599 ImmOff < (0x1000 << Scale)) 5600 return None; 5601 5602 // Helper lambda to decide whether or not it is preferable to emit an add. 5603 auto isPreferredADD = [](int64_t ImmOff) { 5604 // Constants in [0x0, 0xfff] can be encoded in an add. 5605 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) 5606 return true; 5607 5608 // Can it be encoded in an add lsl #12? 5609 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL) 5610 return false; 5611 5612 // It can be encoded in an add lsl #12, but we may not want to. If it is 5613 // possible to select this as a single movz, then prefer that. A single 5614 // movz is faster than an add with a shift. 5615 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && 5616 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; 5617 }; 5618 5619 // If the immediate can be encoded in a single add/sub, then bail out. 5620 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) 5621 return None; 5622 } 5623 5624 // Try to fold shifts into the addressing mode. 5625 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes); 5626 if (AddrModeFns) 5627 return AddrModeFns; 5628 5629 // If that doesn't work, see if it's possible to fold in registers from 5630 // a GEP. 
5631 return selectAddrModeRegisterOffset(Root); 5632 } 5633 5634 /// This is used for computing addresses like this: 5635 /// 5636 /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal] 5637 /// 5638 /// Where we have a 64-bit base register, a 32-bit offset register, and an 5639 /// extend (which may or may not be signed). 5640 InstructionSelector::ComplexRendererFns 5641 AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root, 5642 unsigned SizeInBytes) const { 5643 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5644 5645 MachineInstr *PtrAdd = 5646 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); 5647 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) 5648 return None; 5649 5650 MachineOperand &LHS = PtrAdd->getOperand(1); 5651 MachineOperand &RHS = PtrAdd->getOperand(2); 5652 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI); 5653 5654 // The first case is the same as selectAddrModeXRO, except we need an extend. 5655 // In this case, we try to find a shift and extend, and fold them into the 5656 // addressing mode. 5657 // 5658 // E.g. 5659 // 5660 // off_reg = G_Z/S/ANYEXT ext_reg 5661 // val = G_CONSTANT LegalShiftVal 5662 // shift = G_SHL off_reg val 5663 // ptr = G_PTR_ADD base_reg shift 5664 // x = G_LOAD ptr 5665 // 5666 // In this case we can get a load like this: 5667 // 5668 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal] 5669 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0), 5670 SizeInBytes, /*WantsExt=*/true); 5671 if (ExtendedShl) 5672 return ExtendedShl; 5673 5674 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though. 5675 // 5676 // e.g. 5677 // ldr something, [base_reg, ext_reg, sxtw] 5678 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) 5679 return None; 5680 5681 // Check if this is an extend. We'll get an extend type if it is. 5682 AArch64_AM::ShiftExtendType Ext = 5683 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true); 5684 if (Ext == AArch64_AM::InvalidShiftExtend) 5685 return None; 5686 5687 // Need a 32-bit wide register. 5688 MachineIRBuilder MIB(*PtrAdd); 5689 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(), 5690 AArch64::GPR32RegClass, MIB); 5691 unsigned SignExtend = Ext == AArch64_AM::SXTW; 5692 5693 // Base is LHS, offset is ExtReg. 5694 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); }, 5695 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, 5696 [=](MachineInstrBuilder &MIB) { 5697 MIB.addImm(SignExtend); 5698 MIB.addImm(0); 5699 }}}; 5700 } 5701 5702 /// Select a "register plus unscaled signed 9-bit immediate" address. This 5703 /// should only match when there is an offset that is not valid for a scaled 5704 /// immediate addressing mode. The "Size" argument is the size in bytes of the 5705 /// memory reference, which is needed here to know what is valid for a scaled 5706 /// immediate. 
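///
/// For example (illustrative): with a 4-byte access, an offset of 17 is not a
/// multiple of 4, so the scaled form cannot encode it, but it fits the signed
/// 9-bit unscaled range [-256, 255] used by LDUR/STUR.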
5707 InstructionSelector::ComplexRendererFns 5708 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, 5709 unsigned Size) const { 5710 MachineRegisterInfo &MRI = 5711 Root.getParent()->getParent()->getParent()->getRegInfo(); 5712 5713 if (!Root.isReg()) 5714 return None; 5715 5716 if (!isBaseWithConstantOffset(Root, MRI)) 5717 return None; 5718 5719 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); 5720 if (!RootDef) 5721 return None; 5722 5723 MachineOperand &OffImm = RootDef->getOperand(2); 5724 if (!OffImm.isReg()) 5725 return None; 5726 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); 5727 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT) 5728 return None; 5729 int64_t RHSC; 5730 MachineOperand &RHSOp1 = RHS->getOperand(1); 5731 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) 5732 return None; 5733 RHSC = RHSOp1.getCImm()->getSExtValue(); 5734 5735 // If the offset is valid as a scaled immediate, don't match here. 5736 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) 5737 return None; 5738 if (RHSC >= -256 && RHSC < 256) { 5739 MachineOperand &Base = RootDef->getOperand(1); 5740 return {{ 5741 [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, 5742 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, 5743 }}; 5744 } 5745 return None; 5746 } 5747 5748 InstructionSelector::ComplexRendererFns 5749 AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef, 5750 unsigned Size, 5751 MachineRegisterInfo &MRI) const { 5752 if (RootDef.getOpcode() != AArch64::G_ADD_LOW) 5753 return None; 5754 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg()); 5755 if (Adrp.getOpcode() != AArch64::ADRP) 5756 return None; 5757 5758 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG. 5759 auto Offset = Adrp.getOperand(1).getOffset(); 5760 if (Offset % Size != 0) 5761 return None; 5762 5763 auto GV = Adrp.getOperand(1).getGlobal(); 5764 if (GV->isThreadLocal()) 5765 return None; 5766 5767 auto &MF = *RootDef.getParent()->getParent(); 5768 if (GV->getPointerAlignment(MF.getDataLayout()) < Size) 5769 return None; 5770 5771 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget()); 5772 MachineIRBuilder MIRBuilder(RootDef); 5773 Register AdrpReg = Adrp.getOperand(0).getReg(); 5774 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); }, 5775 [=](MachineInstrBuilder &MIB) { 5776 MIB.addGlobalAddress(GV, Offset, 5777 OpFlags | AArch64II::MO_PAGEOFF | 5778 AArch64II::MO_NC); 5779 }}}; 5780 } 5781 5782 /// Select a "register plus scaled unsigned 12-bit immediate" address. The 5783 /// "Size" argument is the size in bytes of the memory reference, which 5784 /// determines the scale. 5785 InstructionSelector::ComplexRendererFns 5786 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, 5787 unsigned Size) const { 5788 MachineFunction &MF = *Root.getParent()->getParent()->getParent(); 5789 MachineRegisterInfo &MRI = MF.getRegInfo(); 5790 5791 if (!Root.isReg()) 5792 return None; 5793 5794 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); 5795 if (!RootDef) 5796 return None; 5797 5798 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { 5799 return {{ 5800 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, 5801 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, 5802 }}; 5803 } 5804 5805 CodeModel::Model CM = MF.getTarget().getCodeModel(); 5806 // Check if we can fold in the ADD of small code model ADRP + ADD address. 
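  // I.e. turn (illustrative):
  //
  //   adrp x8, sym
  //   add  x8, x8, :lo12:sym
  //   ldr  w0, [x8]
  //
  // into:
  //
  //   adrp x8, sym
  //   ldr  w0, [x8, :lo12:sym]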
5807 if (CM == CodeModel::Small) { 5808 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI); 5809 if (OpFns) 5810 return OpFns; 5811 } 5812 5813 if (isBaseWithConstantOffset(Root, MRI)) { 5814 MachineOperand &LHS = RootDef->getOperand(1); 5815 MachineOperand &RHS = RootDef->getOperand(2); 5816 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); 5817 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); 5818 if (LHSDef && RHSDef) { 5819 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue(); 5820 unsigned Scale = Log2_32(Size); 5821 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { 5822 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) 5823 return {{ 5824 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); }, 5825 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, 5826 }}; 5827 5828 return {{ 5829 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, 5830 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, 5831 }}; 5832 } 5833 } 5834 } 5835 5836 // Before falling back to our general case, check if the unscaled 5837 // instructions can handle this. If so, that's preferable. 5838 if (selectAddrModeUnscaled(Root, Size).hasValue()) 5839 return None; 5840 5841 return {{ 5842 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, 5843 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, 5844 }}; 5845 } 5846 5847 /// Given a shift instruction, return the correct shift type for that 5848 /// instruction. 5849 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) { 5850 // TODO: Handle AArch64_AM::ROR 5851 switch (MI.getOpcode()) { 5852 default: 5853 return AArch64_AM::InvalidShiftExtend; 5854 case TargetOpcode::G_SHL: 5855 return AArch64_AM::LSL; 5856 case TargetOpcode::G_LSHR: 5857 return AArch64_AM::LSR; 5858 case TargetOpcode::G_ASHR: 5859 return AArch64_AM::ASR; 5860 } 5861 } 5862 5863 /// Select a "shifted register" operand. If the value is not shifted, set the 5864 /// shift operand to a default value of "lsl 0". 5865 /// 5866 /// TODO: Allow shifted register to be rotated in logical instructions. 5867 InstructionSelector::ComplexRendererFns 5868 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const { 5869 if (!Root.isReg()) 5870 return None; 5871 MachineRegisterInfo &MRI = 5872 Root.getParent()->getParent()->getParent()->getRegInfo(); 5873 5874 // Check if the operand is defined by an instruction which corresponds to 5875 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc. 5876 // 5877 // TODO: Handle AArch64_AM::ROR for logical instructions. 5878 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg()); 5879 if (!ShiftInst) 5880 return None; 5881 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst); 5882 if (ShType == AArch64_AM::InvalidShiftExtend) 5883 return None; 5884 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI)) 5885 return None; 5886 5887 // Need an immediate on the RHS. 5888 MachineOperand &ShiftRHS = ShiftInst->getOperand(2); 5889 auto Immed = getImmedFromMO(ShiftRHS); 5890 if (!Immed) 5891 return None; 5892 5893 // We have something that we can fold. Fold in the shift's LHS and RHS into 5894 // the instruction. 
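  // E.g. folding "%s = G_SHL %x, 3" into a G_ADD user yields operands that
  // select to "add x0, x1, x2, lsl #3" (illustrative).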
5895 MachineOperand &ShiftLHS = ShiftInst->getOperand(1); 5896 Register ShiftReg = ShiftLHS.getReg(); 5897 5898 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits(); 5899 unsigned Val = *Immed & (NumBits - 1); 5900 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val); 5901 5902 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); }, 5903 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}}; 5904 } 5905 5906 AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst( 5907 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const { 5908 unsigned Opc = MI.getOpcode(); 5909 5910 // Handle explicit extend instructions first. 5911 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) { 5912 unsigned Size; 5913 if (Opc == TargetOpcode::G_SEXT) 5914 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 5915 else 5916 Size = MI.getOperand(2).getImm(); 5917 assert(Size != 64 && "Extend from 64 bits?"); 5918 switch (Size) { 5919 case 8: 5920 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB; 5921 case 16: 5922 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH; 5923 case 32: 5924 return AArch64_AM::SXTW; 5925 default: 5926 return AArch64_AM::InvalidShiftExtend; 5927 } 5928 } 5929 5930 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) { 5931 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 5932 assert(Size != 64 && "Extend from 64 bits?"); 5933 switch (Size) { 5934 case 8: 5935 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB; 5936 case 16: 5937 return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH; 5938 case 32: 5939 return AArch64_AM::UXTW; 5940 default: 5941 return AArch64_AM::InvalidShiftExtend; 5942 } 5943 } 5944 5945 // Don't have an explicit extend. Try to handle a G_AND with a constant mask 5946 // on the RHS. 5947 if (Opc != TargetOpcode::G_AND) 5948 return AArch64_AM::InvalidShiftExtend; 5949 5950 Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2)); 5951 if (!MaybeAndMask) 5952 return AArch64_AM::InvalidShiftExtend; 5953 uint64_t AndMask = *MaybeAndMask; 5954 switch (AndMask) { 5955 default: 5956 return AArch64_AM::InvalidShiftExtend; 5957 case 0xFF: 5958 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; 5959 case 0xFFFF: 5960 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; 5961 case 0xFFFFFFFF: 5962 return AArch64_AM::UXTW; 5963 } 5964 } 5965 5966 Register AArch64InstructionSelector::moveScalarRegClass( 5967 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const { 5968 MachineRegisterInfo &MRI = *MIB.getMRI(); 5969 auto Ty = MRI.getType(Reg); 5970 assert(!Ty.isVector() && "Expected scalars only!"); 5971 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC)) 5972 return Reg; 5973 5974 // Create a copy and immediately select it. 5975 // FIXME: We should have an emitCopy function? 5976 auto Copy = MIB.buildCopy({&RC}, {Reg}); 5977 selectCopy(*Copy, TII, MRI, TRI, RBI); 5978 return Copy.getReg(0); 5979 } 5980 5981 /// Select an "extended register" operand. This operand folds in an extend 5982 /// followed by an optional left shift. 
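///
/// For example (illustrative), a G_SEXT from s32 followed by a G_SHL by 2 can
/// fold into the operand of "add x0, x1, w2, sxtw #2"; shift amounts greater
/// than 4 are not accepted.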
5983 InstructionSelector::ComplexRendererFns 5984 AArch64InstructionSelector::selectArithExtendedRegister( 5985 MachineOperand &Root) const { 5986 if (!Root.isReg()) 5987 return None; 5988 MachineRegisterInfo &MRI = 5989 Root.getParent()->getParent()->getParent()->getRegInfo(); 5990 5991 uint64_t ShiftVal = 0; 5992 Register ExtReg; 5993 AArch64_AM::ShiftExtendType Ext; 5994 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI); 5995 if (!RootDef) 5996 return None; 5997 5998 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI)) 5999 return None; 6000 6001 // Check if we can fold a shift and an extend. 6002 if (RootDef->getOpcode() == TargetOpcode::G_SHL) { 6003 // Look for a constant on the RHS of the shift. 6004 MachineOperand &RHS = RootDef->getOperand(2); 6005 Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS); 6006 if (!MaybeShiftVal) 6007 return None; 6008 ShiftVal = *MaybeShiftVal; 6009 if (ShiftVal > 4) 6010 return None; 6011 // Look for a valid extend instruction on the LHS of the shift. 6012 MachineOperand &LHS = RootDef->getOperand(1); 6013 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI); 6014 if (!ExtDef) 6015 return None; 6016 Ext = getExtendTypeForInst(*ExtDef, MRI); 6017 if (Ext == AArch64_AM::InvalidShiftExtend) 6018 return None; 6019 ExtReg = ExtDef->getOperand(1).getReg(); 6020 } else { 6021 // Didn't get a shift. Try just folding an extend. 6022 Ext = getExtendTypeForInst(*RootDef, MRI); 6023 if (Ext == AArch64_AM::InvalidShiftExtend) 6024 return None; 6025 ExtReg = RootDef->getOperand(1).getReg(); 6026 6027 // If we have a 32 bit instruction which zeroes out the high half of a 6028 // register, we get an implicit zero extend for free. Check if we have one. 6029 // FIXME: We actually emit the extend right now even though we don't have 6030 // to. 6031 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) { 6032 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg); 6033 if (ExtInst && isDef32(*ExtInst)) 6034 return None; 6035 } 6036 } 6037 6038 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister 6039 // copy. 
6040 MachineIRBuilder MIB(*RootDef); 6041 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB); 6042 6043 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, 6044 [=](MachineInstrBuilder &MIB) { 6045 MIB.addImm(getArithExtendImm(Ext, ShiftVal)); 6046 }}}; 6047 } 6048 6049 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, 6050 const MachineInstr &MI, 6051 int OpIdx) const { 6052 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 6053 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 6054 "Expected G_CONSTANT"); 6055 Optional<int64_t> CstVal = 6056 getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI); 6057 assert(CstVal && "Expected constant value"); 6058 MIB.addImm(CstVal.getValue()); 6059 } 6060 6061 void AArch64InstructionSelector::renderLogicalImm32( 6062 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { 6063 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 6064 "Expected G_CONSTANT"); 6065 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); 6066 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32); 6067 MIB.addImm(Enc); 6068 } 6069 6070 void AArch64InstructionSelector::renderLogicalImm64( 6071 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { 6072 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 6073 "Expected G_CONSTANT"); 6074 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); 6075 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64); 6076 MIB.addImm(Enc); 6077 } 6078 6079 void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB, 6080 const MachineInstr &MI, 6081 int OpIdx) const { 6082 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && 6083 "Expected G_FCONSTANT"); 6084 MIB.addImm( 6085 AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF())); 6086 } 6087 6088 void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB, 6089 const MachineInstr &MI, 6090 int OpIdx) const { 6091 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && 6092 "Expected G_FCONSTANT"); 6093 MIB.addImm( 6094 AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF())); 6095 } 6096 6097 void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB, 6098 const MachineInstr &MI, 6099 int OpIdx) const { 6100 assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 && 6101 "Expected G_FCONSTANT"); 6102 MIB.addImm( 6103 AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF())); 6104 } 6105 6106 bool AArch64InstructionSelector::isLoadStoreOfNumBytes( 6107 const MachineInstr &MI, unsigned NumBytes) const { 6108 if (!MI.mayLoadOrStore()) 6109 return false; 6110 assert(MI.hasOneMemOperand() && 6111 "Expected load/store to have only one mem op!"); 6112 return (*MI.memoperands_begin())->getSize() == NumBytes; 6113 } 6114 6115 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const { 6116 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 6117 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32) 6118 return false; 6119 6120 // Only return true if we know the operation will zero-out the high half of 6121 // the 64-bit register. Truncates can be subregister copies, which don't 6122 // zero out the high bits. Copies and other copy-like instructions can be 6123 // fed by truncates, or could be lowered as subregister copies. 
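  // E.g. a G_TRUNC from s64 to s32 may be selected as a plain sub_32
  // subregister copy, which leaves bits [63:32] of the X register unchanged,
  // so it cannot be relied on as a zeroing 32-bit def.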
6124 switch (MI.getOpcode()) { 6125 default: 6126 return true; 6127 case TargetOpcode::COPY: 6128 case TargetOpcode::G_BITCAST: 6129 case TargetOpcode::G_TRUNC: 6130 case TargetOpcode::G_PHI: 6131 return false; 6132 } 6133 } 6134 6135 6136 // Perform fixups on the given PHI instruction's operands to force them all 6137 // to be the same as the destination regbank. 6138 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, 6139 const AArch64RegisterBankInfo &RBI) { 6140 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI"); 6141 Register DstReg = MI.getOperand(0).getReg(); 6142 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg); 6143 assert(DstRB && "Expected PHI dst to have regbank assigned"); 6144 MachineIRBuilder MIB(MI); 6145 6146 // Go through each operand and ensure it has the same regbank. 6147 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) { 6148 MachineOperand &MO = MI.getOperand(OpIdx); 6149 if (!MO.isReg()) 6150 continue; 6151 Register OpReg = MO.getReg(); 6152 const RegisterBank *RB = MRI.getRegBankOrNull(OpReg); 6153 if (RB != DstRB) { 6154 // Insert a cross-bank copy. 6155 auto *OpDef = MRI.getVRegDef(OpReg); 6156 const LLT &Ty = MRI.getType(OpReg); 6157 MachineBasicBlock &OpDefBB = *OpDef->getParent(); 6158 6159 // Any instruction we insert must appear after all PHIs in the block 6160 // for the block to be valid MIR. 6161 MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator()); 6162 if (InsertPt != OpDefBB.end() && InsertPt->isPHI()) 6163 InsertPt = OpDefBB.getFirstNonPHI(); 6164 MIB.setInsertPt(*OpDef->getParent(), InsertPt); 6165 auto Copy = MIB.buildCopy(Ty, OpReg); 6166 MRI.setRegBank(Copy.getReg(0), *DstRB); 6167 MO.setReg(Copy.getReg(0)); 6168 } 6169 } 6170 } 6171 6172 void AArch64InstructionSelector::processPHIs(MachineFunction &MF) { 6173 // We're looking for PHIs, build a list so we don't invalidate iterators. 6174 MachineRegisterInfo &MRI = MF.getRegInfo(); 6175 SmallVector<MachineInstr *, 32> Phis; 6176 for (auto &BB : MF) { 6177 for (auto &MI : BB) { 6178 if (MI.getOpcode() == TargetOpcode::G_PHI) 6179 Phis.emplace_back(&MI); 6180 } 6181 } 6182 6183 for (auto *MI : Phis) { 6184 // We need to do some work here if the operand types are < 16 bit and they 6185 // are split across fpr/gpr banks. Since all types <32b on gpr 6186 // end up being assigned gpr32 regclasses, we can end up with PHIs here 6187 // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't 6188 // be selecting heterogenous regbanks for operands if possible, but we 6189 // still need to be able to deal with it here. 6190 // 6191 // To fix this, if we have a gpr-bank operand < 32b in size and at least 6192 // one other operand is on the fpr bank, then we add cross-bank copies 6193 // to homogenize the operand banks. For simplicity the bank that we choose 6194 // to settle on is whatever bank the def operand has. For example: 6195 // 6196 // %endbb: 6197 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2 6198 // => 6199 // %bb2: 6200 // ... 6201 // %in2_copy:gpr(s16) = COPY %in2:fpr(s16) 6202 // ... 
6203 // %endbb:
6204 // %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
6205 bool HasGPROp = false, HasFPROp = false;
6206 for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
6207 const auto &MO = MI->getOperand(OpIdx);
6208 if (!MO.isReg())
6209 continue;
6210 const LLT &Ty = MRI.getType(MO.getReg());
6211 if (!Ty.isValid() || !Ty.isScalar())
6212 break;
6213 if (Ty.getSizeInBits() >= 32)
6214 break;
6215 const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
6216 // If for some reason we don't have a regbank yet, don't try anything.
6217 if (!RB)
6218 break;
6219
6220 if (RB->getID() == AArch64::GPRRegBankID)
6221 HasGPROp = true;
6222 else
6223 HasFPROp = true;
6224 }
6225 // We have heterogeneous regbanks, so fix them up.
6226 if (HasGPROp && HasFPROp)
6227 fixupPHIOpBanks(*MI, MRI, RBI);
6228 }
6229 }
6230
6231 namespace llvm {
6232 InstructionSelector *
6233 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
6234 AArch64Subtarget &Subtarget,
6235 AArch64RegisterBankInfo &RBI) {
6236 return new AArch64InstructionSelector(TM, Subtarget, RBI);
6237 }
6238 }
6239