1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements the targeting of the InstructionSelector class for 10 /// AArch64. 11 /// \todo This should be generated by TableGen. 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64GlobalISelUtils.h" 15 #include "AArch64InstrInfo.h" 16 #include "AArch64MachineFunctionInfo.h" 17 #include "AArch64RegisterBankInfo.h" 18 #include "AArch64RegisterInfo.h" 19 #include "AArch64Subtarget.h" 20 #include "AArch64TargetMachine.h" 21 #include "MCTargetDesc/AArch64AddressingModes.h" 22 #include "MCTargetDesc/AArch64MCTargetDesc.h" 23 #include "llvm/ADT/Optional.h" 24 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" 25 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 26 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" 27 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 28 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 29 #include "llvm/CodeGen/MachineBasicBlock.h" 30 #include "llvm/CodeGen/MachineConstantPool.h" 31 #include "llvm/CodeGen/MachineFunction.h" 32 #include "llvm/CodeGen/MachineInstr.h" 33 #include "llvm/CodeGen/MachineInstrBuilder.h" 34 #include "llvm/CodeGen/MachineMemOperand.h" 35 #include "llvm/CodeGen/MachineOperand.h" 36 #include "llvm/CodeGen/MachineRegisterInfo.h" 37 #include "llvm/CodeGen/TargetOpcodes.h" 38 #include "llvm/IR/Constants.h" 39 #include "llvm/IR/DerivedTypes.h" 40 #include "llvm/IR/Instructions.h" 41 #include "llvm/IR/PatternMatch.h" 42 #include "llvm/IR/Type.h" 43 #include "llvm/IR/IntrinsicsAArch64.h" 44 #include "llvm/Pass.h" 45 #include "llvm/Support/Debug.h" 46 #include "llvm/Support/raw_ostream.h" 47 48 #define DEBUG_TYPE "aarch64-isel" 49 50 using namespace llvm; 51 using namespace MIPatternMatch; 52 using namespace AArch64GISelUtils; 53 54 namespace llvm { 55 class BlockFrequencyInfo; 56 class ProfileSummaryInfo; 57 } 58 59 namespace { 60 61 #define GET_GLOBALISEL_PREDICATE_BITSET 62 #include "AArch64GenGlobalISel.inc" 63 #undef GET_GLOBALISEL_PREDICATE_BITSET 64 65 class AArch64InstructionSelector : public InstructionSelector { 66 public: 67 AArch64InstructionSelector(const AArch64TargetMachine &TM, 68 const AArch64Subtarget &STI, 69 const AArch64RegisterBankInfo &RBI); 70 71 bool select(MachineInstr &I) override; 72 static const char *getName() { return DEBUG_TYPE; } 73 74 void setupMF(MachineFunction &MF, GISelKnownBits *KB, 75 CodeGenCoverage &CoverageInfo, ProfileSummaryInfo *PSI, 76 BlockFrequencyInfo *BFI) override { 77 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI); 78 MIB.setMF(MF); 79 80 // hasFnAttribute() is expensive to call on every BRCOND selection, so 81 // cache it here for each run of the selector. 82 ProduceNonFlagSettingCondBr = 83 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); 84 MFReturnAddr = Register(); 85 86 processPHIs(MF); 87 } 88 89 private: 90 /// tblgen-erated 'select' implementation, used as the initial selector for 91 /// the patterns that don't require complex C++. 
92 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; 93 94 // A lowering phase that runs before any selection attempts. 95 // Returns true if the instruction was modified. 96 bool preISelLower(MachineInstr &I); 97 98 // An early selection function that runs before the selectImpl() call. 99 bool earlySelect(MachineInstr &I); 100 101 // Do some preprocessing of G_PHIs before we begin selection. 102 void processPHIs(MachineFunction &MF); 103 104 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI); 105 106 /// Eliminate same-sized cross-bank copies into stores before selectImpl(). 107 bool contractCrossBankCopyIntoStore(MachineInstr &I, 108 MachineRegisterInfo &MRI); 109 110 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI); 111 112 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, 113 MachineRegisterInfo &MRI) const; 114 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, 115 MachineRegisterInfo &MRI) const; 116 117 ///@{ 118 /// Helper functions for selectCompareBranch. 119 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, 120 MachineIRBuilder &MIB) const; 121 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, 122 MachineIRBuilder &MIB) const; 123 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, 124 MachineIRBuilder &MIB) const; 125 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, 126 MachineBasicBlock *DstMBB, 127 MachineIRBuilder &MIB) const; 128 ///@} 129 130 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, 131 MachineRegisterInfo &MRI); 132 133 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI); 134 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI); 135 136 // Helper to generate an equivalent of scalar_to_vector into a new register, 137 // returned via 'Dst'. 138 MachineInstr *emitScalarToVector(unsigned EltSize, 139 const TargetRegisterClass *DstRC, 140 Register Scalar, 141 MachineIRBuilder &MIRBuilder) const; 142 143 /// Emit a lane insert into \p DstReg, or a new vector register if None is 144 /// provided. 145 /// 146 /// The lane inserted into is defined by \p LaneIdx. The vector source 147 /// register is given by \p SrcReg. The register containing the element is 148 /// given by \p EltReg. 149 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg, 150 Register EltReg, unsigned LaneIdx, 151 const RegisterBank &RB, 152 MachineIRBuilder &MIRBuilder) const; 153 154 /// Emit a sequence of instructions representing a constant \p CV for a 155 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.) 156 /// 157 /// \returns the last instruction in the sequence on success, and nullptr 158 /// otherwise. 159 MachineInstr *emitConstantVector(Register Dst, Constant *CV, 160 MachineIRBuilder &MIRBuilder, 161 MachineRegisterInfo &MRI); 162 163 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI); 164 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy, 165 MachineRegisterInfo &MRI); 166 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a 167 /// SUBREG_TO_REG. 
168 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI); 169 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI); 170 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI); 171 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI); 172 173 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI); 174 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI); 175 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI); 176 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI); 177 178 /// Helper function to select vector load intrinsics like 179 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc. 180 /// \p Opc is the opcode that the selected instruction should use. 181 /// \p NumVecs is the number of vector destinations for the instruction. 182 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction. 183 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs, 184 MachineInstr &I); 185 bool selectIntrinsicWithSideEffects(MachineInstr &I, 186 MachineRegisterInfo &MRI); 187 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI); 188 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI); 189 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const; 190 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const; 191 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI); 192 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI); 193 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI); 194 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI); 195 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI); 196 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI); 197 198 unsigned emitConstantPoolEntry(const Constant *CPVal, 199 MachineFunction &MF) const; 200 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal, 201 MachineIRBuilder &MIRBuilder) const; 202 203 // Emit a vector concat operation. 204 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1, 205 Register Op2, 206 MachineIRBuilder &MIRBuilder) const; 207 208 // Emit an integer compare between LHS and RHS, which checks for Predicate. 209 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, 210 MachineOperand &Predicate, 211 MachineIRBuilder &MIRBuilder) const; 212 213 /// Emit a floating point comparison between \p LHS and \p RHS. 214 /// \p Pred if given is the intended predicate to use. 215 MachineInstr *emitFPCompare(Register LHS, Register RHS, 216 MachineIRBuilder &MIRBuilder, 217 Optional<CmpInst::Predicate> = None) const; 218 219 MachineInstr *emitInstr(unsigned Opcode, 220 std::initializer_list<llvm::DstOp> DstOps, 221 std::initializer_list<llvm::SrcOp> SrcOps, 222 MachineIRBuilder &MIRBuilder, 223 const ComplexRendererFns &RenderFns = None) const; 224 /// Helper function to emit an add or sub instruction. 225 /// 226 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above 227 /// in a specific order. 228 /// 229 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode. 
  ///
  /// \code
  ///   const std::array<std::array<unsigned, 2>, 5> Table {
  ///    {{AArch64::ADDXri, AArch64::ADDWri},
  ///     {AArch64::ADDXrs, AArch64::ADDWrs},
  ///     {AArch64::ADDXrr, AArch64::ADDWrr},
  ///     {AArch64::SUBXri, AArch64::SUBWri},
  ///     {AArch64::ADDXrx, AArch64::ADDWrx}}};
  /// \endcode
  ///
  /// Each row in the table corresponds to a different addressing mode. Each
  /// column corresponds to a different register size.
  ///
  /// \attention Rows must be structured as follows:
  ///   - Row 0: The ri opcode variants
  ///   - Row 1: The rs opcode variants
  ///   - Row 2: The rr opcode variants
  ///   - Row 3: The ri opcode variants for negative immediates
  ///   - Row 4: The rx opcode variants
  ///
  /// \attention Columns must be structured as follows:
  ///   - Column 0: The 64-bit opcode variants
  ///   - Column 1: The 32-bit opcode variants
  ///
  /// \p Dst is the destination register of the binop to emit.
  /// \p LHS is the left-hand operand of the binop to emit.
  /// \p RHS is the right-hand operand of the binop to emit.
  MachineInstr *emitAddSub(
      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
      Register Dst, MachineOperand &LHS, MachineOperand &RHS,
      MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
                           AArch64CC::CondCode CC,
                           MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(Optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
                          AArch64CC::CondCode Pred,
                          MachineIRBuilder &MIRBuilder) const;
  /// Emit a CSet for a FP compare.
  ///
  /// \p Dst is expected to be a 32-bit scalar register.
  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
                                MachineIRBuilder &MIRBuilder) const;

  /// Emit the overflow op for \p Opcode.
  ///
  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
  /// G_USUBO, etc.
  std::pair<MachineInstr *, AArch64CC::CondCode>
  emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS,
                 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const;

  /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg.
  /// \p IsNegative is true if the test should be "not zero".
  /// This will also optimize the test bit instruction when possible.
  MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative,
                            MachineBasicBlock *DstMBB,
                            MachineIRBuilder &MIB) const;

  /// Emit a CB(N)Z instruction which branches to \p DestMBB.
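  /// \p IsNegative selects CBNZ (branch when \p CompareReg is non-zero)
  /// rather than CBZ (branch when it is zero).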
304 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, 305 MachineBasicBlock *DestMBB, 306 MachineIRBuilder &MIB) const; 307 308 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td. 309 // We use these manually instead of using the importer since it doesn't 310 // support SDNodeXForm. 311 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const; 312 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const; 313 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const; 314 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const; 315 316 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const; 317 ComplexRendererFns selectArithImmed(MachineOperand &Root) const; 318 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const; 319 320 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, 321 unsigned Size) const; 322 323 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const { 324 return selectAddrModeUnscaled(Root, 1); 325 } 326 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const { 327 return selectAddrModeUnscaled(Root, 2); 328 } 329 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const { 330 return selectAddrModeUnscaled(Root, 4); 331 } 332 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const { 333 return selectAddrModeUnscaled(Root, 8); 334 } 335 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const { 336 return selectAddrModeUnscaled(Root, 16); 337 } 338 339 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used 340 /// from complex pattern matchers like selectAddrModeIndexed(). 341 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size, 342 MachineRegisterInfo &MRI) const; 343 344 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root, 345 unsigned Size) const; 346 template <int Width> 347 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const { 348 return selectAddrModeIndexed(Root, Width / 8); 349 } 350 351 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI, 352 const MachineRegisterInfo &MRI) const; 353 ComplexRendererFns 354 selectAddrModeShiftedExtendXReg(MachineOperand &Root, 355 unsigned SizeInBytes) const; 356 357 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether 358 /// or not a shift + extend should be folded into an addressing mode. Returns 359 /// None when this is not profitable or possible. 
360 ComplexRendererFns 361 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base, 362 MachineOperand &Offset, unsigned SizeInBytes, 363 bool WantsExt) const; 364 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; 365 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, 366 unsigned SizeInBytes) const; 367 template <int Width> 368 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const { 369 return selectAddrModeXRO(Root, Width / 8); 370 } 371 372 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root, 373 unsigned SizeInBytes) const; 374 template <int Width> 375 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const { 376 return selectAddrModeWRO(Root, Width / 8); 377 } 378 379 ComplexRendererFns selectShiftedRegister(MachineOperand &Root, 380 bool AllowROR = false) const; 381 382 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const { 383 return selectShiftedRegister(Root); 384 } 385 386 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const { 387 return selectShiftedRegister(Root, true); 388 } 389 390 /// Given an extend instruction, determine the correct shift-extend type for 391 /// that instruction. 392 /// 393 /// If the instruction is going to be used in a load or store, pass 394 /// \p IsLoadStore = true. 395 AArch64_AM::ShiftExtendType 396 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI, 397 bool IsLoadStore = false) const; 398 399 /// Move \p Reg to \p RC if \p Reg is not already on \p RC. 400 /// 401 /// \returns Either \p Reg if no change was necessary, or the new register 402 /// created by moving \p Reg. 403 /// 404 /// Note: This uses emitCopy right now. 405 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC, 406 MachineIRBuilder &MIB) const; 407 408 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; 409 410 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI, 411 int OpIdx = -1) const; 412 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I, 413 int OpIdx = -1) const; 414 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I, 415 int OpIdx = -1) const; 416 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI, 417 int OpIdx = -1) const; 418 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, 419 int OpIdx = -1) const; 420 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI, 421 int OpIdx = -1) const; 422 423 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. 424 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags); 425 426 // Optimization methods. 427 bool tryOptSelect(MachineInstr &MI); 428 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, 429 MachineOperand &Predicate, 430 MachineIRBuilder &MIRBuilder) const; 431 432 /// Return true if \p MI is a load or store of \p NumBytes bytes. 433 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; 434 435 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit 436 /// register zeroed out. In other words, the result of MI has been explicitly 437 /// zero extended. 
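  /// (This builds on the fact that AArch64 instructions writing a 32-bit W
  /// register implicitly zero the upper 32 bits of the corresponding X
  /// register.)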
438 bool isDef32(const MachineInstr &MI) const; 439 440 const AArch64TargetMachine &TM; 441 const AArch64Subtarget &STI; 442 const AArch64InstrInfo &TII; 443 const AArch64RegisterInfo &TRI; 444 const AArch64RegisterBankInfo &RBI; 445 446 bool ProduceNonFlagSettingCondBr = false; 447 448 // Some cached values used during selection. 449 // We use LR as a live-in register, and we keep track of it here as it can be 450 // clobbered by calls. 451 Register MFReturnAddr; 452 453 MachineIRBuilder MIB; 454 455 #define GET_GLOBALISEL_PREDICATES_DECL 456 #include "AArch64GenGlobalISel.inc" 457 #undef GET_GLOBALISEL_PREDICATES_DECL 458 459 // We declare the temporaries used by selectImpl() in the class to minimize the 460 // cost of constructing placeholder values. 461 #define GET_GLOBALISEL_TEMPORARIES_DECL 462 #include "AArch64GenGlobalISel.inc" 463 #undef GET_GLOBALISEL_TEMPORARIES_DECL 464 }; 465 466 } // end anonymous namespace 467 468 #define GET_GLOBALISEL_IMPL 469 #include "AArch64GenGlobalISel.inc" 470 #undef GET_GLOBALISEL_IMPL 471 472 AArch64InstructionSelector::AArch64InstructionSelector( 473 const AArch64TargetMachine &TM, const AArch64Subtarget &STI, 474 const AArch64RegisterBankInfo &RBI) 475 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), 476 RBI(RBI), 477 #define GET_GLOBALISEL_PREDICATES_INIT 478 #include "AArch64GenGlobalISel.inc" 479 #undef GET_GLOBALISEL_PREDICATES_INIT 480 #define GET_GLOBALISEL_TEMPORARIES_INIT 481 #include "AArch64GenGlobalISel.inc" 482 #undef GET_GLOBALISEL_TEMPORARIES_INIT 483 { 484 } 485 486 // FIXME: This should be target-independent, inferred from the types declared 487 // for each class in the bank. 488 static const TargetRegisterClass * 489 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, 490 const RegisterBankInfo &RBI, 491 bool GetAllRegSet = false) { 492 if (RB.getID() == AArch64::GPRRegBankID) { 493 if (Ty.getSizeInBits() <= 32) 494 return GetAllRegSet ? &AArch64::GPR32allRegClass 495 : &AArch64::GPR32RegClass; 496 if (Ty.getSizeInBits() == 64) 497 return GetAllRegSet ? &AArch64::GPR64allRegClass 498 : &AArch64::GPR64RegClass; 499 if (Ty.getSizeInBits() == 128) 500 return &AArch64::XSeqPairsClassRegClass; 501 return nullptr; 502 } 503 504 if (RB.getID() == AArch64::FPRRegBankID) { 505 switch (Ty.getSizeInBits()) { 506 case 8: 507 return &AArch64::FPR8RegClass; 508 case 16: 509 return &AArch64::FPR16RegClass; 510 case 32: 511 return &AArch64::FPR32RegClass; 512 case 64: 513 return &AArch64::FPR64RegClass; 514 case 128: 515 return &AArch64::FPR128RegClass; 516 } 517 return nullptr; 518 } 519 520 return nullptr; 521 } 522 523 /// Given a register bank, and size in bits, return the smallest register class 524 /// that can represent that combination. 525 static const TargetRegisterClass * 526 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, 527 bool GetAllRegSet = false) { 528 unsigned RegBankID = RB.getID(); 529 530 if (RegBankID == AArch64::GPRRegBankID) { 531 if (SizeInBits <= 32) 532 return GetAllRegSet ? &AArch64::GPR32allRegClass 533 : &AArch64::GPR32RegClass; 534 if (SizeInBits == 64) 535 return GetAllRegSet ? 
&AArch64::GPR64allRegClass 536 : &AArch64::GPR64RegClass; 537 if (SizeInBits == 128) 538 return &AArch64::XSeqPairsClassRegClass; 539 } 540 541 if (RegBankID == AArch64::FPRRegBankID) { 542 switch (SizeInBits) { 543 default: 544 return nullptr; 545 case 8: 546 return &AArch64::FPR8RegClass; 547 case 16: 548 return &AArch64::FPR16RegClass; 549 case 32: 550 return &AArch64::FPR32RegClass; 551 case 64: 552 return &AArch64::FPR64RegClass; 553 case 128: 554 return &AArch64::FPR128RegClass; 555 } 556 } 557 558 return nullptr; 559 } 560 561 /// Returns the correct subregister to use for a given register class. 562 static bool getSubRegForClass(const TargetRegisterClass *RC, 563 const TargetRegisterInfo &TRI, unsigned &SubReg) { 564 switch (TRI.getRegSizeInBits(*RC)) { 565 case 8: 566 SubReg = AArch64::bsub; 567 break; 568 case 16: 569 SubReg = AArch64::hsub; 570 break; 571 case 32: 572 if (RC != &AArch64::FPR32RegClass) 573 SubReg = AArch64::sub_32; 574 else 575 SubReg = AArch64::ssub; 576 break; 577 case 64: 578 SubReg = AArch64::dsub; 579 break; 580 default: 581 LLVM_DEBUG( 582 dbgs() << "Couldn't find appropriate subregister for register class."); 583 return false; 584 } 585 586 return true; 587 } 588 589 /// Returns the minimum size the given register bank can hold. 590 static unsigned getMinSizeForRegBank(const RegisterBank &RB) { 591 switch (RB.getID()) { 592 case AArch64::GPRRegBankID: 593 return 32; 594 case AArch64::FPRRegBankID: 595 return 8; 596 default: 597 llvm_unreachable("Tried to get minimum size for unknown register bank."); 598 } 599 } 600 601 /// Create a REG_SEQUENCE instruction using the registers in \p Regs. 602 /// Helper function for functions like createDTuple and createQTuple. 603 /// 604 /// \p RegClassIDs - The list of register class IDs available for some tuple of 605 /// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is 606 /// expected to contain between 2 and 4 tuple classes. 607 /// 608 /// \p SubRegs - The list of subregister classes associated with each register 609 /// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0 610 /// subregister class. The index of each subregister class is expected to 611 /// correspond with the index of each register class. 612 /// 613 /// \returns Either the destination register of REG_SEQUENCE instruction that 614 /// was created, or the 0th element of \p Regs if \p Regs contains a single 615 /// element. 616 static Register createTuple(ArrayRef<Register> Regs, 617 const unsigned RegClassIDs[], 618 const unsigned SubRegs[], MachineIRBuilder &MIB) { 619 unsigned NumRegs = Regs.size(); 620 if (NumRegs == 1) 621 return Regs[0]; 622 assert(NumRegs >= 2 && NumRegs <= 4 && 623 "Only support between two and 4 registers in a tuple!"); 624 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo(); 625 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]); 626 auto RegSequence = 627 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {}); 628 for (unsigned I = 0, E = Regs.size(); I < E; ++I) { 629 RegSequence.addUse(Regs[I]); 630 RegSequence.addImm(SubRegs[I]); 631 } 632 return RegSequence.getReg(0); 633 } 634 635 /// Create a tuple of D-registers using the registers in \p Regs. 
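/// For example, two D-registers become a single DD tuple register, built with
/// a REG_SEQUENCE by createTuple.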
636 static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { 637 static const unsigned RegClassIDs[] = { 638 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; 639 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, 640 AArch64::dsub2, AArch64::dsub3}; 641 return createTuple(Regs, RegClassIDs, SubRegs, MIB); 642 } 643 644 /// Create a tuple of Q-registers using the registers in \p Regs. 645 static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { 646 static const unsigned RegClassIDs[] = { 647 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; 648 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, 649 AArch64::qsub2, AArch64::qsub3}; 650 return createTuple(Regs, RegClassIDs, SubRegs, MIB); 651 } 652 653 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { 654 auto &MI = *Root.getParent(); 655 auto &MBB = *MI.getParent(); 656 auto &MF = *MBB.getParent(); 657 auto &MRI = MF.getRegInfo(); 658 uint64_t Immed; 659 if (Root.isImm()) 660 Immed = Root.getImm(); 661 else if (Root.isCImm()) 662 Immed = Root.getCImm()->getZExtValue(); 663 else if (Root.isReg()) { 664 auto ValAndVReg = 665 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true); 666 if (!ValAndVReg) 667 return None; 668 Immed = ValAndVReg->Value.getSExtValue(); 669 } else 670 return None; 671 return Immed; 672 } 673 674 /// Check whether \p I is a currently unsupported binary operation: 675 /// - it has an unsized type 676 /// - an operand is not a vreg 677 /// - all operands are not in the same bank 678 /// These are checks that should someday live in the verifier, but right now, 679 /// these are mostly limitations of the aarch64 selector. 680 static bool unsupportedBinOp(const MachineInstr &I, 681 const AArch64RegisterBankInfo &RBI, 682 const MachineRegisterInfo &MRI, 683 const AArch64RegisterInfo &TRI) { 684 LLT Ty = MRI.getType(I.getOperand(0).getReg()); 685 if (!Ty.isValid()) { 686 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n"); 687 return true; 688 } 689 690 const RegisterBank *PrevOpBank = nullptr; 691 for (auto &MO : I.operands()) { 692 // FIXME: Support non-register operands. 693 if (!MO.isReg()) { 694 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n"); 695 return true; 696 } 697 698 // FIXME: Can generic operations have physical registers operands? If 699 // so, this will need to be taught about that, and we'll need to get the 700 // bank out of the minimal class for the register. 701 // Either way, this needs to be documented (and possibly verified). 702 if (!Register::isVirtualRegister(MO.getReg())) { 703 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n"); 704 return true; 705 } 706 707 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI); 708 if (!OpBank) { 709 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n"); 710 return true; 711 } 712 713 if (PrevOpBank && OpBank != PrevOpBank) { 714 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n"); 715 return true; 716 } 717 PrevOpBank = OpBank; 718 } 719 return false; 720 } 721 722 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc 723 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID 724 /// and of size \p OpSize. 725 /// \returns \p GenericOpc if the combination is unsupported. 
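///
/// For example (an illustrative call, not taken from an existing caller):
/// \code
///   // A 32-bit G_SHL on the GPR bank maps to the variable shift LSLVWr.
///   unsigned Opc = selectBinaryOp(TargetOpcode::G_SHL, AArch64::GPRRegBankID, 32);
/// \endcode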
726 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, 727 unsigned OpSize) { 728 switch (RegBankID) { 729 case AArch64::GPRRegBankID: 730 if (OpSize == 32) { 731 switch (GenericOpc) { 732 case TargetOpcode::G_SHL: 733 return AArch64::LSLVWr; 734 case TargetOpcode::G_LSHR: 735 return AArch64::LSRVWr; 736 case TargetOpcode::G_ASHR: 737 return AArch64::ASRVWr; 738 default: 739 return GenericOpc; 740 } 741 } else if (OpSize == 64) { 742 switch (GenericOpc) { 743 case TargetOpcode::G_PTR_ADD: 744 return AArch64::ADDXrr; 745 case TargetOpcode::G_SHL: 746 return AArch64::LSLVXr; 747 case TargetOpcode::G_LSHR: 748 return AArch64::LSRVXr; 749 case TargetOpcode::G_ASHR: 750 return AArch64::ASRVXr; 751 default: 752 return GenericOpc; 753 } 754 } 755 break; 756 case AArch64::FPRRegBankID: 757 switch (OpSize) { 758 case 32: 759 switch (GenericOpc) { 760 case TargetOpcode::G_FADD: 761 return AArch64::FADDSrr; 762 case TargetOpcode::G_FSUB: 763 return AArch64::FSUBSrr; 764 case TargetOpcode::G_FMUL: 765 return AArch64::FMULSrr; 766 case TargetOpcode::G_FDIV: 767 return AArch64::FDIVSrr; 768 default: 769 return GenericOpc; 770 } 771 case 64: 772 switch (GenericOpc) { 773 case TargetOpcode::G_FADD: 774 return AArch64::FADDDrr; 775 case TargetOpcode::G_FSUB: 776 return AArch64::FSUBDrr; 777 case TargetOpcode::G_FMUL: 778 return AArch64::FMULDrr; 779 case TargetOpcode::G_FDIV: 780 return AArch64::FDIVDrr; 781 case TargetOpcode::G_OR: 782 return AArch64::ORRv8i8; 783 default: 784 return GenericOpc; 785 } 786 } 787 break; 788 } 789 return GenericOpc; 790 } 791 792 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc, 793 /// appropriate for the (value) register bank \p RegBankID and of memory access 794 /// size \p OpSize. This returns the variant with the base+unsigned-immediate 795 /// addressing mode (e.g., LDRXui). 796 /// \returns \p GenericOpc if the combination is unsupported. 797 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, 798 unsigned OpSize) { 799 const bool isStore = GenericOpc == TargetOpcode::G_STORE; 800 switch (RegBankID) { 801 case AArch64::GPRRegBankID: 802 switch (OpSize) { 803 case 8: 804 return isStore ? AArch64::STRBBui : AArch64::LDRBBui; 805 case 16: 806 return isStore ? AArch64::STRHHui : AArch64::LDRHHui; 807 case 32: 808 return isStore ? AArch64::STRWui : AArch64::LDRWui; 809 case 64: 810 return isStore ? AArch64::STRXui : AArch64::LDRXui; 811 } 812 break; 813 case AArch64::FPRRegBankID: 814 switch (OpSize) { 815 case 8: 816 return isStore ? AArch64::STRBui : AArch64::LDRBui; 817 case 16: 818 return isStore ? AArch64::STRHui : AArch64::LDRHui; 819 case 32: 820 return isStore ? AArch64::STRSui : AArch64::LDRSui; 821 case 64: 822 return isStore ? AArch64::STRDui : AArch64::LDRDui; 823 case 128: 824 return isStore ? AArch64::STRQui : AArch64::LDRQui; 825 } 826 break; 827 } 828 return GenericOpc; 829 } 830 831 #ifndef NDEBUG 832 /// Helper function that verifies that we have a valid copy at the end of 833 /// selectCopy. Verifies that the source and dest have the expected sizes and 834 /// then returns true. 
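/// Only used in asserts builds: both the definition and its use in selectCopy
/// are guarded by NDEBUG.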
static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
                        const MachineRegisterInfo &MRI,
                        const TargetRegisterInfo &TRI,
                        const RegisterBankInfo &RBI) {
  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Make sure the size of the source and dest line up.
  assert(
      (DstSize == SrcSize ||
       // Copies are a means to set up initial types, the number of
       // bits may not exactly match.
       (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
       // Copies are a means to copy bits around, as long as we are
       // on the same register class, that's fine. Otherwise, that
       // means we need some SUBREG_TO_REG or AND & co.
       (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
      "Copy with different width?!");

  // Check the size of the destination.
  assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");

  return true;
}
#endif

/// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
/// to \p *To.
///
/// E.g. "To = COPY SrcReg:SubReg"
static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
                       const RegisterBankInfo &RBI, Register SrcReg,
                       const TargetRegisterClass *To, unsigned SubReg) {
  assert(SrcReg.isValid() && "Expected a valid source register?");
  assert(To && "Destination register class cannot be null");
  assert(SubReg && "Expected a valid subregister");

  MachineIRBuilder MIB(I);
  auto SubRegCopy =
      MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
  MachineOperand &RegOp = I.getOperand(1);
  RegOp.setReg(SubRegCopy.getReg(0));

  // It's possible that the destination register won't be constrained. Make
  // sure that happens.
  if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
    RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);

  return true;
}

/// Helper function to get the source and destination register classes for a
/// copy. Returns a std::pair containing the source register class for the
/// copy, and the destination register class for the copy. If a register class
/// cannot be determined, then it will be nullptr.
static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
                     MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                     const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
  unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);

  // Special casing for cross-bank copies of s1s. We can technically represent
  // a 1-bit value with any size of register. The minimum size for a GPR is 32
  // bits. So, we need to put the FPR on 32 bits as well.
  //
  // FIXME: I'm not sure if this case holds true outside of copies. If it does,
  // then we can pull it into the helpers that get the appropriate class for a
  // register bank.
  // Or make a new helper that carries along some constraint
  // information.
  if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1))
    SrcSize = DstSize = 32;

  return {getMinClassForRegBank(SrcRegBank, SrcSize, true),
          getMinClassForRegBank(DstRegBank, DstSize, true)};
}

static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg = I.getOperand(1).getReg();
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  // Find the correct register classes for the source and destination registers.
  const TargetRegisterClass *SrcRC;
  const TargetRegisterClass *DstRC;
  std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI);

  if (!DstRC) {
    LLVM_DEBUG(dbgs() << "Unexpected dest size "
                      << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n');
    return false;
  }

  // A couple helpers below, for making sure that the copy we produce is valid.

  // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want
  // to verify that the src and dst are the same size, since that's handled by
  // the SUBREG_TO_REG.
  bool KnownValid = false;

  // Returns true, or asserts if something we don't expect happens. Instead of
  // returning true, we return isValidCopy() to ensure that we verify the
  // result.
  auto CheckCopy = [&]() {
    // If we have a bitcast or something, we can't have physical registers.
    assert((I.isCopy() ||
            (!Register::isPhysicalRegister(I.getOperand(0).getReg()) &&
             !Register::isPhysicalRegister(I.getOperand(1).getReg()))) &&
           "No phys reg on generic operator!");
    bool ValidCopy = true;
#ifndef NDEBUG
    ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI);
    assert(ValidCopy && "Invalid copy.");
#endif
    (void)KnownValid;
    return ValidCopy;
  };

  // Is this a copy? If so, then we may need to insert a subregister copy.
  if (I.isCopy()) {
    // Yes. Check if there's anything to fix up.
    if (!SrcRC) {
      LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n");
      return false;
    }

    unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC);
    unsigned DstSize = TRI.getRegSizeInBits(*DstRC);
    unsigned SubReg;

    // If the source bank doesn't support a subregister copy small enough,
    // then we first need to copy to the destination bank.
    if (getMinSizeForRegBank(SrcRegBank) > DstSize) {
      const TargetRegisterClass *DstTempRC =
          getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true);
      getSubRegForClass(DstRC, TRI, SubReg);

      MachineIRBuilder MIB(I);
      auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg});
      copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg);
    } else if (SrcSize > DstSize) {
      // If the source register is bigger than the destination we need to
      // perform a subregister copy.
      const TargetRegisterClass *SubRegRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SubRegRC, TRI, SubReg);
      copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg);
    } else if (DstSize > SrcSize) {
      // If the destination register is bigger than the source we need to do
      // a promotion using SUBREG_TO_REG.
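      // (For example, a 32-bit GPR source is inserted into the sub_32
      // subregister of a 64-bit GPR destination.)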
      const TargetRegisterClass *PromotionRC =
          getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true);
      getSubRegForClass(SrcRC, TRI, SubReg);

      Register PromoteReg = MRI.createVirtualRegister(PromotionRC);
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(AArch64::SUBREG_TO_REG), PromoteReg)
          .addImm(0)
          .addUse(SrcReg)
          .addImm(SubReg);
      MachineOperand &RegOp = I.getOperand(1);
      RegOp.setReg(PromoteReg);

      // Promise that the copy is implicitly validated by the SUBREG_TO_REG.
      KnownValid = true;
    }

    // If the destination is a physical register, then there's nothing to
    // change, so we're done.
    if (Register::isPhysicalRegister(DstReg))
      return CheckCopy();
  }

  // No need to constrain SrcReg. It will get constrained when we hit another
  // of its use or its defs. Copies do not have constraints.
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // If this is a GPR ZEXT that we want to just reduce down into a copy.
  // The sizes will be mismatched with the source < 32b but that's ok.
  if (I.getOpcode() == TargetOpcode::G_ZEXT) {
    I.setDesc(TII.get(AArch64::COPY));
    assert(SrcRegBank.getID() == AArch64::GPRRegBankID);
    return selectCopy(I, TII, MRI, TRI, RBI);
  }

  I.setDesc(TII.get(AArch64::COPY));
  return CheckCopy();
}

static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
  if (!DstTy.isScalar() || !SrcTy.isScalar())
    return GenericOpc;

  const unsigned DstSize = DstTy.getSizeInBits();
  const unsigned SrcSize = SrcTy.getSizeInBits();

  switch (DstSize) {
  case 32:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXSri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXSri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUWDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUWDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  case 64:
    switch (SrcSize) {
    case 32:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUWDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUWDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXSr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXSr;
      default:
        return GenericOpc;
      }
    case 64:
      switch (GenericOpc) {
      case TargetOpcode::G_SITOFP:
        return AArch64::SCVTFUXDri;
      case TargetOpcode::G_UITOFP:
        return AArch64::UCVTFUXDri;
      case TargetOpcode::G_FPTOSI:
        return AArch64::FCVTZSUXDr;
      case TargetOpcode::G_FPTOUI:
        return AArch64::FCVTZUUXDr;
      default:
        return GenericOpc;
      }
    default:
      return GenericOpc;
    }
  default:
    return GenericOpc;
  };
  return GenericOpc;
}

MachineInstr *
AArch64InstructionSelector::emitSelect(Register Dst, Register True, 1116 Register False, AArch64CC::CondCode CC, 1117 MachineIRBuilder &MIB) const { 1118 MachineRegisterInfo &MRI = *MIB.getMRI(); 1119 assert(RBI.getRegBank(False, MRI, TRI)->getID() == 1120 RBI.getRegBank(True, MRI, TRI)->getID() && 1121 "Expected both select operands to have the same regbank?"); 1122 LLT Ty = MRI.getType(True); 1123 if (Ty.isVector()) 1124 return nullptr; 1125 const unsigned Size = Ty.getSizeInBits(); 1126 assert((Size == 32 || Size == 64) && 1127 "Expected 32 bit or 64 bit select only?"); 1128 const bool Is32Bit = Size == 32; 1129 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) { 1130 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr; 1131 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); 1132 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI); 1133 return &*FCSel; 1134 } 1135 1136 // By default, we'll try and emit a CSEL. 1137 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; 1138 bool Optimized = false; 1139 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI, 1140 &Optimized](Register &Reg, Register &OtherReg, 1141 bool Invert) { 1142 if (Optimized) 1143 return false; 1144 1145 // Attempt to fold: 1146 // 1147 // %sub = G_SUB 0, %x 1148 // %select = G_SELECT cc, %reg, %sub 1149 // 1150 // Into: 1151 // %select = CSNEG %reg, %x, cc 1152 Register MatchReg; 1153 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) { 1154 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; 1155 Reg = MatchReg; 1156 if (Invert) { 1157 CC = AArch64CC::getInvertedCondCode(CC); 1158 std::swap(Reg, OtherReg); 1159 } 1160 return true; 1161 } 1162 1163 // Attempt to fold: 1164 // 1165 // %xor = G_XOR %x, -1 1166 // %select = G_SELECT cc, %reg, %xor 1167 // 1168 // Into: 1169 // %select = CSINV %reg, %x, cc 1170 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) { 1171 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; 1172 Reg = MatchReg; 1173 if (Invert) { 1174 CC = AArch64CC::getInvertedCondCode(CC); 1175 std::swap(Reg, OtherReg); 1176 } 1177 return true; 1178 } 1179 1180 // Attempt to fold: 1181 // 1182 // %add = G_ADD %x, 1 1183 // %select = G_SELECT cc, %reg, %add 1184 // 1185 // Into: 1186 // %select = CSINC %reg, %x, cc 1187 if (mi_match(Reg, MRI, 1188 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)), 1189 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) { 1190 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; 1191 Reg = MatchReg; 1192 if (Invert) { 1193 CC = AArch64CC::getInvertedCondCode(CC); 1194 std::swap(Reg, OtherReg); 1195 } 1196 return true; 1197 } 1198 1199 return false; 1200 }; 1201 1202 // Helper lambda which tries to use CSINC/CSINV for the instruction when its 1203 // true/false values are constants. 1204 // FIXME: All of these patterns already exist in tablegen. We should be 1205 // able to import these. 1206 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, 1207 &Optimized]() { 1208 if (Optimized) 1209 return false; 1210 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI); 1211 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI); 1212 if (!TrueCst && !FalseCst) 1213 return false; 1214 1215 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; 1216 if (TrueCst && FalseCst) { 1217 int64_t T = TrueCst->Value.getSExtValue(); 1218 int64_t F = FalseCst->Value.getSExtValue(); 1219 1220 if (T == 0 && F == 1) { 1221 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc 1222 Opc = Is32Bit ? 
                        AArch64::CSINCWr : AArch64::CSINCXr;
        True = ZReg;
        False = ZReg;
        return true;
      }

      if (T == 0 && F == -1) {
        // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = ZReg;
        False = ZReg;
        return true;
      }
    }

    if (TrueCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      if (T == 1) {
        // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
        return true;
      }

      if (T == -1) {
        // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
        return true;
      }
    }

    if (FalseCst) {
      int64_t F = FalseCst->Value.getSExtValue();
      if (F == 1) {
        // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        False = ZReg;
        return true;
      }

      if (F == -1) {
        // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        False = ZReg;
        return true;
      }
    }
    return false;
  };

  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
  Optimized |= TryOptSelectCst();
  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
  return &*SelectInst;
}

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

/// Return a register which can be used as a bit to test in a TB(N)Z.
static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
                              MachineRegisterInfo &MRI) {
  assert(Reg.isValid() && "Expected valid register!");
  bool HasZext = false;
  while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) {
    unsigned Opc = MI->getOpcode();

    if (!MI->getOperand(0).isReg() ||
        !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      break;

    // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
    //
    // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number
    // on the truncated x is the same as the bit number on x.
    if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT ||
        Opc == TargetOpcode::G_TRUNC) {
      if (Opc == TargetOpcode::G_ZEXT)
        HasZext = true;

      Register NextReg = MI->getOperand(1).getReg();
      // Did we find something worth folding?
      if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg))
        break;

      // NextReg is worth folding. Keep looking.
1339 Reg = NextReg; 1340 continue; 1341 } 1342 1343 // Attempt to find a suitable operation with a constant on one side. 1344 Optional<uint64_t> C; 1345 Register TestReg; 1346 switch (Opc) { 1347 default: 1348 break; 1349 case TargetOpcode::G_AND: 1350 case TargetOpcode::G_XOR: { 1351 TestReg = MI->getOperand(1).getReg(); 1352 Register ConstantReg = MI->getOperand(2).getReg(); 1353 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI); 1354 if (!VRegAndVal) { 1355 // AND commutes, check the other side for a constant. 1356 // FIXME: Can we canonicalize the constant so that it's always on the 1357 // same side at some point earlier? 1358 std::swap(ConstantReg, TestReg); 1359 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI); 1360 } 1361 if (VRegAndVal) { 1362 if (HasZext) 1363 C = VRegAndVal->Value.getZExtValue(); 1364 else 1365 C = VRegAndVal->Value.getSExtValue(); 1366 } 1367 break; 1368 } 1369 case TargetOpcode::G_ASHR: 1370 case TargetOpcode::G_LSHR: 1371 case TargetOpcode::G_SHL: { 1372 TestReg = MI->getOperand(1).getReg(); 1373 auto VRegAndVal = 1374 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); 1375 if (VRegAndVal) 1376 C = VRegAndVal->Value.getSExtValue(); 1377 break; 1378 } 1379 } 1380 1381 // Didn't find a constant or viable register. Bail out of the loop. 1382 if (!C || !TestReg.isValid()) 1383 break; 1384 1385 // We found a suitable instruction with a constant. Check to see if we can 1386 // walk through the instruction. 1387 Register NextReg; 1388 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits(); 1389 switch (Opc) { 1390 default: 1391 break; 1392 case TargetOpcode::G_AND: 1393 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set. 1394 if ((*C >> Bit) & 1) 1395 NextReg = TestReg; 1396 break; 1397 case TargetOpcode::G_SHL: 1398 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in 1399 // the type of the register. 1400 if (*C <= Bit && (Bit - *C) < TestRegSize) { 1401 NextReg = TestReg; 1402 Bit = Bit - *C; 1403 } 1404 break; 1405 case TargetOpcode::G_ASHR: 1406 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits 1407 // in x 1408 NextReg = TestReg; 1409 Bit = Bit + *C; 1410 if (Bit >= TestRegSize) 1411 Bit = TestRegSize - 1; 1412 break; 1413 case TargetOpcode::G_LSHR: 1414 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x 1415 if ((Bit + *C) < TestRegSize) { 1416 NextReg = TestReg; 1417 Bit = Bit + *C; 1418 } 1419 break; 1420 case TargetOpcode::G_XOR: 1421 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when 1422 // appropriate. 1423 // 1424 // e.g. If x' = xor x, c, and the b-th bit is set in c then 1425 // 1426 // tbz x', b -> tbnz x, b 1427 // 1428 // Because x' only has the b-th bit set if x does not. 1429 if ((*C >> Bit) & 1) 1430 Invert = !Invert; 1431 NextReg = TestReg; 1432 break; 1433 } 1434 1435 // Check if we found anything worth folding. 1436 if (!NextReg.isValid()) 1437 return Reg; 1438 Reg = NextReg; 1439 } 1440 1441 return Reg; 1442 } 1443 1444 MachineInstr *AArch64InstructionSelector::emitTestBit( 1445 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB, 1446 MachineIRBuilder &MIB) const { 1447 assert(TestReg.isValid()); 1448 assert(ProduceNonFlagSettingCondBr && 1449 "Cannot emit TB(N)Z with speculation tracking!"); 1450 MachineRegisterInfo &MRI = *MIB.getMRI(); 1451 1452 // Attempt to optimize the test bit by walking over instructions. 
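  // For example, a test of bit 3 of (G_SHL x, 2) is the same as a test of
  // bit 1 of x, so getTestBitReg may return x with an adjusted bit number.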
1453 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI); 1454 LLT Ty = MRI.getType(TestReg); 1455 unsigned Size = Ty.getSizeInBits(); 1456 assert(!Ty.isVector() && "Expected a scalar!"); 1457 assert(Bit < 64 && "Bit is too large!"); 1458 1459 // When the test register is a 64-bit register, we have to narrow to make 1460 // TBNZW work. 1461 bool UseWReg = Bit < 32; 1462 unsigned NecessarySize = UseWReg ? 32 : 64; 1463 if (Size != NecessarySize) 1464 TestReg = moveScalarRegClass( 1465 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass, 1466 MIB); 1467 1468 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX}, 1469 {AArch64::TBZW, AArch64::TBNZW}}; 1470 unsigned Opc = OpcTable[UseWReg][IsNegative]; 1471 auto TestBitMI = 1472 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB); 1473 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI); 1474 return &*TestBitMI; 1475 } 1476 1477 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( 1478 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, 1479 MachineIRBuilder &MIB) const { 1480 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?"); 1481 // Given something like this: 1482 // 1483 // %x = ...Something... 1484 // %one = G_CONSTANT i64 1 1485 // %zero = G_CONSTANT i64 0 1486 // %and = G_AND %x, %one 1487 // %cmp = G_ICMP intpred(ne), %and, %zero 1488 // %cmp_trunc = G_TRUNC %cmp 1489 // G_BRCOND %cmp_trunc, %bb.3 1490 // 1491 // We want to try and fold the AND into the G_BRCOND and produce either a 1492 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)). 1493 // 1494 // In this case, we'd get 1495 // 1496 // TBNZ %x %bb.3 1497 // 1498 1499 // Check if the AND has a constant on its RHS which we can use as a mask. 1500 // If it's a power of 2, then it's the same as checking a specific bit. 1501 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set) 1502 auto MaybeBit = getIConstantVRegValWithLookThrough( 1503 AndInst.getOperand(2).getReg(), *MIB.getMRI()); 1504 if (!MaybeBit) 1505 return false; 1506 1507 int32_t Bit = MaybeBit->Value.exactLogBase2(); 1508 if (Bit < 0) 1509 return false; 1510 1511 Register TestReg = AndInst.getOperand(1).getReg(); 1512 1513 // Emit a TB(N)Z. 
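  // Invert selects TBNZ (the intpred(ne) case above) rather than TBZ (the
  // intpred(eq) case).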
1514 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB); 1515 return true; 1516 } 1517 1518 MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, 1519 bool IsNegative, 1520 MachineBasicBlock *DestMBB, 1521 MachineIRBuilder &MIB) const { 1522 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!"); 1523 MachineRegisterInfo &MRI = *MIB.getMRI(); 1524 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == 1525 AArch64::GPRRegBankID && 1526 "Expected GPRs only?"); 1527 auto Ty = MRI.getType(CompareReg); 1528 unsigned Width = Ty.getSizeInBits(); 1529 assert(!Ty.isVector() && "Expected scalar only?"); 1530 assert(Width <= 64 && "Expected width to be at most 64?"); 1531 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, 1532 {AArch64::CBNZW, AArch64::CBNZX}}; 1533 unsigned Opc = OpcTable[IsNegative][Width == 64]; 1534 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB); 1535 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI); 1536 return &*BranchMI; 1537 } 1538 1539 bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( 1540 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { 1541 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP); 1542 assert(I.getOpcode() == TargetOpcode::G_BRCOND); 1543 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't 1544 // totally clean. Some of them require two branches to implement. 1545 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate(); 1546 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB, 1547 Pred); 1548 AArch64CC::CondCode CC1, CC2; 1549 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2); 1550 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1551 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); 1552 if (CC2 != AArch64CC::AL) 1553 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); 1554 I.eraseFromParent(); 1555 return true; 1556 } 1557 1558 bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( 1559 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { 1560 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); 1561 assert(I.getOpcode() == TargetOpcode::G_BRCOND); 1562 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z. 1563 // 1564 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z 1565 // instructions will not be produced, as they are conditional branch 1566 // instructions that do not set flags. 1567 if (!ProduceNonFlagSettingCondBr) 1568 return false; 1569 1570 MachineRegisterInfo &MRI = *MIB.getMRI(); 1571 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1572 auto Pred = 1573 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate()); 1574 Register LHS = ICmp.getOperand(2).getReg(); 1575 Register RHS = ICmp.getOperand(3).getReg(); 1576 1577 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that. 1578 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI); 1579 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); 1580 1581 // When we can emit a TB(N)Z, prefer that. 1582 // 1583 // Handle non-commutative condition codes first. 1584 // Note that we don't want to do this when we have a G_AND because it can 1585 // become a tst. The tst will make the test bit in the TB(N)Z redundant. 1586 if (VRegAndVal && !AndInst) { 1587 int64_t C = VRegAndVal->Value.getSExtValue(); 1588 1589 // When we have a greater-than comparison, we can just test if the msb is 1590 // zero. 
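    // (x > -1 is the same as x >= 0, i.e. the sign bit of x is clear.)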
1591 if (C == -1 && Pred == CmpInst::ICMP_SGT) { 1592 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; 1593 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB); 1594 I.eraseFromParent(); 1595 return true; 1596 } 1597 1598 // When we have a less than comparison, we can just test if the msb is not 1599 // zero. 1600 if (C == 0 && Pred == CmpInst::ICMP_SLT) { 1601 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; 1602 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB); 1603 I.eraseFromParent(); 1604 return true; 1605 } 1606 } 1607 1608 // Attempt to handle commutative condition codes. Right now, that's only 1609 // eq/ne. 1610 if (ICmpInst::isEquality(Pred)) { 1611 if (!VRegAndVal) { 1612 std::swap(RHS, LHS); 1613 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI); 1614 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); 1615 } 1616 1617 if (VRegAndVal && VRegAndVal->Value == 0) { 1618 // If there's a G_AND feeding into this branch, try to fold it away by 1619 // emitting a TB(N)Z instead. 1620 // 1621 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be 1622 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding 1623 // would be redundant. 1624 if (AndInst && 1625 tryOptAndIntoCompareBranch( 1626 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) { 1627 I.eraseFromParent(); 1628 return true; 1629 } 1630 1631 // Otherwise, try to emit a CB(N)Z instead. 1632 auto LHSTy = MRI.getType(LHS); 1633 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) { 1634 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB); 1635 I.eraseFromParent(); 1636 return true; 1637 } 1638 } 1639 } 1640 1641 return false; 1642 } 1643 1644 bool AArch64InstructionSelector::selectCompareBranchFedByICmp( 1645 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { 1646 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); 1647 assert(I.getOpcode() == TargetOpcode::G_BRCOND); 1648 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) 1649 return true; 1650 1651 // Couldn't optimize. Emit a compare + a Bcc. 1652 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1653 auto PredOp = ICmp.getOperand(1); 1654 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); 1655 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( 1656 static_cast<CmpInst::Predicate>(PredOp.getPredicate())); 1657 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); 1658 I.eraseFromParent(); 1659 return true; 1660 } 1661 1662 bool AArch64InstructionSelector::selectCompareBranch( 1663 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) { 1664 Register CondReg = I.getOperand(0).getReg(); 1665 MachineInstr *CCMI = MRI.getVRegDef(CondReg); 1666 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) { 1667 CondReg = CCMI->getOperand(1).getReg(); 1668 CCMI = MRI.getVRegDef(CondReg); 1669 } 1670 1671 // Try to select the G_BRCOND using whatever is feeding the condition if 1672 // possible. 1673 unsigned CCMIOpc = CCMI->getOpcode(); 1674 if (CCMIOpc == TargetOpcode::G_FCMP) 1675 return selectCompareBranchFedByFCmp(I, *CCMI, MIB); 1676 if (CCMIOpc == TargetOpcode::G_ICMP) 1677 return selectCompareBranchFedByICmp(I, *CCMI, MIB); 1678 1679 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z 1680 // instructions will not be produced, as they are conditional branch 1681 // instructions that do not set flags. 
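  // If we can't use them (e.g. under speculative load hardening), fall back to
  // the flag-setting ANDS + B.cc sequence emitted below.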
1682 if (ProduceNonFlagSettingCondBr) { 1683 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true, 1684 I.getOperand(1).getMBB(), MIB); 1685 I.eraseFromParent(); 1686 return true; 1687 } 1688 1689 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. 1690 auto TstMI = 1691 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); 1692 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 1693 auto Bcc = MIB.buildInstr(AArch64::Bcc) 1694 .addImm(AArch64CC::EQ) 1695 .addMBB(I.getOperand(1).getMBB()); 1696 I.eraseFromParent(); 1697 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); 1698 } 1699 1700 /// Returns the element immediate value of a vector shift operand if found. 1701 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR. 1702 static Optional<int64_t> getVectorShiftImm(Register Reg, 1703 MachineRegisterInfo &MRI) { 1704 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand"); 1705 MachineInstr *OpMI = MRI.getVRegDef(Reg); 1706 assert(OpMI && "Expected to find a vreg def for vector shift operand"); 1707 return getAArch64VectorSplatScalar(*OpMI, MRI); 1708 } 1709 1710 /// Matches and returns the shift immediate value for a SHL instruction given 1711 /// a shift operand. 1712 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) { 1713 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI); 1714 if (!ShiftImm) 1715 return None; 1716 // Check the immediate is in range for a SHL. 1717 int64_t Imm = *ShiftImm; 1718 if (Imm < 0) 1719 return None; 1720 switch (SrcTy.getElementType().getSizeInBits()) { 1721 default: 1722 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift"); 1723 return None; 1724 case 8: 1725 if (Imm > 7) 1726 return None; 1727 break; 1728 case 16: 1729 if (Imm > 15) 1730 return None; 1731 break; 1732 case 32: 1733 if (Imm > 31) 1734 return None; 1735 break; 1736 case 64: 1737 if (Imm > 63) 1738 return None; 1739 break; 1740 } 1741 return Imm; 1742 } 1743 1744 bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I, 1745 MachineRegisterInfo &MRI) { 1746 assert(I.getOpcode() == TargetOpcode::G_SHL); 1747 Register DstReg = I.getOperand(0).getReg(); 1748 const LLT Ty = MRI.getType(DstReg); 1749 Register Src1Reg = I.getOperand(1).getReg(); 1750 Register Src2Reg = I.getOperand(2).getReg(); 1751 1752 if (!Ty.isVector()) 1753 return false; 1754 1755 // Check if we have a vector of constants on RHS that we can select as the 1756 // immediate form. 1757 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI); 1758 1759 unsigned Opc = 0; 1760 if (Ty == LLT::fixed_vector(2, 64)) { 1761 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64; 1762 } else if (Ty == LLT::fixed_vector(4, 32)) { 1763 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; 1764 } else if (Ty == LLT::fixed_vector(2, 32)) { 1765 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; 1766 } else if (Ty == LLT::fixed_vector(4, 16)) { 1767 Opc = ImmVal ? AArch64::SHLv4i16_shift : AArch64::USHLv4i16; 1768 } else if (Ty == LLT::fixed_vector(8, 16)) { 1769 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; 1770 } else if (Ty == LLT::fixed_vector(16, 8)) { 1771 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; 1772 } else if (Ty == LLT::fixed_vector(8, 8)) { 1773 Opc = ImmVal ? 
AArch64::SHLv8i8_shift : AArch64::USHLv8i8; 1774 } else { 1775 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type"); 1776 return false; 1777 } 1778 1779 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg}); 1780 if (ImmVal) 1781 Shl.addImm(*ImmVal); 1782 else 1783 Shl.addUse(Src2Reg); 1784 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI); 1785 I.eraseFromParent(); 1786 return true; 1787 } 1788 1789 bool AArch64InstructionSelector::selectVectorAshrLshr( 1790 MachineInstr &I, MachineRegisterInfo &MRI) { 1791 assert(I.getOpcode() == TargetOpcode::G_ASHR || 1792 I.getOpcode() == TargetOpcode::G_LSHR); 1793 Register DstReg = I.getOperand(0).getReg(); 1794 const LLT Ty = MRI.getType(DstReg); 1795 Register Src1Reg = I.getOperand(1).getReg(); 1796 Register Src2Reg = I.getOperand(2).getReg(); 1797 1798 if (!Ty.isVector()) 1799 return false; 1800 1801 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; 1802 1803 // We expect the immediate case to be lowered in the PostLegalCombiner to 1804 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. 1805 1806 // There is not a shift right register instruction, but the shift left 1807 // register instruction takes a signed value, where negative numbers specify a 1808 // right shift. 1809 1810 unsigned Opc = 0; 1811 unsigned NegOpc = 0; 1812 const TargetRegisterClass *RC = 1813 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI); 1814 if (Ty == LLT::fixed_vector(2, 64)) { 1815 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; 1816 NegOpc = AArch64::NEGv2i64; 1817 } else if (Ty == LLT::fixed_vector(4, 32)) { 1818 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; 1819 NegOpc = AArch64::NEGv4i32; 1820 } else if (Ty == LLT::fixed_vector(2, 32)) { 1821 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; 1822 NegOpc = AArch64::NEGv2i32; 1823 } else if (Ty == LLT::fixed_vector(4, 16)) { 1824 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16; 1825 NegOpc = AArch64::NEGv4i16; 1826 } else if (Ty == LLT::fixed_vector(8, 16)) { 1827 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; 1828 NegOpc = AArch64::NEGv8i16; 1829 } else if (Ty == LLT::fixed_vector(16, 8)) { 1830 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; 1831 NegOpc = AArch64::NEGv16i8; 1832 } else if (Ty == LLT::fixed_vector(8, 8)) { 1833 Opc = IsASHR ? 
AArch64::SSHLv8i8 : AArch64::USHLv8i8; 1834 NegOpc = AArch64::NEGv8i8; 1835 } else { 1836 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type"); 1837 return false; 1838 } 1839 1840 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg}); 1841 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI); 1842 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg}); 1843 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI); 1844 I.eraseFromParent(); 1845 return true; 1846 } 1847 1848 bool AArch64InstructionSelector::selectVaStartAAPCS( 1849 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 1850 return false; 1851 } 1852 1853 bool AArch64InstructionSelector::selectVaStartDarwin( 1854 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 1855 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); 1856 Register ListReg = I.getOperand(0).getReg(); 1857 1858 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 1859 1860 auto MIB = 1861 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri)) 1862 .addDef(ArgsAddrReg) 1863 .addFrameIndex(FuncInfo->getVarArgsStackIndex()) 1864 .addImm(0) 1865 .addImm(0); 1866 1867 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 1868 1869 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui)) 1870 .addUse(ArgsAddrReg) 1871 .addUse(ListReg) 1872 .addImm(0) 1873 .addMemOperand(*I.memoperands_begin()); 1874 1875 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 1876 I.eraseFromParent(); 1877 return true; 1878 } 1879 1880 void AArch64InstructionSelector::materializeLargeCMVal( 1881 MachineInstr &I, const Value *V, unsigned OpFlags) { 1882 MachineBasicBlock &MBB = *I.getParent(); 1883 MachineFunction &MF = *MBB.getParent(); 1884 MachineRegisterInfo &MRI = MF.getRegInfo(); 1885 1886 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {}); 1887 MovZ->addOperand(MF, I.getOperand(1)); 1888 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | 1889 AArch64II::MO_NC); 1890 MovZ->addOperand(MF, MachineOperand::CreateImm(0)); 1891 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); 1892 1893 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset, 1894 Register ForceDstReg) { 1895 Register DstReg = ForceDstReg 1896 ? 
ForceDstReg 1897 : MRI.createVirtualRegister(&AArch64::GPR64RegClass); 1898 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg); 1899 if (auto *GV = dyn_cast<GlobalValue>(V)) { 1900 MovI->addOperand(MF, MachineOperand::CreateGA( 1901 GV, MovZ->getOperand(1).getOffset(), Flags)); 1902 } else { 1903 MovI->addOperand( 1904 MF, MachineOperand::CreateBA(cast<BlockAddress>(V), 1905 MovZ->getOperand(1).getOffset(), Flags)); 1906 } 1907 MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); 1908 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); 1909 return DstReg; 1910 }; 1911 Register DstReg = BuildMovK(MovZ.getReg(0), 1912 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); 1913 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); 1914 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); 1915 } 1916 1917 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { 1918 MachineBasicBlock &MBB = *I.getParent(); 1919 MachineFunction &MF = *MBB.getParent(); 1920 MachineRegisterInfo &MRI = MF.getRegInfo(); 1921 1922 switch (I.getOpcode()) { 1923 case TargetOpcode::G_STORE: { 1924 bool Changed = contractCrossBankCopyIntoStore(I, MRI); 1925 MachineOperand &SrcOp = I.getOperand(0); 1926 if (MRI.getType(SrcOp.getReg()).isPointer()) { 1927 // Allow matching with imported patterns for stores of pointers. Unlike 1928 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy 1929 // and constrain. 1930 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp); 1931 Register NewSrc = Copy.getReg(0); 1932 SrcOp.setReg(NewSrc); 1933 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI); 1934 Changed = true; 1935 } 1936 return Changed; 1937 } 1938 case TargetOpcode::G_PTR_ADD: 1939 return convertPtrAddToAdd(I, MRI); 1940 case TargetOpcode::G_LOAD: { 1941 // For scalar loads of pointers, we try to convert the dest type from p0 1942 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD 1943 // conversion, this should be ok because all users should have been 1944 // selected already, so the type doesn't matter for them. 1945 Register DstReg = I.getOperand(0).getReg(); 1946 const LLT DstTy = MRI.getType(DstReg); 1947 if (!DstTy.isPointer()) 1948 return false; 1949 MRI.setType(DstReg, LLT::scalar(64)); 1950 return true; 1951 } 1952 case AArch64::G_DUP: { 1953 // Convert the type from p0 to s64 to help selection. 1954 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 1955 if (!DstTy.getElementType().isPointer()) 1956 return false; 1957 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg()); 1958 MRI.setType(I.getOperand(0).getReg(), 1959 DstTy.changeElementType(LLT::scalar(64))); 1960 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass); 1961 I.getOperand(1).setReg(NewSrc.getReg(0)); 1962 return true; 1963 } 1964 case TargetOpcode::G_UITOFP: 1965 case TargetOpcode::G_SITOFP: { 1966 // If both source and destination regbanks are FPR, then convert the opcode 1967 // to G_SITOF so that the importer can select it to an fpr variant. 1968 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank 1969 // copy. 
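    // G_SITOF/G_UITOF are the AArch64-specific forms of these conversions
    // whose integer operand lives on the FPR bank, so the imported patterns
    // can pick the FPR-source SCVTF/UCVTF variants directly.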
1970 Register SrcReg = I.getOperand(1).getReg(); 1971 LLT SrcTy = MRI.getType(SrcReg); 1972 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 1973 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits()) 1974 return false; 1975 1976 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { 1977 if (I.getOpcode() == TargetOpcode::G_SITOFP) 1978 I.setDesc(TII.get(AArch64::G_SITOF)); 1979 else 1980 I.setDesc(TII.get(AArch64::G_UITOF)); 1981 return true; 1982 } 1983 return false; 1984 } 1985 default: 1986 return false; 1987 } 1988 } 1989 1990 /// This lowering tries to look for G_PTR_ADD instructions and then converts 1991 /// them to a standard G_ADD with a COPY on the source. 1992 /// 1993 /// The motivation behind this is to expose the add semantics to the imported 1994 /// tablegen patterns. We shouldn't need to check for uses being loads/stores, 1995 /// because the selector works bottom up, uses before defs. By the time we 1996 /// end up trying to select a G_PTR_ADD, we should have already attempted to 1997 /// fold this into addressing modes and were therefore unsuccessful. 1998 bool AArch64InstructionSelector::convertPtrAddToAdd( 1999 MachineInstr &I, MachineRegisterInfo &MRI) { 2000 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); 2001 Register DstReg = I.getOperand(0).getReg(); 2002 Register AddOp1Reg = I.getOperand(1).getReg(); 2003 const LLT PtrTy = MRI.getType(DstReg); 2004 if (PtrTy.getAddressSpace() != 0) 2005 return false; 2006 2007 const LLT CastPtrTy = 2008 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64); 2009 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg); 2010 // Set regbanks on the registers. 2011 if (PtrTy.isVector()) 2012 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID)); 2013 else 2014 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); 2015 2016 // Now turn the %dst(p0) = G_PTR_ADD %base, off into: 2017 // %dst(intty) = G_ADD %intbase, off 2018 I.setDesc(TII.get(TargetOpcode::G_ADD)); 2019 MRI.setType(DstReg, CastPtrTy); 2020 I.getOperand(1).setReg(PtrToInt.getReg(0)); 2021 if (!select(*PtrToInt)) { 2022 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"); 2023 return false; 2024 } 2025 2026 // Also take the opportunity here to try to do some optimization. 2027 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. 2028 Register NegatedReg; 2029 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg)))) 2030 return true; 2031 I.getOperand(2).setReg(NegatedReg); 2032 I.setDesc(TII.get(TargetOpcode::G_SUB)); 2033 return true; 2034 } 2035 2036 bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I, 2037 MachineRegisterInfo &MRI) { 2038 // We try to match the immediate variant of LSL, which is actually an alias 2039 // for a special case of UBFM. Otherwise, we fall back to the imported 2040 // selector which will match the register variant. 2041 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op"); 2042 const auto &MO = I.getOperand(2); 2043 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI); 2044 if (!VRegAndVal) 2045 return false; 2046 2047 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2048 if (DstTy.isVector()) 2049 return false; 2050 bool Is64Bit = DstTy.getSizeInBits() == 64; 2051 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO); 2052 auto Imm2Fn = Is64Bit ? 
selectShiftB_64(MO) : selectShiftB_32(MO); 2053 2054 if (!Imm1Fn || !Imm2Fn) 2055 return false; 2056 2057 auto NewI = 2058 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri, 2059 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()}); 2060 2061 for (auto &RenderFn : *Imm1Fn) 2062 RenderFn(NewI); 2063 for (auto &RenderFn : *Imm2Fn) 2064 RenderFn(NewI); 2065 2066 I.eraseFromParent(); 2067 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); 2068 } 2069 2070 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( 2071 MachineInstr &I, MachineRegisterInfo &MRI) { 2072 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE"); 2073 // If we're storing a scalar, it doesn't matter what register bank that 2074 // scalar is on. All that matters is the size. 2075 // 2076 // So, if we see something like this (with a 32-bit scalar as an example): 2077 // 2078 // %x:gpr(s32) = ... something ... 2079 // %y:fpr(s32) = COPY %x:gpr(s32) 2080 // G_STORE %y:fpr(s32) 2081 // 2082 // We can fix this up into something like this: 2083 // 2084 // G_STORE %x:gpr(s32) 2085 // 2086 // And then continue the selection process normally. 2087 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI); 2088 if (!DefDstReg.isValid()) 2089 return false; 2090 LLT DefDstTy = MRI.getType(DefDstReg); 2091 Register StoreSrcReg = I.getOperand(0).getReg(); 2092 LLT StoreSrcTy = MRI.getType(StoreSrcReg); 2093 2094 // If we get something strange like a physical register, then we shouldn't 2095 // go any further. 2096 if (!DefDstTy.isValid()) 2097 return false; 2098 2099 // Are the source and dst types the same size? 2100 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) 2101 return false; 2102 2103 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == 2104 RBI.getRegBank(DefDstReg, MRI, TRI)) 2105 return false; 2106 2107 // We have a cross-bank copy, which is entering a store. Let's fold it. 2108 I.getOperand(0).setReg(DefDstReg); 2109 return true; 2110 } 2111 2112 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) { 2113 assert(I.getParent() && "Instruction should be in a basic block!"); 2114 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 2115 2116 MachineBasicBlock &MBB = *I.getParent(); 2117 MachineFunction &MF = *MBB.getParent(); 2118 MachineRegisterInfo &MRI = MF.getRegInfo(); 2119 2120 switch (I.getOpcode()) { 2121 case AArch64::G_DUP: { 2122 // Before selecting a DUP instruction, check if it is better selected as a 2123 // MOV or load from a constant pool. 2124 Register Src = I.getOperand(1).getReg(); 2125 auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI); 2126 if (!ValAndVReg) 2127 return false; 2128 LLVMContext &Ctx = MF.getFunction().getContext(); 2129 Register Dst = I.getOperand(0).getReg(); 2130 auto *CV = ConstantDataVector::getSplat( 2131 MRI.getType(Dst).getNumElements(), 2132 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()), 2133 ValAndVReg->Value)); 2134 if (!emitConstantVector(Dst, CV, MIB, MRI)) 2135 return false; 2136 I.eraseFromParent(); 2137 return true; 2138 } 2139 case TargetOpcode::G_SEXT: 2140 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV 2141 // over a normal extend. 
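    // SMOV extracts a vector lane and sign-extends it in a single instruction,
    // which is preferable to a lane extract followed by a separate sign
    // extend.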
2142 if (selectUSMovFromExtend(I, MRI)) 2143 return true; 2144 return false; 2145 case TargetOpcode::G_BR: 2146 return false; 2147 case TargetOpcode::G_SHL: 2148 return earlySelectSHL(I, MRI); 2149 case TargetOpcode::G_CONSTANT: { 2150 bool IsZero = false; 2151 if (I.getOperand(1).isCImm()) 2152 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0; 2153 else if (I.getOperand(1).isImm()) 2154 IsZero = I.getOperand(1).getImm() == 0; 2155 2156 if (!IsZero) 2157 return false; 2158 2159 Register DefReg = I.getOperand(0).getReg(); 2160 LLT Ty = MRI.getType(DefReg); 2161 if (Ty.getSizeInBits() == 64) { 2162 I.getOperand(1).ChangeToRegister(AArch64::XZR, false); 2163 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); 2164 } else if (Ty.getSizeInBits() == 32) { 2165 I.getOperand(1).ChangeToRegister(AArch64::WZR, false); 2166 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI); 2167 } else 2168 return false; 2169 2170 I.setDesc(TII.get(TargetOpcode::COPY)); 2171 return true; 2172 } 2173 2174 case TargetOpcode::G_ADD: { 2175 // Check if this is being fed by a G_ICMP on either side. 2176 // 2177 // (cmp pred, x, y) + z 2178 // 2179 // In the above case, when the cmp is true, we increment z by 1. So, we can 2180 // fold the add into the cset for the cmp by using cinc. 2181 // 2182 // FIXME: This would probably be a lot nicer in PostLegalizerLowering. 2183 Register AddDst = I.getOperand(0).getReg(); 2184 Register AddLHS = I.getOperand(1).getReg(); 2185 Register AddRHS = I.getOperand(2).getReg(); 2186 // Only handle scalars. 2187 LLT Ty = MRI.getType(AddLHS); 2188 if (Ty.isVector()) 2189 return false; 2190 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64 2191 // bits. 2192 unsigned Size = Ty.getSizeInBits(); 2193 if (Size != 32 && Size != 64) 2194 return false; 2195 auto MatchCmp = [&](Register Reg) -> MachineInstr * { 2196 if (!MRI.hasOneNonDBGUse(Reg)) 2197 return nullptr; 2198 // If the LHS of the add is 32 bits, then we want to fold a 32-bit 2199 // compare. 2200 if (Size == 32) 2201 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI); 2202 // We model scalar compares using 32-bit destinations right now. 2203 // If it's a 64-bit compare, it'll have 64-bit sources. 
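        // So a 64-bit use of the compare result appears as a zext of the
        // 32-bit destination; look through that zext before searching for the
        // G_ICMP.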
2204 Register ZExt; 2205 if (!mi_match(Reg, MRI, 2206 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt)))))) 2207 return nullptr; 2208 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI); 2209 if (!Cmp || 2210 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64) 2211 return nullptr; 2212 return Cmp; 2213 }; 2214 // Try to match 2215 // z + (cmp pred, x, y) 2216 MachineInstr *Cmp = MatchCmp(AddRHS); 2217 if (!Cmp) { 2218 // (cmp pred, x, y) + z 2219 std::swap(AddLHS, AddRHS); 2220 Cmp = MatchCmp(AddRHS); 2221 if (!Cmp) 2222 return false; 2223 } 2224 auto &PredOp = Cmp->getOperand(1); 2225 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate()); 2226 const AArch64CC::CondCode InvCC = 2227 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred)); 2228 MIB.setInstrAndDebugLoc(I); 2229 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2), 2230 /*RHS=*/Cmp->getOperand(3), PredOp, MIB); 2231 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB); 2232 I.eraseFromParent(); 2233 return true; 2234 } 2235 case TargetOpcode::G_OR: { 2236 // Look for operations that take the lower `Width=Size-ShiftImm` bits of 2237 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via 2238 // shifting and masking that we can replace with a BFI (encoded as a BFM). 2239 Register Dst = I.getOperand(0).getReg(); 2240 LLT Ty = MRI.getType(Dst); 2241 2242 if (!Ty.isScalar()) 2243 return false; 2244 2245 unsigned Size = Ty.getSizeInBits(); 2246 if (Size != 32 && Size != 64) 2247 return false; 2248 2249 Register ShiftSrc; 2250 int64_t ShiftImm; 2251 Register MaskSrc; 2252 int64_t MaskImm; 2253 if (!mi_match( 2254 Dst, MRI, 2255 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))), 2256 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm)))))) 2257 return false; 2258 2259 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm)) 2260 return false; 2261 2262 int64_t Immr = Size - ShiftImm; 2263 int64_t Imms = Size - ShiftImm - 1; 2264 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri; 2265 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB); 2266 I.eraseFromParent(); 2267 return true; 2268 } 2269 default: 2270 return false; 2271 } 2272 } 2273 2274 bool AArch64InstructionSelector::select(MachineInstr &I) { 2275 assert(I.getParent() && "Instruction should be in a basic block!"); 2276 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 2277 2278 MachineBasicBlock &MBB = *I.getParent(); 2279 MachineFunction &MF = *MBB.getParent(); 2280 MachineRegisterInfo &MRI = MF.getRegInfo(); 2281 2282 const AArch64Subtarget *Subtarget = 2283 &static_cast<const AArch64Subtarget &>(MF.getSubtarget()); 2284 if (Subtarget->requiresStrictAlign()) { 2285 // We don't support this feature yet. 2286 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n"); 2287 return false; 2288 } 2289 2290 MIB.setInstrAndDebugLoc(I); 2291 2292 unsigned Opcode = I.getOpcode(); 2293 // G_PHI requires same handling as PHI 2294 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { 2295 // Certain non-generic instructions also need some special handling. 
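    // (Copies, PHIs and LOAD_STACK_GUARD are handled below.)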
2296 2297 if (Opcode == TargetOpcode::LOAD_STACK_GUARD) 2298 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2299 2300 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { 2301 const Register DefReg = I.getOperand(0).getReg(); 2302 const LLT DefTy = MRI.getType(DefReg); 2303 2304 const RegClassOrRegBank &RegClassOrBank = 2305 MRI.getRegClassOrRegBank(DefReg); 2306 2307 const TargetRegisterClass *DefRC 2308 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); 2309 if (!DefRC) { 2310 if (!DefTy.isValid()) { 2311 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); 2312 return false; 2313 } 2314 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); 2315 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI); 2316 if (!DefRC) { 2317 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); 2318 return false; 2319 } 2320 } 2321 2322 I.setDesc(TII.get(TargetOpcode::PHI)); 2323 2324 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); 2325 } 2326 2327 if (I.isCopy()) 2328 return selectCopy(I, TII, MRI, TRI, RBI); 2329 2330 return true; 2331 } 2332 2333 2334 if (I.getNumOperands() != I.getNumExplicitOperands()) { 2335 LLVM_DEBUG( 2336 dbgs() << "Generic instruction has unexpected implicit operands\n"); 2337 return false; 2338 } 2339 2340 // Try to do some lowering before we start instruction selecting. These 2341 // lowerings are purely transformations on the input G_MIR and so selection 2342 // must continue after any modification of the instruction. 2343 if (preISelLower(I)) { 2344 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it. 2345 } 2346 2347 // There may be patterns where the importer can't deal with them optimally, 2348 // but does select it to a suboptimal sequence so our custom C++ selection 2349 // code later never has a chance to work on it. Therefore, we have an early 2350 // selection attempt here to give priority to certain selection routines 2351 // over the imported ones. 2352 if (earlySelect(I)) 2353 return true; 2354 2355 if (selectImpl(I, *CoverageInfo)) 2356 return true; 2357 2358 LLT Ty = 2359 I.getOperand(0).isReg() ? 
MRI.getType(I.getOperand(0).getReg()) : LLT{}; 2360 2361 switch (Opcode) { 2362 case TargetOpcode::G_SBFX: 2363 case TargetOpcode::G_UBFX: { 2364 static const unsigned OpcTable[2][2] = { 2365 {AArch64::UBFMWri, AArch64::UBFMXri}, 2366 {AArch64::SBFMWri, AArch64::SBFMXri}}; 2367 bool IsSigned = Opcode == TargetOpcode::G_SBFX; 2368 unsigned Size = Ty.getSizeInBits(); 2369 unsigned Opc = OpcTable[IsSigned][Size == 64]; 2370 auto Cst1 = 2371 getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI); 2372 assert(Cst1 && "Should have gotten a constant for src 1?"); 2373 auto Cst2 = 2374 getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI); 2375 assert(Cst2 && "Should have gotten a constant for src 2?"); 2376 auto LSB = Cst1->Value.getZExtValue(); 2377 auto Width = Cst2->Value.getZExtValue(); 2378 auto BitfieldInst = 2379 MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)}) 2380 .addImm(LSB) 2381 .addImm(LSB + Width - 1); 2382 I.eraseFromParent(); 2383 return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI); 2384 } 2385 case TargetOpcode::G_BRCOND: 2386 return selectCompareBranch(I, MF, MRI); 2387 2388 case TargetOpcode::G_BRINDIRECT: { 2389 I.setDesc(TII.get(AArch64::BR)); 2390 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2391 } 2392 2393 case TargetOpcode::G_BRJT: 2394 return selectBrJT(I, MRI); 2395 2396 case AArch64::G_ADD_LOW: { 2397 // This op may have been separated from it's ADRP companion by the localizer 2398 // or some other code motion pass. Given that many CPUs will try to 2399 // macro fuse these operations anyway, select this into a MOVaddr pseudo 2400 // which will later be expanded into an ADRP+ADD pair after scheduling. 2401 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg()); 2402 if (BaseMI->getOpcode() != AArch64::ADRP) { 2403 I.setDesc(TII.get(AArch64::ADDXri)); 2404 I.addOperand(MachineOperand::CreateImm(0)); 2405 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2406 } 2407 assert(TM.getCodeModel() == CodeModel::Small && 2408 "Expected small code model"); 2409 auto Op1 = BaseMI->getOperand(1); 2410 auto Op2 = I.getOperand(2); 2411 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {}) 2412 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(), 2413 Op1.getTargetFlags()) 2414 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(), 2415 Op2.getTargetFlags()); 2416 I.eraseFromParent(); 2417 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI); 2418 } 2419 2420 case TargetOpcode::G_BSWAP: { 2421 // Handle vector types for G_BSWAP directly. 2422 Register DstReg = I.getOperand(0).getReg(); 2423 LLT DstTy = MRI.getType(DstReg); 2424 2425 // We should only get vector types here; everything else is handled by the 2426 // importer right now. 2427 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) { 2428 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n"); 2429 return false; 2430 } 2431 2432 // Only handle 4 and 2 element vectors for now. 2433 // TODO: 16-bit elements. 2434 unsigned NumElts = DstTy.getNumElements(); 2435 if (NumElts != 4 && NumElts != 2) { 2436 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n"); 2437 return false; 2438 } 2439 2440 // Choose the correct opcode for the supported types. Right now, that's 2441 // v2s32, v4s32, and v2s64. 2442 unsigned Opc = 0; 2443 unsigned EltSize = DstTy.getElementType().getSizeInBits(); 2444 if (EltSize == 32) 2445 Opc = (DstTy.getNumElements() == 2) ? 
AArch64::REV32v8i8 2446 : AArch64::REV32v16i8; 2447 else if (EltSize == 64) 2448 Opc = AArch64::REV64v16i8; 2449 2450 // We should always get something by the time we get here... 2451 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?"); 2452 2453 I.setDesc(TII.get(Opc)); 2454 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2455 } 2456 2457 case TargetOpcode::G_FCONSTANT: 2458 case TargetOpcode::G_CONSTANT: { 2459 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; 2460 2461 const LLT s8 = LLT::scalar(8); 2462 const LLT s16 = LLT::scalar(16); 2463 const LLT s32 = LLT::scalar(32); 2464 const LLT s64 = LLT::scalar(64); 2465 const LLT s128 = LLT::scalar(128); 2466 const LLT p0 = LLT::pointer(0, 64); 2467 2468 const Register DefReg = I.getOperand(0).getReg(); 2469 const LLT DefTy = MRI.getType(DefReg); 2470 const unsigned DefSize = DefTy.getSizeInBits(); 2471 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2472 2473 // FIXME: Redundant check, but even less readable when factored out. 2474 if (isFP) { 2475 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) { 2476 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty 2477 << " constant, expected: " << s16 << " or " << s32 2478 << " or " << s64 << " or " << s128 << '\n'); 2479 return false; 2480 } 2481 2482 if (RB.getID() != AArch64::FPRRegBankID) { 2483 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty 2484 << " constant on bank: " << RB 2485 << ", expected: FPR\n"); 2486 return false; 2487 } 2488 2489 // The case when we have 0.0 is covered by tablegen. Reject it here so we 2490 // can be sure tablegen works correctly and isn't rescued by this code. 2491 // 0.0 is not covered by tablegen for FP128. So we will handle this 2492 // scenario in the code here. 2493 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0)) 2494 return false; 2495 } else { 2496 // s32 and s64 are covered by tablegen. 2497 if (Ty != p0 && Ty != s8 && Ty != s16) { 2498 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty 2499 << " constant, expected: " << s32 << ", " << s64 2500 << ", or " << p0 << '\n'); 2501 return false; 2502 } 2503 2504 if (RB.getID() != AArch64::GPRRegBankID) { 2505 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty 2506 << " constant on bank: " << RB 2507 << ", expected: GPR\n"); 2508 return false; 2509 } 2510 } 2511 2512 if (isFP) { 2513 const TargetRegisterClass &FPRRC = *getMinClassForRegBank(RB, DefSize); 2514 // For 16, 64, and 128b values, emit a constant pool load. 2515 switch (DefSize) { 2516 default: 2517 llvm_unreachable("Unexpected destination size for G_FCONSTANT?"); 2518 case 32: 2519 // For s32, use a cp load if we have optsize/minsize. 2520 if (!shouldOptForSize(&MF)) 2521 break; 2522 LLVM_FALLTHROUGH; 2523 case 16: 2524 case 64: 2525 case 128: { 2526 auto *FPImm = I.getOperand(1).getFPImm(); 2527 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB); 2528 if (!LoadMI) { 2529 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n"); 2530 return false; 2531 } 2532 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()}); 2533 I.eraseFromParent(); 2534 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI); 2535 } 2536 } 2537 2538 // Either emit a FMOV, or emit a copy to emit a normal mov. 
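      // For the remaining s32 case, materialize the float's bit pattern into a
      // GPR32 with MOVi32imm (set up below) and copy it across to the FPR
      // destination.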
2539 assert(DefSize == 32 && 2540 "Expected constant pool loads for all sizes other than 32!"); 2541 const Register DefGPRReg = 2542 MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2543 MachineOperand &RegOp = I.getOperand(0); 2544 RegOp.setReg(DefGPRReg); 2545 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 2546 MIB.buildCopy({DefReg}, {DefGPRReg}); 2547 2548 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) { 2549 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n"); 2550 return false; 2551 } 2552 2553 MachineOperand &ImmOp = I.getOperand(1); 2554 // FIXME: Is going through int64_t always correct? 2555 ImmOp.ChangeToImmediate( 2556 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); 2557 } else if (I.getOperand(1).isCImm()) { 2558 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue(); 2559 I.getOperand(1).ChangeToImmediate(Val); 2560 } else if (I.getOperand(1).isImm()) { 2561 uint64_t Val = I.getOperand(1).getImm(); 2562 I.getOperand(1).ChangeToImmediate(Val); 2563 } 2564 2565 const unsigned MovOpc = 2566 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm; 2567 I.setDesc(TII.get(MovOpc)); 2568 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2569 return true; 2570 } 2571 case TargetOpcode::G_EXTRACT: { 2572 Register DstReg = I.getOperand(0).getReg(); 2573 Register SrcReg = I.getOperand(1).getReg(); 2574 LLT SrcTy = MRI.getType(SrcReg); 2575 LLT DstTy = MRI.getType(DstReg); 2576 (void)DstTy; 2577 unsigned SrcSize = SrcTy.getSizeInBits(); 2578 2579 if (SrcTy.getSizeInBits() > 64) { 2580 // This should be an extract of an s128, which is like a vector extract. 2581 if (SrcTy.getSizeInBits() != 128) 2582 return false; 2583 // Only support extracting 64 bits from an s128 at the moment. 2584 if (DstTy.getSizeInBits() != 64) 2585 return false; 2586 2587 unsigned Offset = I.getOperand(2).getImm(); 2588 if (Offset % 64 != 0) 2589 return false; 2590 2591 // Check we have the right regbank always. 2592 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 2593 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2594 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!"); 2595 2596 if (SrcRB.getID() == AArch64::GPRRegBankID) { 2597 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 2598 .addUse(SrcReg, 0, Offset == 0 ? AArch64::sube64 : AArch64::subo64); 2599 I.eraseFromParent(); 2600 return true; 2601 } 2602 2603 // Emit the same code as a vector extract. 2604 // Offset must be a multiple of 64. 2605 unsigned LaneIdx = Offset / 64; 2606 MachineInstr *Extract = emitExtractVectorElt( 2607 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); 2608 if (!Extract) 2609 return false; 2610 I.eraseFromParent(); 2611 return true; 2612 } 2613 2614 I.setDesc(TII.get(SrcSize == 64 ? 
AArch64::UBFMXri : AArch64::UBFMWri)); 2615 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + 2616 Ty.getSizeInBits() - 1); 2617 2618 if (SrcSize < 64) { 2619 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && 2620 "unexpected G_EXTRACT types"); 2621 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2622 } 2623 2624 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 2625 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 2626 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 2627 .addReg(DstReg, 0, AArch64::sub_32); 2628 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 2629 AArch64::GPR32RegClass, MRI); 2630 I.getOperand(0).setReg(DstReg); 2631 2632 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2633 } 2634 2635 case TargetOpcode::G_INSERT: { 2636 LLT SrcTy = MRI.getType(I.getOperand(2).getReg()); 2637 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2638 unsigned DstSize = DstTy.getSizeInBits(); 2639 // Larger inserts are vectors, same-size ones should be something else by 2640 // now (split up or turned into COPYs). 2641 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) 2642 return false; 2643 2644 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri)); 2645 unsigned LSB = I.getOperand(3).getImm(); 2646 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); 2647 I.getOperand(3).setImm((DstSize - LSB) % DstSize); 2648 MachineInstrBuilder(MF, I).addImm(Width - 1); 2649 2650 if (DstSize < 64) { 2651 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && 2652 "unexpected G_INSERT types"); 2653 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2654 } 2655 2656 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 2657 BuildMI(MBB, I.getIterator(), I.getDebugLoc(), 2658 TII.get(AArch64::SUBREG_TO_REG)) 2659 .addDef(SrcReg) 2660 .addImm(0) 2661 .addUse(I.getOperand(2).getReg()) 2662 .addImm(AArch64::sub_32); 2663 RBI.constrainGenericRegister(I.getOperand(2).getReg(), 2664 AArch64::GPR32RegClass, MRI); 2665 I.getOperand(2).setReg(SrcReg); 2666 2667 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2668 } 2669 case TargetOpcode::G_FRAME_INDEX: { 2670 // allocas and G_FRAME_INDEX are only supported in addrspace(0). 2671 if (Ty != LLT::pointer(0, 64)) { 2672 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty 2673 << ", expected: " << LLT::pointer(0, 64) << '\n'); 2674 return false; 2675 } 2676 I.setDesc(TII.get(AArch64::ADDXri)); 2677 2678 // MOs for a #0 shifted immediate. 2679 I.addOperand(MachineOperand::CreateImm(0)); 2680 I.addOperand(MachineOperand::CreateImm(0)); 2681 2682 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2683 } 2684 2685 case TargetOpcode::G_GLOBAL_VALUE: { 2686 auto GV = I.getOperand(1).getGlobal(); 2687 if (GV->isThreadLocal()) 2688 return selectTLSGlobalValue(I, MRI); 2689 2690 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); 2691 if (OpFlags & AArch64II::MO_GOT) { 2692 I.setDesc(TII.get(AArch64::LOADgot)); 2693 I.getOperand(1).setTargetFlags(OpFlags); 2694 } else if (TM.getCodeModel() == CodeModel::Large) { 2695 // Materialize the global using movz/movk instructions. 
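      // i.e. a MOVZ of bits 0-15 followed by MOVKs at shifts 16, 32 and 48,
      // each carrying a relocation against the symbol; see
      // materializeLargeCMVal above.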
2696 materializeLargeCMVal(I, GV, OpFlags); 2697 I.eraseFromParent(); 2698 return true; 2699 } else if (TM.getCodeModel() == CodeModel::Tiny) { 2700 I.setDesc(TII.get(AArch64::ADR)); 2701 I.getOperand(1).setTargetFlags(OpFlags); 2702 } else { 2703 I.setDesc(TII.get(AArch64::MOVaddr)); 2704 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); 2705 MachineInstrBuilder MIB(MF, I); 2706 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(), 2707 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 2708 } 2709 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2710 } 2711 2712 case TargetOpcode::G_ZEXTLOAD: 2713 case TargetOpcode::G_LOAD: 2714 case TargetOpcode::G_STORE: { 2715 GLoadStore &LdSt = cast<GLoadStore>(I); 2716 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; 2717 LLT PtrTy = MRI.getType(LdSt.getPointerReg()); 2718 2719 if (PtrTy != LLT::pointer(0, 64)) { 2720 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy 2721 << ", expected: " << LLT::pointer(0, 64) << '\n'); 2722 return false; 2723 } 2724 2725 uint64_t MemSizeInBytes = LdSt.getMemSize(); 2726 unsigned MemSizeInBits = LdSt.getMemSizeInBits(); 2727 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); 2728 2729 // Need special instructions for atomics that affect ordering. 2730 if (Order != AtomicOrdering::NotAtomic && 2731 Order != AtomicOrdering::Unordered && 2732 Order != AtomicOrdering::Monotonic) { 2733 assert(!isa<GZExtLoad>(LdSt)); 2734 if (MemSizeInBytes > 64) 2735 return false; 2736 2737 if (isa<GLoad>(LdSt)) { 2738 static unsigned Opcodes[] = {AArch64::LDARB, AArch64::LDARH, 2739 AArch64::LDARW, AArch64::LDARX}; 2740 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)])); 2741 } else { 2742 static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH, 2743 AArch64::STLRW, AArch64::STLRX}; 2744 Register ValReg = LdSt.getReg(0); 2745 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) { 2746 // Emit a subreg copy of 32 bits. 2747 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2748 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {}) 2749 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32); 2750 I.getOperand(0).setReg(NewVal); 2751 } 2752 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)])); 2753 } 2754 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2755 return true; 2756 } 2757 2758 #ifndef NDEBUG 2759 const Register PtrReg = LdSt.getPointerReg(); 2760 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); 2761 // Check that the pointer register is valid. 2762 assert(PtrRB.getID() == AArch64::GPRRegBankID && 2763 "Load/Store pointer operand isn't a GPR"); 2764 assert(MRI.getType(PtrReg).isPointer() && 2765 "Load/Store pointer operand isn't a pointer"); 2766 #endif 2767 2768 const Register ValReg = LdSt.getReg(0); 2769 const LLT ValTy = MRI.getType(ValReg); 2770 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); 2771 2772 // The code below doesn't support truncating stores, so we need to split it 2773 // again. 2774 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { 2775 unsigned SubReg; 2776 LLT MemTy = LdSt.getMMO().getMemoryType(); 2777 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI); 2778 if (!getSubRegForClass(RC, TRI, SubReg)) 2779 return false; 2780 2781 // Generate a subreg copy. 
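      // E.g. a 64-bit value stored as 32 bits is first copied into a 32-bit
      // register of the appropriate sub-register class (sub_32 for GPRs), and
      // that narrower register is stored instead, so the store opcode and the
      // value width agree.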
2782 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {}) 2783 .addReg(ValReg, 0, SubReg) 2784 .getReg(0); 2785 RBI.constrainGenericRegister(Copy, *RC, MRI); 2786 LdSt.getOperand(0).setReg(Copy); 2787 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { 2788 // If this is an any-extending load from the FPR bank, split it into a regular 2789 // load + extend. 2790 if (RB.getID() == AArch64::FPRRegBankID) { 2791 unsigned SubReg; 2792 LLT MemTy = LdSt.getMMO().getMemoryType(); 2793 auto *RC = getRegClassForTypeOnBank(MemTy, RB, RBI); 2794 if (!getSubRegForClass(RC, TRI, SubReg)) 2795 return false; 2796 Register OldDst = LdSt.getReg(0); 2797 Register NewDst = 2798 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType()); 2799 LdSt.getOperand(0).setReg(NewDst); 2800 MRI.setRegBank(NewDst, RB); 2801 // Generate a SUBREG_TO_REG to extend it. 2802 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator())); 2803 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {}) 2804 .addImm(0) 2805 .addUse(NewDst) 2806 .addImm(SubReg); 2807 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB, RBI); 2808 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI); 2809 MIB.setInstr(LdSt); 2810 } 2811 } 2812 2813 // Helper lambda for partially selecting I. Either returns the original 2814 // instruction with an updated opcode, or a new instruction. 2815 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { 2816 bool IsStore = isa<GStore>(I); 2817 const unsigned NewOpc = 2818 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); 2819 if (NewOpc == I.getOpcode()) 2820 return nullptr; 2821 // Check if we can fold anything into the addressing mode. 2822 auto AddrModeFns = 2823 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); 2824 if (!AddrModeFns) { 2825 // Can't fold anything. Use the original instruction. 2826 I.setDesc(TII.get(NewOpc)); 2827 I.addOperand(MachineOperand::CreateImm(0)); 2828 return &I; 2829 } 2830 2831 // Folded something. Create a new instruction and return it. 2832 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); 2833 Register CurValReg = I.getOperand(0).getReg(); 2834 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg); 2835 NewInst.cloneMemRefs(I); 2836 for (auto &Fn : *AddrModeFns) 2837 Fn(NewInst); 2838 I.eraseFromParent(); 2839 return &*NewInst; 2840 }; 2841 2842 MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); 2843 if (!LoadStore) 2844 return false; 2845 2846 // If we're storing a 0, use WZR/XZR. 2847 if (Opcode == TargetOpcode::G_STORE) { 2848 auto CVal = getIConstantVRegValWithLookThrough( 2849 LoadStore->getOperand(0).getReg(), MRI); 2850 if (CVal && CVal->Value == 0) { 2851 switch (LoadStore->getOpcode()) { 2852 case AArch64::STRWui: 2853 case AArch64::STRHHui: 2854 case AArch64::STRBBui: 2855 LoadStore->getOperand(0).setReg(AArch64::WZR); 2856 break; 2857 case AArch64::STRXui: 2858 LoadStore->getOperand(0).setReg(AArch64::XZR); 2859 break; 2860 } 2861 } 2862 } 2863 2864 if (IsZExtLoad) { 2865 // The zextload from a smaller type to i32 should be handled by the 2866 // importer. 2867 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) 2868 return false; 2869 // If we have a ZEXTLOAD then change the load's type to be a narrower reg 2870 // and zero_extend with SUBREG_TO_REG. 
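      // Roughly (illustrative):
      //   %ld:gpr32 = LDRWui %ptr, 0
      //   %dst:gpr64all = SUBREG_TO_REG 0, %ld, %subreg.sub_32
      // A 32-bit load already zeroes the upper 32 bits of the X register, so
      // SUBREG_TO_REG with a 0 immediate is enough to model the zero extend.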
2871 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2872 Register DstReg = LoadStore->getOperand(0).getReg(); 2873 LoadStore->getOperand(0).setReg(LdReg); 2874 2875 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator())); 2876 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) 2877 .addImm(0) 2878 .addUse(LdReg) 2879 .addImm(AArch64::sub_32); 2880 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); 2881 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, 2882 MRI); 2883 } 2884 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); 2885 } 2886 2887 case TargetOpcode::G_SMULH: 2888 case TargetOpcode::G_UMULH: { 2889 // Reject the various things we don't support yet. 2890 if (unsupportedBinOp(I, RBI, MRI, TRI)) 2891 return false; 2892 2893 const Register DefReg = I.getOperand(0).getReg(); 2894 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2895 2896 if (RB.getID() != AArch64::GPRRegBankID) { 2897 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n"); 2898 return false; 2899 } 2900 2901 if (Ty != LLT::scalar(64)) { 2902 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty 2903 << ", expected: " << LLT::scalar(64) << '\n'); 2904 return false; 2905 } 2906 2907 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr 2908 : AArch64::UMULHrr; 2909 I.setDesc(TII.get(NewOpc)); 2910 2911 // Now that we selected an opcode, we need to constrain the register 2912 // operands to use appropriate classes. 2913 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2914 } 2915 case TargetOpcode::G_LSHR: 2916 case TargetOpcode::G_ASHR: 2917 if (MRI.getType(I.getOperand(0).getReg()).isVector()) 2918 return selectVectorAshrLshr(I, MRI); 2919 LLVM_FALLTHROUGH; 2920 case TargetOpcode::G_SHL: 2921 if (Opcode == TargetOpcode::G_SHL && 2922 MRI.getType(I.getOperand(0).getReg()).isVector()) 2923 return selectVectorSHL(I, MRI); 2924 2925 // These shifts were legalized to have 64 bit shift amounts because we 2926 // want to take advantage of the selection patterns that assume the 2927 // immediates are s64s, however, selectBinaryOp will assume both operands 2928 // will have the same bit size. 2929 { 2930 Register SrcReg = I.getOperand(1).getReg(); 2931 Register ShiftReg = I.getOperand(2).getReg(); 2932 const LLT ShiftTy = MRI.getType(ShiftReg); 2933 const LLT SrcTy = MRI.getType(SrcReg); 2934 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && 2935 ShiftTy.getSizeInBits() == 64) { 2936 assert(!ShiftTy.isVector() && "unexpected vector shift ty"); 2937 assert(MRI.getVRegDef(ShiftReg) && 2938 "could not find a vreg definition for shift amount"); 2939 // Insert a subregister copy to implement a 64->32 trunc 2940 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {}) 2941 .addReg(ShiftReg, 0, AArch64::sub_32); 2942 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); 2943 I.getOperand(2).setReg(Trunc.getReg(0)); 2944 } 2945 } 2946 LLVM_FALLTHROUGH; 2947 case TargetOpcode::G_FADD: 2948 case TargetOpcode::G_FSUB: 2949 case TargetOpcode::G_FMUL: 2950 case TargetOpcode::G_FDIV: 2951 case TargetOpcode::G_OR: { 2952 // Reject the various things we don't support yet. 
2953 if (unsupportedBinOp(I, RBI, MRI, TRI)) 2954 return false; 2955 2956 const unsigned OpSize = Ty.getSizeInBits(); 2957 2958 const Register DefReg = I.getOperand(0).getReg(); 2959 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2960 2961 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize); 2962 if (NewOpc == I.getOpcode()) 2963 return false; 2964 2965 I.setDesc(TII.get(NewOpc)); 2966 // FIXME: Should the type be always reset in setDesc? 2967 2968 // Now that we selected an opcode, we need to constrain the register 2969 // operands to use appropriate classes. 2970 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2971 } 2972 2973 case TargetOpcode::G_PTR_ADD: { 2974 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB); 2975 I.eraseFromParent(); 2976 return true; 2977 } 2978 case TargetOpcode::G_SADDO: 2979 case TargetOpcode::G_UADDO: 2980 case TargetOpcode::G_SSUBO: 2981 case TargetOpcode::G_USUBO: { 2982 // Emit the operation and get the correct condition code. 2983 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(), 2984 I.getOperand(2), I.getOperand(3), MIB); 2985 2986 // Now, put the overflow result in the register given by the first operand 2987 // to the overflow op. CSINC increments the result when the predicate is 2988 // false, so to get the increment when it's true, we need to use the 2989 // inverse. In this case, we want to increment when carry is set. 2990 Register ZReg = AArch64::WZR; 2991 emitCSINC(/*Dst=*/I.getOperand(1).getReg(), /*Src1=*/ZReg, /*Src2=*/ZReg, 2992 getInvertedCondCode(OpAndCC.second), MIB); 2993 I.eraseFromParent(); 2994 return true; 2995 } 2996 2997 case TargetOpcode::G_PTRMASK: { 2998 Register MaskReg = I.getOperand(2).getReg(); 2999 Optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI); 3000 // TODO: Implement arbitrary cases 3001 if (!MaskVal || !isShiftedMask_64(*MaskVal)) 3002 return false; 3003 3004 uint64_t Mask = *MaskVal; 3005 I.setDesc(TII.get(AArch64::ANDXri)); 3006 I.getOperand(2).ChangeToImmediate( 3007 AArch64_AM::encodeLogicalImmediate(Mask, 64)); 3008 3009 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3010 } 3011 case TargetOpcode::G_PTRTOINT: 3012 case TargetOpcode::G_TRUNC: { 3013 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 3014 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 3015 3016 const Register DstReg = I.getOperand(0).getReg(); 3017 const Register SrcReg = I.getOperand(1).getReg(); 3018 3019 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 3020 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 3021 3022 if (DstRB.getID() != SrcRB.getID()) { 3023 LLVM_DEBUG( 3024 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"); 3025 return false; 3026 } 3027 3028 if (DstRB.getID() == AArch64::GPRRegBankID) { 3029 const TargetRegisterClass *DstRC = 3030 getRegClassForTypeOnBank(DstTy, DstRB, RBI); 3031 if (!DstRC) 3032 return false; 3033 3034 const TargetRegisterClass *SrcRC = 3035 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI); 3036 if (!SrcRC) 3037 return false; 3038 3039 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 3040 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 3041 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"); 3042 return false; 3043 } 3044 3045 if (DstRC == SrcRC) { 3046 // Nothing to be done 3047 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) && 3048 SrcTy == LLT::scalar(64)) { 3049 llvm_unreachable("TableGen can 
import this case"); 3050 return false; 3051 } else if (DstRC == &AArch64::GPR32RegClass && 3052 SrcRC == &AArch64::GPR64RegClass) { 3053 I.getOperand(1).setSubReg(AArch64::sub_32); 3054 } else { 3055 LLVM_DEBUG( 3056 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"); 3057 return false; 3058 } 3059 3060 I.setDesc(TII.get(TargetOpcode::COPY)); 3061 return true; 3062 } else if (DstRB.getID() == AArch64::FPRRegBankID) { 3063 if (DstTy == LLT::fixed_vector(4, 16) && 3064 SrcTy == LLT::fixed_vector(4, 32)) { 3065 I.setDesc(TII.get(AArch64::XTNv4i16)); 3066 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3067 return true; 3068 } 3069 3070 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { 3071 MachineInstr *Extract = emitExtractVectorElt( 3072 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB); 3073 if (!Extract) 3074 return false; 3075 I.eraseFromParent(); 3076 return true; 3077 } 3078 3079 // We might have a vector G_PTRTOINT, in which case just emit a COPY. 3080 if (Opcode == TargetOpcode::G_PTRTOINT) { 3081 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector"); 3082 I.setDesc(TII.get(TargetOpcode::COPY)); 3083 return selectCopy(I, TII, MRI, TRI, RBI); 3084 } 3085 } 3086 3087 return false; 3088 } 3089 3090 case TargetOpcode::G_ANYEXT: { 3091 if (selectUSMovFromExtend(I, MRI)) 3092 return true; 3093 3094 const Register DstReg = I.getOperand(0).getReg(); 3095 const Register SrcReg = I.getOperand(1).getReg(); 3096 3097 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI); 3098 if (RBDst.getID() != AArch64::GPRRegBankID) { 3099 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst 3100 << ", expected: GPR\n"); 3101 return false; 3102 } 3103 3104 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI); 3105 if (RBSrc.getID() != AArch64::GPRRegBankID) { 3106 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc 3107 << ", expected: GPR\n"); 3108 return false; 3109 } 3110 3111 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); 3112 3113 if (DstSize == 0) { 3114 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n"); 3115 return false; 3116 } 3117 3118 if (DstSize != 64 && DstSize > 32) { 3119 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize 3120 << ", expected: 32 or 64\n"); 3121 return false; 3122 } 3123 // At this point G_ANYEXT is just like a plain COPY, but we need 3124 // to explicitly form the 64-bit value if any. 3125 if (DstSize > 32) { 3126 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass); 3127 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) 3128 .addDef(ExtSrc) 3129 .addImm(0) 3130 .addUse(SrcReg) 3131 .addImm(AArch64::sub_32); 3132 I.getOperand(1).setReg(ExtSrc); 3133 } 3134 return selectCopy(I, TII, MRI, TRI, RBI); 3135 } 3136 3137 case TargetOpcode::G_ZEXT: 3138 case TargetOpcode::G_SEXT_INREG: 3139 case TargetOpcode::G_SEXT: { 3140 if (selectUSMovFromExtend(I, MRI)) 3141 return true; 3142 3143 unsigned Opcode = I.getOpcode(); 3144 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT; 3145 const Register DefReg = I.getOperand(0).getReg(); 3146 Register SrcReg = I.getOperand(1).getReg(); 3147 const LLT DstTy = MRI.getType(DefReg); 3148 const LLT SrcTy = MRI.getType(SrcReg); 3149 unsigned DstSize = DstTy.getSizeInBits(); 3150 unsigned SrcSize = SrcTy.getSizeInBits(); 3151 3152 // SEXT_INREG has the same src reg size as dst, the size of the value to be 3153 // extended is encoded in the imm. 
3154 if (Opcode == TargetOpcode::G_SEXT_INREG) 3155 SrcSize = I.getOperand(2).getImm(); 3156 3157 if (DstTy.isVector()) 3158 return false; // Should be handled by imported patterns. 3159 3160 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == 3161 AArch64::GPRRegBankID && 3162 "Unexpected ext regbank"); 3163 3164 MachineInstr *ExtI; 3165 3166 // First check if we're extending the result of a load which has a dest type 3167 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest 3168 // GPR register on AArch64 and all loads which are smaller automatically 3169 // zero-extend the upper bits. E.g. 3170 // %v(s8) = G_LOAD %p, :: (load 1) 3171 // %v2(s32) = G_ZEXT %v(s8) 3172 if (!IsSigned) { 3173 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); 3174 bool IsGPR = 3175 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID; 3176 if (LoadMI && IsGPR) { 3177 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); 3178 unsigned BytesLoaded = MemOp->getSize(); 3179 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) 3180 return selectCopy(I, TII, MRI, TRI, RBI); 3181 } 3182 3183 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs) 3184 // + SUBREG_TO_REG. 3185 // 3186 // If we are zero extending from 32 bits to 64 bits, it's possible that 3187 // the instruction implicitly does the zero extend for us. In that case, 3188 // we only need the SUBREG_TO_REG. 3189 if (IsGPR && SrcSize == 32 && DstSize == 64) { 3190 // Unlike with the G_LOAD case, we don't want to look through copies 3191 // here. (See isDef32.) 3192 MachineInstr *Def = MRI.getVRegDef(SrcReg); 3193 Register SubregToRegSrc = SrcReg; 3194 3195 // Does the instruction implicitly zero extend? 3196 if (!Def || !isDef32(*Def)) { 3197 // No. Zero out using an OR. 3198 Register OrDst = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 3199 const Register ZReg = AArch64::WZR; 3200 MIB.buildInstr(AArch64::ORRWrs, {OrDst}, {ZReg, SrcReg}).addImm(0); 3201 SubregToRegSrc = OrDst; 3202 } 3203 3204 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) 3205 .addImm(0) 3206 .addUse(SubregToRegSrc) 3207 .addImm(AArch64::sub_32); 3208 3209 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, 3210 MRI)) { 3211 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n"); 3212 return false; 3213 } 3214 3215 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, 3216 MRI)) { 3217 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n"); 3218 return false; 3219 } 3220 3221 I.eraseFromParent(); 3222 return true; 3223 } 3224 } 3225 3226 if (DstSize == 64) { 3227 if (Opcode != TargetOpcode::G_SEXT_INREG) { 3228 // FIXME: Can we avoid manually doing this? 3229 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, 3230 MRI)) { 3231 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) 3232 << " operand\n"); 3233 return false; 3234 } 3235 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, 3236 {&AArch64::GPR64RegClass}, {}) 3237 .addImm(0) 3238 .addUse(SrcReg) 3239 .addImm(AArch64::sub_32) 3240 .getReg(0); 3241 } 3242 3243 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, 3244 {DefReg}, {SrcReg}) 3245 .addImm(0) 3246 .addImm(SrcSize - 1); 3247 } else if (DstSize <= 32) { 3248 ExtI = MIB.buildInstr(IsSigned ? 
AArch64::SBFMWri : AArch64::UBFMWri, 3249 {DefReg}, {SrcReg}) 3250 .addImm(0) 3251 .addImm(SrcSize - 1); 3252 } else { 3253 return false; 3254 } 3255 3256 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 3257 I.eraseFromParent(); 3258 return true; 3259 } 3260 3261 case TargetOpcode::G_SITOFP: 3262 case TargetOpcode::G_UITOFP: 3263 case TargetOpcode::G_FPTOSI: 3264 case TargetOpcode::G_FPTOUI: { 3265 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), 3266 SrcTy = MRI.getType(I.getOperand(1).getReg()); 3267 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy); 3268 if (NewOpc == Opcode) 3269 return false; 3270 3271 I.setDesc(TII.get(NewOpc)); 3272 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3273 3274 return true; 3275 } 3276 3277 case TargetOpcode::G_FREEZE: 3278 return selectCopy(I, TII, MRI, TRI, RBI); 3279 3280 case TargetOpcode::G_INTTOPTR: 3281 // The importer is currently unable to import pointer types since they 3282 // didn't exist in SelectionDAG. 3283 return selectCopy(I, TII, MRI, TRI, RBI); 3284 3285 case TargetOpcode::G_BITCAST: 3286 // Imported SelectionDAG rules can handle every bitcast except those that 3287 // bitcast from a type to the same type. Ideally, these shouldn't occur 3288 // but we might not run an optimizer that deletes them. The other exception 3289 // is bitcasts involving pointer types, as SelectionDAG has no knowledge 3290 // of them. 3291 return selectCopy(I, TII, MRI, TRI, RBI); 3292 3293 case TargetOpcode::G_SELECT: { 3294 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) { 3295 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty 3296 << ", expected: " << LLT::scalar(1) << '\n'); 3297 return false; 3298 } 3299 3300 const Register CondReg = I.getOperand(1).getReg(); 3301 const Register TReg = I.getOperand(2).getReg(); 3302 const Register FReg = I.getOperand(3).getReg(); 3303 3304 if (tryOptSelect(I)) 3305 return true; 3306 3307 // Make sure to use an unused vreg instead of wzr, so that the peephole 3308 // optimizations will be able to optimize these. 
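// Rough shape of what gets emitted below (a sketch; emitSelect may fold to a
// CSINC/CSINV/CSNEG variant rather than a plain CSEL):
//   %dead:gpr32 = ANDSWri %cond, #0x1   ; sets NZCV from bit 0 of the cond
//   %dst = CSEL %t, %f, NE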
3309 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 3310 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) 3311 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 3312 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 3313 if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB)) 3314 return false; 3315 I.eraseFromParent(); 3316 return true; 3317 } 3318 case TargetOpcode::G_ICMP: { 3319 if (Ty.isVector()) 3320 return selectVectorICmp(I, MRI); 3321 3322 if (Ty != LLT::scalar(32)) { 3323 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty 3324 << ", expected: " << LLT::scalar(32) << '\n'); 3325 return false; 3326 } 3327 3328 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); 3329 const AArch64CC::CondCode InvCC = 3330 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred)); 3331 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB); 3332 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR, 3333 /*Src2=*/AArch64::WZR, InvCC, MIB); 3334 I.eraseFromParent(); 3335 return true; 3336 } 3337 3338 case TargetOpcode::G_FCMP: { 3339 CmpInst::Predicate Pred = 3340 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); 3341 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB, 3342 Pred) || 3343 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB)) 3344 return false; 3345 I.eraseFromParent(); 3346 return true; 3347 } 3348 case TargetOpcode::G_VASTART: 3349 return STI.isTargetDarwin() ? selectVaStartDarwin(I, MF, MRI) 3350 : selectVaStartAAPCS(I, MF, MRI); 3351 case TargetOpcode::G_INTRINSIC: 3352 return selectIntrinsic(I, MRI); 3353 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 3354 return selectIntrinsicWithSideEffects(I, MRI); 3355 case TargetOpcode::G_IMPLICIT_DEF: { 3356 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); 3357 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 3358 const Register DstReg = I.getOperand(0).getReg(); 3359 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 3360 const TargetRegisterClass *DstRC = 3361 getRegClassForTypeOnBank(DstTy, DstRB, RBI); 3362 RBI.constrainGenericRegister(DstReg, *DstRC, MRI); 3363 return true; 3364 } 3365 case TargetOpcode::G_BLOCK_ADDR: { 3366 if (TM.getCodeModel() == CodeModel::Large) { 3367 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0); 3368 I.eraseFromParent(); 3369 return true; 3370 } else { 3371 I.setDesc(TII.get(AArch64::MOVaddrBA)); 3372 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA), 3373 I.getOperand(0).getReg()) 3374 .addBlockAddress(I.getOperand(1).getBlockAddress(), 3375 /* Offset */ 0, AArch64II::MO_PAGE) 3376 .addBlockAddress( 3377 I.getOperand(1).getBlockAddress(), /* Offset */ 0, 3378 AArch64II::MO_NC | AArch64II::MO_PAGEOFF); 3379 I.eraseFromParent(); 3380 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); 3381 } 3382 } 3383 case AArch64::G_DUP: { 3384 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by 3385 // imported patterns. Do it manually here. Avoiding generating s16 gpr is 3386 // difficult because at RBS we may end up pessimizing the fpr case if we 3387 // decided to add an anyextend to fix this. Manual selection is the most 3388 // robust solution for now. 3389 if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != 3390 AArch64::GPRRegBankID) 3391 return false; // We expect the fpr regbank case to be imported. 
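// Illustrative example: %v:fpr(<8 x s8>) = G_DUP %s:gpr(s8) is selected to
// DUPv8i8gpr, which broadcasts the low byte of the W register holding %s.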
3392 LLT VecTy = MRI.getType(I.getOperand(0).getReg()); 3393 if (VecTy == LLT::fixed_vector(8, 8)) 3394 I.setDesc(TII.get(AArch64::DUPv8i8gpr)); 3395 else if (VecTy == LLT::fixed_vector(16, 8)) 3396 I.setDesc(TII.get(AArch64::DUPv16i8gpr)); 3397 else if (VecTy == LLT::fixed_vector(4, 16)) 3398 I.setDesc(TII.get(AArch64::DUPv4i16gpr)); 3399 else if (VecTy == LLT::fixed_vector(8, 16)) 3400 I.setDesc(TII.get(AArch64::DUPv8i16gpr)); 3401 else 3402 return false; 3403 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3404 } 3405 case TargetOpcode::G_INTRINSIC_TRUNC: 3406 return selectIntrinsicTrunc(I, MRI); 3407 case TargetOpcode::G_INTRINSIC_ROUND: 3408 return selectIntrinsicRound(I, MRI); 3409 case TargetOpcode::G_BUILD_VECTOR: 3410 return selectBuildVector(I, MRI); 3411 case TargetOpcode::G_MERGE_VALUES: 3412 return selectMergeValues(I, MRI); 3413 case TargetOpcode::G_UNMERGE_VALUES: 3414 return selectUnmergeValues(I, MRI); 3415 case TargetOpcode::G_SHUFFLE_VECTOR: 3416 return selectShuffleVector(I, MRI); 3417 case TargetOpcode::G_EXTRACT_VECTOR_ELT: 3418 return selectExtractElt(I, MRI); 3419 case TargetOpcode::G_INSERT_VECTOR_ELT: 3420 return selectInsertElt(I, MRI); 3421 case TargetOpcode::G_CONCAT_VECTORS: 3422 return selectConcatVectors(I, MRI); 3423 case TargetOpcode::G_JUMP_TABLE: 3424 return selectJumpTable(I, MRI); 3425 case TargetOpcode::G_VECREDUCE_FADD: 3426 case TargetOpcode::G_VECREDUCE_ADD: 3427 return selectReduction(I, MRI); 3428 case TargetOpcode::G_MEMCPY: 3429 case TargetOpcode::G_MEMCPY_INLINE: 3430 case TargetOpcode::G_MEMMOVE: 3431 case TargetOpcode::G_MEMSET: 3432 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature"); 3433 return selectMOPS(I, MRI); 3434 } 3435 3436 return false; 3437 } 3438 3439 bool AArch64InstructionSelector::selectReduction(MachineInstr &I, 3440 MachineRegisterInfo &MRI) { 3441 Register VecReg = I.getOperand(1).getReg(); 3442 LLT VecTy = MRI.getType(VecReg); 3443 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) { 3444 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit 3445 // a subregister copy afterwards. 
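// Sketch of the sequence emitted for that case:
//   %sum:fpr64 = ADDPv2i32 %vec, %vec   ; pairwise add of the two lanes
//   %dst:fpr32 = COPY %sum.ssub         ; the low 32 bits hold the reduction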
3446 if (VecTy == LLT::fixed_vector(2, 32)) { 3447 Register DstReg = I.getOperand(0).getReg(); 3448 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass}, 3449 {VecReg, VecReg}); 3450 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 3451 .addReg(AddP.getReg(0), 0, AArch64::ssub) 3452 .getReg(0); 3453 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI); 3454 I.eraseFromParent(); 3455 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI); 3456 } 3457 3458 unsigned Opc = 0; 3459 if (VecTy == LLT::fixed_vector(16, 8)) 3460 Opc = AArch64::ADDVv16i8v; 3461 else if (VecTy == LLT::fixed_vector(8, 16)) 3462 Opc = AArch64::ADDVv8i16v; 3463 else if (VecTy == LLT::fixed_vector(4, 32)) 3464 Opc = AArch64::ADDVv4i32v; 3465 else if (VecTy == LLT::fixed_vector(2, 64)) 3466 Opc = AArch64::ADDPv2i64p; 3467 else { 3468 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction"); 3469 return false; 3470 } 3471 I.setDesc(TII.get(Opc)); 3472 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3473 } 3474 3475 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) { 3476 unsigned Opc = 0; 3477 if (VecTy == LLT::fixed_vector(2, 32)) 3478 Opc = AArch64::FADDPv2i32p; 3479 else if (VecTy == LLT::fixed_vector(2, 64)) 3480 Opc = AArch64::FADDPv2i64p; 3481 else { 3482 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction"); 3483 return false; 3484 } 3485 I.setDesc(TII.get(Opc)); 3486 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3487 } 3488 return false; 3489 } 3490 3491 bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI, 3492 MachineRegisterInfo &MRI) { 3493 unsigned Mopcode; 3494 switch (GI.getOpcode()) { 3495 case TargetOpcode::G_MEMCPY: 3496 case TargetOpcode::G_MEMCPY_INLINE: 3497 Mopcode = AArch64::MOPSMemoryCopyPseudo; 3498 break; 3499 case TargetOpcode::G_MEMMOVE: 3500 Mopcode = AArch64::MOPSMemoryMovePseudo; 3501 break; 3502 case TargetOpcode::G_MEMSET: 3503 // For tagged memset see llvm.aarch64.mops.memset.tag 3504 Mopcode = AArch64::MOPSMemorySetPseudo; 3505 break; 3506 } 3507 3508 auto &DstPtr = GI.getOperand(0); 3509 auto &SrcOrVal = GI.getOperand(1); 3510 auto &Size = GI.getOperand(2); 3511 3512 // Create copies of the registers that can be clobbered. 3513 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg()); 3514 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg()); 3515 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg()); 3516 3517 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo; 3518 const auto &SrcValRegClass = 3519 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass; 3520 3521 // Constrain to specific registers 3522 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI); 3523 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI); 3524 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI); 3525 3526 MIB.buildCopy(DstPtrCopy, DstPtr); 3527 MIB.buildCopy(SrcValCopy, SrcOrVal); 3528 MIB.buildCopy(SizeCopy, Size); 3529 3530 // New instruction uses the copied registers because it must update them. 3531 // The defs are not used since they don't exist in G_MEM*. They are still 3532 // tied. 
3533 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE 3534 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass); 3535 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3536 if (IsSet) { 3537 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize}, 3538 {DstPtrCopy, SizeCopy, SrcValCopy}); 3539 } else { 3540 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass); 3541 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize}, 3542 {DstPtrCopy, SrcValCopy, SizeCopy}); 3543 } 3544 3545 GI.eraseFromParent(); 3546 return true; 3547 } 3548 3549 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, 3550 MachineRegisterInfo &MRI) { 3551 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT"); 3552 Register JTAddr = I.getOperand(0).getReg(); 3553 unsigned JTI = I.getOperand(1).getIndex(); 3554 Register Index = I.getOperand(2).getReg(); 3555 3556 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3557 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); 3558 3559 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr); 3560 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32, 3561 {TargetReg, ScratchReg}, {JTAddr, Index}) 3562 .addJumpTableIndex(JTI); 3563 // Build the indirect branch. 3564 MIB.buildInstr(AArch64::BR, {}, {TargetReg}); 3565 I.eraseFromParent(); 3566 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI); 3567 } 3568 3569 bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I, 3570 MachineRegisterInfo &MRI) { 3571 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table"); 3572 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!"); 3573 3574 Register DstReg = I.getOperand(0).getReg(); 3575 unsigned JTI = I.getOperand(1).getIndex(); 3576 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later. 3577 auto MovMI = 3578 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {}) 3579 .addJumpTableIndex(JTI, AArch64II::MO_PAGE) 3580 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF); 3581 I.eraseFromParent(); 3582 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); 3583 } 3584 3585 bool AArch64InstructionSelector::selectTLSGlobalValue( 3586 MachineInstr &I, MachineRegisterInfo &MRI) { 3587 if (!STI.isTargetMachO()) 3588 return false; 3589 MachineFunction &MF = *I.getParent()->getParent(); 3590 MF.getFrameInfo().setAdjustsStack(true); 3591 3592 const auto &GlobalOp = I.getOperand(1); 3593 assert(GlobalOp.getOffset() == 0 && 3594 "Shouldn't have an offset on TLS globals!"); 3595 const GlobalValue &GV = *GlobalOp.getGlobal(); 3596 3597 auto LoadGOT = 3598 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {}) 3599 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); 3600 3601 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, 3602 {LoadGOT.getReg(0)}) 3603 .addImm(0); 3604 3605 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0)); 3606 // TLS calls preserve all registers except those that absolutely must be 3607 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be 3608 // silly). 
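// Sketch of the full sequence built here (Mach-O TLV convention, as checked
// by isTargetMachO above):
//   %got:gpr64common = LOADgot @var [TLS]   ; descriptor address
//   %fn:gpr64common  = LDRXui %got, 0       ; thunk pointer
//   $x0 = COPY %got
//   BLR %fn, implicit $x0, implicit-def $x0, <TLS regmask>
//   %dst = COPY $x0                         ; resulting thread-local address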
3609 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load}) 3610 .addUse(AArch64::X0, RegState::Implicit) 3611 .addDef(AArch64::X0, RegState::Implicit) 3612 .addRegMask(TRI.getTLSCallPreservedMask()); 3613 3614 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0)); 3615 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass, 3616 MRI); 3617 I.eraseFromParent(); 3618 return true; 3619 } 3620 3621 bool AArch64InstructionSelector::selectIntrinsicTrunc( 3622 MachineInstr &I, MachineRegisterInfo &MRI) const { 3623 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); 3624 3625 // Select the correct opcode. 3626 unsigned Opc = 0; 3627 if (!SrcTy.isVector()) { 3628 switch (SrcTy.getSizeInBits()) { 3629 default: 3630 case 16: 3631 Opc = AArch64::FRINTZHr; 3632 break; 3633 case 32: 3634 Opc = AArch64::FRINTZSr; 3635 break; 3636 case 64: 3637 Opc = AArch64::FRINTZDr; 3638 break; 3639 } 3640 } else { 3641 unsigned NumElts = SrcTy.getNumElements(); 3642 switch (SrcTy.getElementType().getSizeInBits()) { 3643 default: 3644 break; 3645 case 16: 3646 if (NumElts == 4) 3647 Opc = AArch64::FRINTZv4f16; 3648 else if (NumElts == 8) 3649 Opc = AArch64::FRINTZv8f16; 3650 break; 3651 case 32: 3652 if (NumElts == 2) 3653 Opc = AArch64::FRINTZv2f32; 3654 else if (NumElts == 4) 3655 Opc = AArch64::FRINTZv4f32; 3656 break; 3657 case 64: 3658 if (NumElts == 2) 3659 Opc = AArch64::FRINTZv2f64; 3660 break; 3661 } 3662 } 3663 3664 if (!Opc) { 3665 // Didn't get an opcode above, bail. 3666 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n"); 3667 return false; 3668 } 3669 3670 // Legalization would have set us up perfectly for this; we just need to 3671 // set the opcode and move on. 3672 I.setDesc(TII.get(Opc)); 3673 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3674 } 3675 3676 bool AArch64InstructionSelector::selectIntrinsicRound( 3677 MachineInstr &I, MachineRegisterInfo &MRI) const { 3678 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); 3679 3680 // Select the correct opcode. 3681 unsigned Opc = 0; 3682 if (!SrcTy.isVector()) { 3683 switch (SrcTy.getSizeInBits()) { 3684 default: 3685 case 16: 3686 Opc = AArch64::FRINTAHr; 3687 break; 3688 case 32: 3689 Opc = AArch64::FRINTASr; 3690 break; 3691 case 64: 3692 Opc = AArch64::FRINTADr; 3693 break; 3694 } 3695 } else { 3696 unsigned NumElts = SrcTy.getNumElements(); 3697 switch (SrcTy.getElementType().getSizeInBits()) { 3698 default: 3699 break; 3700 case 16: 3701 if (NumElts == 4) 3702 Opc = AArch64::FRINTAv4f16; 3703 else if (NumElts == 8) 3704 Opc = AArch64::FRINTAv8f16; 3705 break; 3706 case 32: 3707 if (NumElts == 2) 3708 Opc = AArch64::FRINTAv2f32; 3709 else if (NumElts == 4) 3710 Opc = AArch64::FRINTAv4f32; 3711 break; 3712 case 64: 3713 if (NumElts == 2) 3714 Opc = AArch64::FRINTAv2f64; 3715 break; 3716 } 3717 } 3718 3719 if (!Opc) { 3720 // Didn't get an opcode above, bail. 3721 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n"); 3722 return false; 3723 } 3724 3725 // Legalization would have set us up perfectly for this; we just need to 3726 // set the opcode and move on. 
3727 I.setDesc(TII.get(Opc)); 3728 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3729 } 3730 3731 bool AArch64InstructionSelector::selectVectorICmp( 3732 MachineInstr &I, MachineRegisterInfo &MRI) { 3733 Register DstReg = I.getOperand(0).getReg(); 3734 LLT DstTy = MRI.getType(DstReg); 3735 Register SrcReg = I.getOperand(2).getReg(); 3736 Register Src2Reg = I.getOperand(3).getReg(); 3737 LLT SrcTy = MRI.getType(SrcReg); 3738 3739 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); 3740 unsigned NumElts = DstTy.getNumElements(); 3741 3742 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b 3743 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16 3744 // Third index is cc opcode: 3745 // 0 == eq 3746 // 1 == ugt 3747 // 2 == uge 3748 // 3 == ult 3749 // 4 == ule 3750 // 5 == sgt 3751 // 6 == sge 3752 // 7 == slt 3753 // 8 == sle 3754 // ne is done by negating 'eq' result. 3755 3756 // This table below assumes that for some comparisons the operands will be 3757 // commuted. 3758 // ult op == commute + ugt op 3759 // ule op == commute + uge op 3760 // slt op == commute + sgt op 3761 // sle op == commute + sge op 3762 unsigned PredIdx = 0; 3763 bool SwapOperands = false; 3764 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); 3765 switch (Pred) { 3766 case CmpInst::ICMP_NE: 3767 case CmpInst::ICMP_EQ: 3768 PredIdx = 0; 3769 break; 3770 case CmpInst::ICMP_UGT: 3771 PredIdx = 1; 3772 break; 3773 case CmpInst::ICMP_UGE: 3774 PredIdx = 2; 3775 break; 3776 case CmpInst::ICMP_ULT: 3777 PredIdx = 3; 3778 SwapOperands = true; 3779 break; 3780 case CmpInst::ICMP_ULE: 3781 PredIdx = 4; 3782 SwapOperands = true; 3783 break; 3784 case CmpInst::ICMP_SGT: 3785 PredIdx = 5; 3786 break; 3787 case CmpInst::ICMP_SGE: 3788 PredIdx = 6; 3789 break; 3790 case CmpInst::ICMP_SLT: 3791 PredIdx = 7; 3792 SwapOperands = true; 3793 break; 3794 case CmpInst::ICMP_SLE: 3795 PredIdx = 8; 3796 SwapOperands = true; 3797 break; 3798 default: 3799 llvm_unreachable("Unhandled icmp predicate"); 3800 return false; 3801 } 3802 3803 // This table obviously should be tablegen'd when we have our GISel native 3804 // tablegen selector. 
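// Worked example (illustrative): a <4 x s32> G_ICMP with predicate ult gives
// EltIdx = Log2(32/8) = 2, NumEltsIdx = Log2(4/2) = 1 and PredIdx = 3, so the
// lookup below yields OpcTable[2][1][3] == CMHIv4i32 with SwapOperands set.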
3805 3806 static const unsigned OpcTable[4][4][9] = { 3807 { 3808 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3809 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3810 0 /* invalid */}, 3811 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3812 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3813 0 /* invalid */}, 3814 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, 3815 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, 3816 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, 3817 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, 3818 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, 3819 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8} 3820 }, 3821 { 3822 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3823 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3824 0 /* invalid */}, 3825 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, 3826 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, 3827 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, 3828 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, 3829 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, 3830 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, 3831 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3832 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3833 0 /* invalid */} 3834 }, 3835 { 3836 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, 3837 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, 3838 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, 3839 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, 3840 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, 3841 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, 3842 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3843 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3844 0 /* invalid */}, 3845 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3846 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3847 0 /* invalid */} 3848 }, 3849 { 3850 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, 3851 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64, 3852 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, 3853 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3854 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3855 0 /* invalid */}, 3856 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3857 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3858 0 /* invalid */}, 3859 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3860 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3861 0 /* invalid */} 3862 }, 3863 }; 3864 unsigned EltIdx = Log2_32(SrcEltSize / 8); 3865 unsigned NumEltsIdx = Log2_32(NumElts / 2); 3866 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx]; 3867 if (!Opc) { 3868 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode"); 3869 return false; 3870 } 3871 3872 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI); 3873 const TargetRegisterClass *SrcRC = 3874 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true); 3875 if (!SrcRC) { 3876 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); 3877 
return false; 3878 } 3879 3880 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0; 3881 if (SrcTy.getSizeInBits() == 128) 3882 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0; 3883 3884 if (SwapOperands) 3885 std::swap(SrcReg, Src2Reg); 3886 3887 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg}); 3888 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 3889 3890 // Invert if we had a 'ne' cc. 3891 if (NotOpc) { 3892 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp}); 3893 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 3894 } else { 3895 MIB.buildCopy(DstReg, Cmp.getReg(0)); 3896 } 3897 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI); 3898 I.eraseFromParent(); 3899 return true; 3900 } 3901 3902 MachineInstr *AArch64InstructionSelector::emitScalarToVector( 3903 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar, 3904 MachineIRBuilder &MIRBuilder) const { 3905 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {}); 3906 3907 auto BuildFn = [&](unsigned SubregIndex) { 3908 auto Ins = 3909 MIRBuilder 3910 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar}) 3911 .addImm(SubregIndex); 3912 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI); 3913 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI); 3914 return &*Ins; 3915 }; 3916 3917 switch (EltSize) { 3918 case 16: 3919 return BuildFn(AArch64::hsub); 3920 case 32: 3921 return BuildFn(AArch64::ssub); 3922 case 64: 3923 return BuildFn(AArch64::dsub); 3924 default: 3925 return nullptr; 3926 } 3927 } 3928 3929 bool AArch64InstructionSelector::selectMergeValues( 3930 MachineInstr &I, MachineRegisterInfo &MRI) { 3931 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode"); 3932 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 3933 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 3934 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation"); 3935 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); 3936 3937 if (I.getNumOperands() != 3) 3938 return false; 3939 3940 // Merging 2 s64s into an s128. 
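// Sketch: %dst(s128) = G_MERGE_VALUES %lo(s64), %hi(s64) becomes an
// IMPLICIT_DEF with %lo lane-inserted at index 0 and %hi at index 1.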
3941 if (DstTy == LLT::scalar(128)) { 3942 if (SrcTy.getSizeInBits() != 64) 3943 return false; 3944 Register DstReg = I.getOperand(0).getReg(); 3945 Register Src1Reg = I.getOperand(1).getReg(); 3946 Register Src2Reg = I.getOperand(2).getReg(); 3947 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {}); 3948 MachineInstr *InsMI = 3949 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB); 3950 if (!InsMI) 3951 return false; 3952 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(), 3953 Src2Reg, /* LaneIdx */ 1, RB, MIB); 3954 if (!Ins2MI) 3955 return false; 3956 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); 3957 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI); 3958 I.eraseFromParent(); 3959 return true; 3960 } 3961 3962 if (RB.getID() != AArch64::GPRRegBankID) 3963 return false; 3964 3965 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) 3966 return false; 3967 3968 auto *DstRC = &AArch64::GPR64RegClass; 3969 Register SubToRegDef = MRI.createVirtualRegister(DstRC); 3970 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 3971 TII.get(TargetOpcode::SUBREG_TO_REG)) 3972 .addDef(SubToRegDef) 3973 .addImm(0) 3974 .addUse(I.getOperand(1).getReg()) 3975 .addImm(AArch64::sub_32); 3976 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC); 3977 // Need to anyext the second scalar before we can use bfm 3978 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 3979 TII.get(TargetOpcode::SUBREG_TO_REG)) 3980 .addDef(SubToRegDef2) 3981 .addImm(0) 3982 .addUse(I.getOperand(2).getReg()) 3983 .addImm(AArch64::sub_32); 3984 MachineInstr &BFM = 3985 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri)) 3986 .addDef(I.getOperand(0).getReg()) 3987 .addUse(SubToRegDef) 3988 .addUse(SubToRegDef2) 3989 .addImm(32) 3990 .addImm(31); 3991 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI); 3992 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI); 3993 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI); 3994 I.eraseFromParent(); 3995 return true; 3996 } 3997 3998 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, 3999 const unsigned EltSize) { 4000 // Choose a lane copy opcode and subregister based off of the size of the 4001 // vector's elements. 4002 switch (EltSize) { 4003 case 8: 4004 CopyOpc = AArch64::DUPi8; 4005 ExtractSubReg = AArch64::bsub; 4006 break; 4007 case 16: 4008 CopyOpc = AArch64::DUPi16; 4009 ExtractSubReg = AArch64::hsub; 4010 break; 4011 case 32: 4012 CopyOpc = AArch64::DUPi32; 4013 ExtractSubReg = AArch64::ssub; 4014 break; 4015 case 64: 4016 CopyOpc = AArch64::DUPi64; 4017 ExtractSubReg = AArch64::dsub; 4018 break; 4019 default: 4020 // Unknown size, bail out. 
4021 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n"); 4022 return false; 4023 } 4024 return true; 4025 } 4026 4027 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt( 4028 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, 4029 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const { 4030 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4031 unsigned CopyOpc = 0; 4032 unsigned ExtractSubReg = 0; 4033 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) { 4034 LLVM_DEBUG( 4035 dbgs() << "Couldn't determine lane copy opcode for instruction.\n"); 4036 return nullptr; 4037 } 4038 4039 const TargetRegisterClass *DstRC = 4040 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true); 4041 if (!DstRC) { 4042 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n"); 4043 return nullptr; 4044 } 4045 4046 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI); 4047 const LLT &VecTy = MRI.getType(VecReg); 4048 const TargetRegisterClass *VecRC = 4049 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true); 4050 if (!VecRC) { 4051 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); 4052 return nullptr; 4053 } 4054 4055 // The register that we're going to copy into. 4056 Register InsertReg = VecReg; 4057 if (!DstReg) 4058 DstReg = MRI.createVirtualRegister(DstRC); 4059 // If the lane index is 0, we just use a subregister COPY. 4060 if (LaneIdx == 0) { 4061 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {}) 4062 .addReg(VecReg, 0, ExtractSubReg); 4063 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 4064 return &*Copy; 4065 } 4066 4067 // Lane copies require 128-bit wide registers. If we're dealing with an 4068 // unpacked vector, then we need to move up to that width. Insert an implicit 4069 // def and a subregister insert to get us there. 4070 if (VecTy.getSizeInBits() != 128) { 4071 MachineInstr *ScalarToVector = emitScalarToVector( 4072 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder); 4073 if (!ScalarToVector) 4074 return nullptr; 4075 InsertReg = ScalarToVector->getOperand(0).getReg(); 4076 } 4077 4078 MachineInstr *LaneCopyMI = 4079 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx); 4080 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); 4081 4082 // Make sure that we actually constrain the initial copy. 4083 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 4084 return LaneCopyMI; 4085 } 4086 4087 bool AArch64InstructionSelector::selectExtractElt( 4088 MachineInstr &I, MachineRegisterInfo &MRI) { 4089 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && 4090 "unexpected opcode!"); 4091 Register DstReg = I.getOperand(0).getReg(); 4092 const LLT NarrowTy = MRI.getType(DstReg); 4093 const Register SrcReg = I.getOperand(1).getReg(); 4094 const LLT WideTy = MRI.getType(SrcReg); 4095 (void)WideTy; 4096 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && 4097 "source register size too small!"); 4098 assert(!NarrowTy.isVector() && "cannot extract vector into vector!"); 4099 4100 // Need the lane index to determine the correct copy opcode. 4101 MachineOperand &LaneIdxOp = I.getOperand(2); 4102 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?"); 4103 4104 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { 4105 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n"); 4106 return false; 4107 } 4108 4109 // Find the index to extract from. 
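// Only constant lane indices are supported here; if the lookthrough below
// does not find a constant, we bail rather than emit a variable-index
// extract.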
4110 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI); 4111 if (!VRegAndVal) 4112 return false; 4113 unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); 4114 4115 4116 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 4117 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg, 4118 LaneIdx, MIB); 4119 if (!Extract) 4120 return false; 4121 4122 I.eraseFromParent(); 4123 return true; 4124 } 4125 4126 bool AArch64InstructionSelector::selectSplitVectorUnmerge( 4127 MachineInstr &I, MachineRegisterInfo &MRI) { 4128 unsigned NumElts = I.getNumOperands() - 1; 4129 Register SrcReg = I.getOperand(NumElts).getReg(); 4130 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 4131 const LLT SrcTy = MRI.getType(SrcReg); 4132 4133 assert(NarrowTy.isVector() && "Expected an unmerge into vectors"); 4134 if (SrcTy.getSizeInBits() > 128) { 4135 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge"); 4136 return false; 4137 } 4138 4139 // We implement a split vector operation by treating the sub-vectors as 4140 // scalars and extracting them. 4141 const RegisterBank &DstRB = 4142 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI); 4143 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) { 4144 Register Dst = I.getOperand(OpIdx).getReg(); 4145 MachineInstr *Extract = 4146 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB); 4147 if (!Extract) 4148 return false; 4149 } 4150 I.eraseFromParent(); 4151 return true; 4152 } 4153 4154 bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I, 4155 MachineRegisterInfo &MRI) { 4156 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 4157 "unexpected opcode"); 4158 4159 // TODO: Handle unmerging into GPRs and from scalars to scalars. 4160 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() != 4161 AArch64::FPRRegBankID || 4162 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != 4163 AArch64::FPRRegBankID) { 4164 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " 4165 "currently unsupported.\n"); 4166 return false; 4167 } 4168 4169 // The last operand is the vector source register, and every other operand is 4170 // a register to unpack into. 4171 unsigned NumElts = I.getNumOperands() - 1; 4172 Register SrcReg = I.getOperand(NumElts).getReg(); 4173 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 4174 const LLT WideTy = MRI.getType(SrcReg); 4175 (void)WideTy; 4176 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) && 4177 "can only unmerge from vector or s128 types!"); 4178 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && 4179 "source register size too small!"); 4180 4181 if (!NarrowTy.isScalar()) 4182 return selectSplitVectorUnmerge(I, MRI); 4183 4184 // Choose a lane copy opcode and subregister based off of the size of the 4185 // vector's elements. 4186 unsigned CopyOpc = 0; 4187 unsigned ExtractSubReg = 0; 4188 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) 4189 return false; 4190 4191 // Set up for the lane copies. 4192 MachineBasicBlock &MBB = *I.getParent(); 4193 4194 // Stores the registers we'll be copying from. 4195 SmallVector<Register, 4> InsertRegs; 4196 4197 // We'll use the first register twice, so we only need NumElts-1 registers. 4198 unsigned NumInsertRegs = NumElts - 1; 4199 4200 // If our elements fit into exactly 128 bits, then we can copy from the source 4201 // directly. 
Otherwise, we need to do a bit of setup with some subregister 4202 // inserts. 4203 if (NarrowTy.getSizeInBits() * NumElts == 128) { 4204 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg); 4205 } else { 4206 // No. We have to perform subregister inserts. For each insert, create an 4207 // implicit def and a subregister insert, and save the register we create. 4208 const TargetRegisterClass *RC = 4209 getMinClassForRegBank(*RBI.getRegBank(SrcReg, MRI, TRI), 4210 WideTy.getScalarSizeInBits() * NumElts); 4211 unsigned SubReg = 0; 4212 bool Found = getSubRegForClass(RC, TRI, SubReg); 4213 (void)Found; 4214 assert(Found && "expected to find last operand's subeg idx"); 4215 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) { 4216 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); 4217 MachineInstr &ImpDefMI = 4218 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF), 4219 ImpDefReg); 4220 4221 // Now, create the subregister insert from SrcReg. 4222 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); 4223 MachineInstr &InsMI = 4224 *BuildMI(MBB, I, I.getDebugLoc(), 4225 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg) 4226 .addUse(ImpDefReg) 4227 .addUse(SrcReg) 4228 .addImm(SubReg); 4229 4230 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI); 4231 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI); 4232 4233 // Save the register so that we can copy from it after. 4234 InsertRegs.push_back(InsertReg); 4235 } 4236 } 4237 4238 // Now that we've created any necessary subregister inserts, we can 4239 // create the copies. 4240 // 4241 // Perform the first copy separately as a subregister copy. 4242 Register CopyTo = I.getOperand(0).getReg(); 4243 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {}) 4244 .addReg(InsertRegs[0], 0, ExtractSubReg); 4245 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI); 4246 4247 // Now, perform the remaining copies as vector lane copies. 4248 unsigned LaneIdx = 1; 4249 for (Register InsReg : InsertRegs) { 4250 Register CopyTo = I.getOperand(LaneIdx).getReg(); 4251 MachineInstr &CopyInst = 4252 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo) 4253 .addUse(InsReg) 4254 .addImm(LaneIdx); 4255 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI); 4256 ++LaneIdx; 4257 } 4258 4259 // Separately constrain the first copy's destination. Because of the 4260 // limitation in constrainOperandRegClass, we can't guarantee that this will 4261 // actually be constrained. So, do it ourselves using the second operand. 
4262 const TargetRegisterClass *RC = 4263 MRI.getRegClassOrNull(I.getOperand(1).getReg()); 4264 if (!RC) { 4265 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n"); 4266 return false; 4267 } 4268 4269 RBI.constrainGenericRegister(CopyTo, *RC, MRI); 4270 I.eraseFromParent(); 4271 return true; 4272 } 4273 4274 bool AArch64InstructionSelector::selectConcatVectors( 4275 MachineInstr &I, MachineRegisterInfo &MRI) { 4276 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && 4277 "Unexpected opcode"); 4278 Register Dst = I.getOperand(0).getReg(); 4279 Register Op1 = I.getOperand(1).getReg(); 4280 Register Op2 = I.getOperand(2).getReg(); 4281 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB); 4282 if (!ConcatMI) 4283 return false; 4284 I.eraseFromParent(); 4285 return true; 4286 } 4287 4288 unsigned 4289 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal, 4290 MachineFunction &MF) const { 4291 Type *CPTy = CPVal->getType(); 4292 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy); 4293 4294 MachineConstantPool *MCP = MF.getConstantPool(); 4295 return MCP->getConstantPoolIndex(CPVal, Alignment); 4296 } 4297 4298 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool( 4299 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const { 4300 auto &MF = MIRBuilder.getMF(); 4301 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF); 4302 4303 auto Adrp = 4304 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {}) 4305 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE); 4306 4307 MachineInstr *LoadMI = nullptr; 4308 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF); 4309 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType()); 4310 switch (Size) { 4311 case 16: 4312 LoadMI = 4313 &*MIRBuilder 4314 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp}) 4315 .addConstantPoolIndex(CPIdx, 0, 4316 AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 4317 break; 4318 case 8: 4319 LoadMI = 4320 &*MIRBuilder 4321 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp}) 4322 .addConstantPoolIndex(CPIdx, 0, 4323 AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 4324 break; 4325 case 4: 4326 LoadMI = 4327 &*MIRBuilder 4328 .buildInstr(AArch64::LDRSui, {&AArch64::FPR32RegClass}, {Adrp}) 4329 .addConstantPoolIndex(CPIdx, 0, 4330 AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 4331 break; 4332 case 2: 4333 LoadMI = 4334 &*MIRBuilder 4335 .buildInstr(AArch64::LDRHui, {&AArch64::FPR16RegClass}, {Adrp}) 4336 .addConstantPoolIndex(CPIdx, 0, 4337 AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 4338 break; 4339 default: 4340 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type " 4341 << *CPVal->getType()); 4342 return nullptr; 4343 } 4344 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo, 4345 MachineMemOperand::MOLoad, 4346 Size, Align(Size))); 4347 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI); 4348 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI); 4349 return LoadMI; 4350 } 4351 4352 /// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given 4353 /// size and RB. 
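/// For example (illustrative): a 32-bit element coming from a GPR maps to
/// {INSvi32gpr, ssub}, while the same element already on the FPR bank maps
/// to {INSvi32lane, ssub}.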
4354 static std::pair<unsigned, unsigned> 4355 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { 4356 unsigned Opc, SubregIdx; 4357 if (RB.getID() == AArch64::GPRRegBankID) { 4358 if (EltSize == 16) { 4359 Opc = AArch64::INSvi16gpr; 4360 SubregIdx = AArch64::ssub; 4361 } else if (EltSize == 32) { 4362 Opc = AArch64::INSvi32gpr; 4363 SubregIdx = AArch64::ssub; 4364 } else if (EltSize == 64) { 4365 Opc = AArch64::INSvi64gpr; 4366 SubregIdx = AArch64::dsub; 4367 } else { 4368 llvm_unreachable("invalid elt size!"); 4369 } 4370 } else { 4371 if (EltSize == 8) { 4372 Opc = AArch64::INSvi8lane; 4373 SubregIdx = AArch64::bsub; 4374 } else if (EltSize == 16) { 4375 Opc = AArch64::INSvi16lane; 4376 SubregIdx = AArch64::hsub; 4377 } else if (EltSize == 32) { 4378 Opc = AArch64::INSvi32lane; 4379 SubregIdx = AArch64::ssub; 4380 } else if (EltSize == 64) { 4381 Opc = AArch64::INSvi64lane; 4382 SubregIdx = AArch64::dsub; 4383 } else { 4384 llvm_unreachable("invalid elt size!"); 4385 } 4386 } 4387 return std::make_pair(Opc, SubregIdx); 4388 } 4389 4390 MachineInstr *AArch64InstructionSelector::emitInstr( 4391 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, 4392 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, 4393 const ComplexRendererFns &RenderFns) const { 4394 assert(Opcode && "Expected an opcode?"); 4395 assert(!isPreISelGenericOpcode(Opcode) && 4396 "Function should only be used to produce selected instructions!"); 4397 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps); 4398 if (RenderFns) 4399 for (auto &Fn : *RenderFns) 4400 Fn(MI); 4401 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); 4402 return &*MI; 4403 } 4404 4405 MachineInstr *AArch64InstructionSelector::emitAddSub( 4406 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, 4407 Register Dst, MachineOperand &LHS, MachineOperand &RHS, 4408 MachineIRBuilder &MIRBuilder) const { 4409 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4410 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); 4411 auto Ty = MRI.getType(LHS.getReg()); 4412 assert(!Ty.isVector() && "Expected a scalar or pointer?"); 4413 unsigned Size = Ty.getSizeInBits(); 4414 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only"); 4415 bool Is32Bit = Size == 32; 4416 4417 // INSTRri form with positive arithmetic immediate. 4418 if (auto Fns = selectArithImmed(RHS)) 4419 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS}, 4420 MIRBuilder, Fns); 4421 4422 // INSTRri form with negative arithmetic immediate. 4423 if (auto Fns = selectNegArithImmed(RHS)) 4424 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS}, 4425 MIRBuilder, Fns); 4426 4427 // INSTRrx form. 4428 if (auto Fns = selectArithExtendedRegister(RHS)) 4429 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS}, 4430 MIRBuilder, Fns); 4431 4432 // INSTRrs form. 
4433 if (auto Fns = selectShiftedRegister(RHS)) 4434 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS}, 4435 MIRBuilder, Fns); 4436 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS}, 4437 MIRBuilder); 4438 } 4439 4440 MachineInstr * 4441 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, 4442 MachineOperand &RHS, 4443 MachineIRBuilder &MIRBuilder) const { 4444 const std::array<std::array<unsigned, 2>, 5> OpcTable{ 4445 {{AArch64::ADDXri, AArch64::ADDWri}, 4446 {AArch64::ADDXrs, AArch64::ADDWrs}, 4447 {AArch64::ADDXrr, AArch64::ADDWrr}, 4448 {AArch64::SUBXri, AArch64::SUBWri}, 4449 {AArch64::ADDXrx, AArch64::ADDWrx}}}; 4450 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder); 4451 } 4452 4453 MachineInstr * 4454 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, 4455 MachineOperand &RHS, 4456 MachineIRBuilder &MIRBuilder) const { 4457 const std::array<std::array<unsigned, 2>, 5> OpcTable{ 4458 {{AArch64::ADDSXri, AArch64::ADDSWri}, 4459 {AArch64::ADDSXrs, AArch64::ADDSWrs}, 4460 {AArch64::ADDSXrr, AArch64::ADDSWrr}, 4461 {AArch64::SUBSXri, AArch64::SUBSWri}, 4462 {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; 4463 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); 4464 } 4465 4466 MachineInstr * 4467 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, 4468 MachineOperand &RHS, 4469 MachineIRBuilder &MIRBuilder) const { 4470 const std::array<std::array<unsigned, 2>, 5> OpcTable{ 4471 {{AArch64::SUBSXri, AArch64::SUBSWri}, 4472 {AArch64::SUBSXrs, AArch64::SUBSWrs}, 4473 {AArch64::SUBSXrr, AArch64::SUBSWrr}, 4474 {AArch64::ADDSXri, AArch64::ADDSWri}, 4475 {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; 4476 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); 4477 } 4478 4479 MachineInstr * 4480 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, 4481 MachineIRBuilder &MIRBuilder) const { 4482 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4483 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); 4484 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; 4485 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); 4486 } 4487 4488 MachineInstr * 4489 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS, 4490 MachineIRBuilder &MIRBuilder) const { 4491 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); 4492 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4493 LLT Ty = MRI.getType(LHS.getReg()); 4494 unsigned RegSize = Ty.getSizeInBits(); 4495 bool Is32Bit = (RegSize == 32); 4496 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri}, 4497 {AArch64::ANDSXrs, AArch64::ANDSWrs}, 4498 {AArch64::ANDSXrr, AArch64::ANDSWrr}}; 4499 // ANDS needs a logical immediate for its immediate form. Check if we can 4500 // fold one in. 
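// e.g. (illustrative) tst x0, #0xff becomes ANDSXri with the encoded form of
// 0xff; constants that are not valid logical immediates fall through to the
// shifted-register or plain register forms below.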
4501 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) { 4502 int64_t Imm = ValAndVReg->Value.getSExtValue(); 4503 4504 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) { 4505 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS}); 4506 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); 4507 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 4508 return &*TstMI; 4509 } 4510 } 4511 4512 if (auto Fns = selectLogicalShiftedRegister(RHS)) 4513 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns); 4514 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder); 4515 } 4516 4517 MachineInstr *AArch64InstructionSelector::emitIntegerCompare( 4518 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, 4519 MachineIRBuilder &MIRBuilder) const { 4520 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 4521 assert(Predicate.isPredicate() && "Expected predicate?"); 4522 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4523 LLT CmpTy = MRI.getType(LHS.getReg()); 4524 assert(!CmpTy.isVector() && "Expected scalar or pointer"); 4525 unsigned Size = CmpTy.getSizeInBits(); 4526 (void)Size; 4527 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?"); 4528 // Fold the compare into a cmn or tst if possible. 4529 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder)) 4530 return FoldCmp; 4531 auto Dst = MRI.cloneVirtualRegister(LHS.getReg()); 4532 return emitSUBS(Dst, LHS, RHS, MIRBuilder); 4533 } 4534 4535 MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( 4536 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const { 4537 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4538 #ifndef NDEBUG 4539 LLT Ty = MRI.getType(Dst); 4540 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && 4541 "Expected a 32-bit scalar register?"); 4542 #endif 4543 const Register ZReg = AArch64::WZR; 4544 AArch64CC::CondCode CC1, CC2; 4545 changeFCMPPredToAArch64CC(Pred, CC1, CC2); 4546 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1); 4547 if (CC2 == AArch64CC::AL) 4548 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, 4549 MIRBuilder); 4550 const TargetRegisterClass *RC = &AArch64::GPR32RegClass; 4551 Register Def1Reg = MRI.createVirtualRegister(RC); 4552 Register Def2Reg = MRI.createVirtualRegister(RC); 4553 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2); 4554 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder); 4555 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder); 4556 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg}); 4557 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI); 4558 return &*OrMI; 4559 } 4560 4561 MachineInstr * 4562 AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS, 4563 MachineIRBuilder &MIRBuilder, 4564 Optional<CmpInst::Predicate> Pred) const { 4565 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4566 LLT Ty = MRI.getType(LHS); 4567 if (Ty.isVector()) 4568 return nullptr; 4569 unsigned OpSize = Ty.getSizeInBits(); 4570 if (OpSize != 32 && OpSize != 64) 4571 return nullptr; 4572 4573 // If this is a compare against +0.0, then we don't have 4574 // to explicitly materialize a constant. 
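// e.g. (illustrative) a compare against +0.0 selects FCMPSri/FCMPDri, which
// compare against #0.0 directly; any other constant is materialized and the
// register form is used instead.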
4575 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI); 4576 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); 4577 4578 auto IsEqualityPred = [](CmpInst::Predicate P) { 4579 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE || 4580 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE; 4581 }; 4582 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) { 4583 // Try commutating the operands. 4584 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI); 4585 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) { 4586 ShouldUseImm = true; 4587 std::swap(LHS, RHS); 4588 } 4589 } 4590 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, 4591 {AArch64::FCMPSri, AArch64::FCMPDri}}; 4592 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64]; 4593 4594 // Partially build the compare. Decide if we need to add a use for the 4595 // third operand based off whether or not we're comparing against 0.0. 4596 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS); 4597 if (!ShouldUseImm) 4598 CmpMI.addUse(RHS); 4599 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 4600 return &*CmpMI; 4601 } 4602 4603 MachineInstr *AArch64InstructionSelector::emitVectorConcat( 4604 Optional<Register> Dst, Register Op1, Register Op2, 4605 MachineIRBuilder &MIRBuilder) const { 4606 // We implement a vector concat by: 4607 // 1. Use scalar_to_vector to insert the lower vector into the larger dest 4608 // 2. Insert the upper vector into the destination's upper element 4609 // TODO: some of this code is common with G_BUILD_VECTOR handling. 4610 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4611 4612 const LLT Op1Ty = MRI.getType(Op1); 4613 const LLT Op2Ty = MRI.getType(Op2); 4614 4615 if (Op1Ty != Op2Ty) { 4616 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys"); 4617 return nullptr; 4618 } 4619 assert(Op1Ty.isVector() && "Expected a vector for vector concat"); 4620 4621 if (Op1Ty.getSizeInBits() >= 128) { 4622 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors"); 4623 return nullptr; 4624 } 4625 4626 // At the moment we just support 64 bit vector concats. 4627 if (Op1Ty.getSizeInBits() != 64) { 4628 LLVM_DEBUG(dbgs() << "Vector concat supported for 64b vectors"); 4629 return nullptr; 4630 } 4631 4632 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits()); 4633 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI); 4634 const TargetRegisterClass *DstRC = 4635 getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2); 4636 4637 MachineInstr *WidenedOp1 = 4638 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder); 4639 MachineInstr *WidenedOp2 = 4640 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder); 4641 if (!WidenedOp1 || !WidenedOp2) { 4642 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value"); 4643 return nullptr; 4644 } 4645 4646 // Now do the insert of the upper element. 
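// Sketch for two <2 x s32> inputs (both already widened to FPR128 above):
// INSvi64lane copies lane 0 of the widened second operand into lane 1 of the
// first, yielding the <4 x s32> concatenation.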
4647 unsigned InsertOpc, InsSubRegIdx; 4648 std::tie(InsertOpc, InsSubRegIdx) = 4649 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits()); 4650 4651 if (!Dst) 4652 Dst = MRI.createVirtualRegister(DstRC); 4653 auto InsElt = 4654 MIRBuilder 4655 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()}) 4656 .addImm(1) /* Lane index */ 4657 .addUse(WidenedOp2->getOperand(0).getReg()) 4658 .addImm(0); 4659 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); 4660 return &*InsElt; 4661 } 4662 4663 MachineInstr * 4664 AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1, 4665 Register Src2, AArch64CC::CondCode Pred, 4666 MachineIRBuilder &MIRBuilder) const { 4667 auto &MRI = *MIRBuilder.getMRI(); 4668 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst); 4669 // If we used a register class, then this won't necessarily have an LLT. 4670 // Compute the size based off whether or not we have a class or bank. 4671 unsigned Size; 4672 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>()) 4673 Size = TRI.getRegSizeInBits(*RC); 4674 else 4675 Size = MRI.getType(Dst).getSizeInBits(); 4676 // Some opcodes use s1. 4677 assert(Size <= 64 && "Expected 64 bits or less only!"); 4678 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr}; 4679 unsigned Opc = OpcTable[Size == 64]; 4680 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred); 4681 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI); 4682 return &*CSINC; 4683 } 4684 4685 std::pair<MachineInstr *, AArch64CC::CondCode> 4686 AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, 4687 MachineOperand &LHS, 4688 MachineOperand &RHS, 4689 MachineIRBuilder &MIRBuilder) const { 4690 switch (Opcode) { 4691 default: 4692 llvm_unreachable("Unexpected opcode!"); 4693 case TargetOpcode::G_SADDO: 4694 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); 4695 case TargetOpcode::G_UADDO: 4696 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS); 4697 case TargetOpcode::G_SSUBO: 4698 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); 4699 case TargetOpcode::G_USUBO: 4700 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO); 4701 } 4702 } 4703 4704 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) { 4705 MachineRegisterInfo &MRI = *MIB.getMRI(); 4706 // We want to recognize this pattern: 4707 // 4708 // $z = G_FCMP pred, $x, $y 4709 // ... 4710 // $w = G_SELECT $z, $a, $b 4711 // 4712 // Where the value of $z is *only* ever used by the G_SELECT (possibly with 4713 // some copies/truncs in between.) 4714 // 4715 // If we see this, then we can emit something like this: 4716 // 4717 // fcmp $x, $y 4718 // fcsel $w, $a, $b, pred 4719 // 4720 // Rather than emitting both of the rather long sequences in the standard 4721 // G_FCMP/G_SELECT select methods. 4722 4723 // First, check if the condition is defined by a compare. 4724 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg()); 4725 while (CondDef) { 4726 // We can only fold if all of the defs have one use. 4727 Register CondDefReg = CondDef->getOperand(0).getReg(); 4728 if (!MRI.hasOneNonDBGUse(CondDefReg)) { 4729 // Unless it's another select. 
4730 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) { 4731 if (CondDef == &UI) 4732 continue; 4733 if (UI.getOpcode() != TargetOpcode::G_SELECT) 4734 return false; 4735 } 4736 } 4737 4738 // We can skip over G_TRUNC since the condition is 1-bit. 4739 // Truncating/extending can have no impact on the value. 4740 unsigned Opc = CondDef->getOpcode(); 4741 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC) 4742 break; 4743 4744 // Can't see past copies from physregs. 4745 if (Opc == TargetOpcode::COPY && 4746 Register::isPhysicalRegister(CondDef->getOperand(1).getReg())) 4747 return false; 4748 4749 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg()); 4750 } 4751 4752 // Is the condition defined by a compare? 4753 if (!CondDef) 4754 return false; 4755 4756 unsigned CondOpc = CondDef->getOpcode(); 4757 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) 4758 return false; 4759 4760 AArch64CC::CondCode CondCode; 4761 if (CondOpc == TargetOpcode::G_ICMP) { 4762 auto Pred = 4763 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); 4764 CondCode = changeICMPPredToAArch64CC(Pred); 4765 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), 4766 CondDef->getOperand(1), MIB); 4767 } else { 4768 // Get the condition code for the select. 4769 auto Pred = 4770 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); 4771 AArch64CC::CondCode CondCode2; 4772 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2); 4773 4774 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two 4775 // instructions to emit the comparison. 4776 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be 4777 // unnecessary. 4778 if (CondCode2 != AArch64CC::AL) 4779 return false; 4780 4781 if (!emitFPCompare(CondDef->getOperand(2).getReg(), 4782 CondDef->getOperand(3).getReg(), MIB)) { 4783 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); 4784 return false; 4785 } 4786 } 4787 4788 // Emit the select. 4789 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(), 4790 I.getOperand(3).getReg(), CondCode, MIB); 4791 I.eraseFromParent(); 4792 return true; 4793 } 4794 4795 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( 4796 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, 4797 MachineIRBuilder &MIRBuilder) const { 4798 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && 4799 "Unexpected MachineOperand"); 4800 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4801 // We want to find this sort of thing: 4802 // x = G_SUB 0, y 4803 // G_ICMP z, x 4804 // 4805 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead. 
4806 // e.g: 4807 // 4808 // cmn z, y 4809 4810 // Check if the RHS or LHS of the G_ICMP is defined by a SUB 4811 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI); 4812 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI); 4813 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate()); 4814 // Given this: 4815 // 4816 // x = G_SUB 0, y 4817 // G_ICMP x, z 4818 // 4819 // Produce this: 4820 // 4821 // cmn y, z 4822 if (isCMN(LHSDef, P, MRI)) 4823 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder); 4824 4825 // Same idea here, but with the RHS of the compare instead: 4826 // 4827 // Given this: 4828 // 4829 // x = G_SUB 0, y 4830 // G_ICMP z, x 4831 // 4832 // Produce this: 4833 // 4834 // cmn z, y 4835 if (isCMN(RHSDef, P, MRI)) 4836 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder); 4837 4838 // Given this: 4839 // 4840 // z = G_AND x, y 4841 // G_ICMP z, 0 4842 // 4843 // Produce this if the compare is signed: 4844 // 4845 // tst x, y 4846 if (!CmpInst::isUnsigned(P) && LHSDef && 4847 LHSDef->getOpcode() == TargetOpcode::G_AND) { 4848 // Make sure that the RHS is 0. 4849 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI); 4850 if (!ValAndVReg || ValAndVReg->Value != 0) 4851 return nullptr; 4852 4853 return emitTST(LHSDef->getOperand(1), 4854 LHSDef->getOperand(2), MIRBuilder); 4855 } 4856 4857 return nullptr; 4858 } 4859 4860 bool AArch64InstructionSelector::selectShuffleVector( 4861 MachineInstr &I, MachineRegisterInfo &MRI) { 4862 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 4863 Register Src1Reg = I.getOperand(1).getReg(); 4864 const LLT Src1Ty = MRI.getType(Src1Reg); 4865 Register Src2Reg = I.getOperand(2).getReg(); 4866 const LLT Src2Ty = MRI.getType(Src2Reg); 4867 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask(); 4868 4869 MachineBasicBlock &MBB = *I.getParent(); 4870 MachineFunction &MF = *MBB.getParent(); 4871 LLVMContext &Ctx = MF.getFunction().getContext(); 4872 4873 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if 4874 // it's originated from a <1 x T> type. Those should have been lowered into 4875 // G_BUILD_VECTOR earlier. 4876 if (!Src1Ty.isVector() || !Src2Ty.isVector()) { 4877 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n"); 4878 return false; 4879 } 4880 4881 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; 4882 4883 SmallVector<Constant *, 64> CstIdxs; 4884 for (int Val : Mask) { 4885 // For now, any undef indexes we'll just assume to be 0. This should be 4886 // optimized in future, e.g. to select DUP etc. 4887 Val = Val < 0 ? 0 : Val; 4888 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { 4889 unsigned Offset = Byte + Val * BytesPerElt; 4890 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset)); 4891 } 4892 } 4893 4894 // Use a constant pool to load the index vector for TBL. 4895 Constant *CPVal = ConstantVector::get(CstIdxs); 4896 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB); 4897 if (!IndexLoad) { 4898 LLVM_DEBUG(dbgs() << "Could not load from a constant pool"); 4899 return false; 4900 } 4901 4902 if (DstTy.getSizeInBits() != 128) { 4903 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty"); 4904 // This case can be done with TBL1. 
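// Roughly, the sequence built below is (illustrative):
//
//   %concat:fpr128 = <Src1Reg and Src2Reg concatenated>
//   %indices:fpr128 = <index vector loaded from the constant pool, widened>
//   %tbl:fpr128 = TBLv16i8One %concat, %indices
//   %dst:fpr64 = COPY %tbl.dsub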
4905 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIB); 4906 if (!Concat) { 4907 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1"); 4908 return false; 4909 } 4910 4911 // The constant pool load will be 64 bits, so need to convert to FPR128 reg. 4912 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass, 4913 IndexLoad->getOperand(0).getReg(), MIB); 4914 4915 auto TBL1 = MIB.buildInstr( 4916 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass}, 4917 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()}); 4918 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI); 4919 4920 auto Copy = 4921 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 4922 .addReg(TBL1.getReg(0), 0, AArch64::dsub); 4923 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI); 4924 I.eraseFromParent(); 4925 return true; 4926 } 4927 4928 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive 4929 // Q registers for regalloc. 4930 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg}; 4931 auto RegSeq = createQTuple(Regs, MIB); 4932 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)}, 4933 {RegSeq, IndexLoad->getOperand(0)}); 4934 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI); 4935 I.eraseFromParent(); 4936 return true; 4937 } 4938 4939 MachineInstr *AArch64InstructionSelector::emitLaneInsert( 4940 Optional<Register> DstReg, Register SrcReg, Register EltReg, 4941 unsigned LaneIdx, const RegisterBank &RB, 4942 MachineIRBuilder &MIRBuilder) const { 4943 MachineInstr *InsElt = nullptr; 4944 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; 4945 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4946 4947 // Create a register to define with the insert if one wasn't passed in. 
4948 if (!DstReg) 4949 DstReg = MRI.createVirtualRegister(DstRC); 4950 4951 unsigned EltSize = MRI.getType(EltReg).getSizeInBits(); 4952 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first; 4953 4954 if (RB.getID() == AArch64::FPRRegBankID) { 4955 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder); 4956 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) 4957 .addImm(LaneIdx) 4958 .addUse(InsSub->getOperand(0).getReg()) 4959 .addImm(0); 4960 } else { 4961 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) 4962 .addImm(LaneIdx) 4963 .addUse(EltReg); 4964 } 4965 4966 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); 4967 return InsElt; 4968 } 4969 4970 bool AArch64InstructionSelector::selectUSMovFromExtend( 4971 MachineInstr &MI, MachineRegisterInfo &MRI) { 4972 if (MI.getOpcode() != TargetOpcode::G_SEXT && 4973 MI.getOpcode() != TargetOpcode::G_ZEXT && 4974 MI.getOpcode() != TargetOpcode::G_ANYEXT) 4975 return false; 4976 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT; 4977 const Register DefReg = MI.getOperand(0).getReg(); 4978 const LLT DstTy = MRI.getType(DefReg); 4979 unsigned DstSize = DstTy.getSizeInBits(); 4980 4981 if (DstSize != 32 && DstSize != 64) 4982 return false; 4983 4984 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT, 4985 MI.getOperand(1).getReg(), MRI); 4986 int64_t Lane; 4987 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane))) 4988 return false; 4989 Register Src0 = Extract->getOperand(1).getReg(); 4990 4991 const LLT &VecTy = MRI.getType(Src0); 4992 4993 if (VecTy.getSizeInBits() != 128) { 4994 const MachineInstr *ScalarToVector = emitScalarToVector( 4995 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB); 4996 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!"); 4997 Src0 = ScalarToVector->getOperand(0).getReg(); 4998 } 4999 5000 unsigned Opcode; 5001 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32) 5002 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32; 5003 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16) 5004 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16; 5005 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8) 5006 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8; 5007 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16) 5008 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16; 5009 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8) 5010 Opcode = IsSigned ? 
AArch64::SMOVvi8to32 : AArch64::UMOVvi8; 5011 else 5012 llvm_unreachable("Unexpected type combo for S/UMov!"); 5013 5014 // We may need to generate one of these, depending on the type and sign of the 5015 // input: 5016 // DstReg = SMOV Src0, Lane; 5017 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32; 5018 MachineInstr *ExtI = nullptr; 5019 if (DstSize == 64 && !IsSigned) { 5020 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 5021 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane); 5022 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) 5023 .addImm(0) 5024 .addUse(NewReg) 5025 .addImm(AArch64::sub_32); 5026 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); 5027 } else 5028 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane); 5029 5030 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 5031 MI.eraseFromParent(); 5032 return true; 5033 } 5034 5035 bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I, 5036 MachineRegisterInfo &MRI) { 5037 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT); 5038 5039 // Get information on the destination. 5040 Register DstReg = I.getOperand(0).getReg(); 5041 const LLT DstTy = MRI.getType(DstReg); 5042 unsigned VecSize = DstTy.getSizeInBits(); 5043 5044 // Get information on the element we want to insert into the destination. 5045 Register EltReg = I.getOperand(2).getReg(); 5046 const LLT EltTy = MRI.getType(EltReg); 5047 unsigned EltSize = EltTy.getSizeInBits(); 5048 if (EltSize < 16 || EltSize > 64) 5049 return false; // Don't support all element types yet. 5050 5051 // Find the definition of the index. Bail out if it's not defined by a 5052 // G_CONSTANT. 5053 Register IdxReg = I.getOperand(3).getReg(); 5054 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI); 5055 if (!VRegAndVal) 5056 return false; 5057 unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); 5058 5059 // Perform the lane insert. 5060 Register SrcReg = I.getOperand(1).getReg(); 5061 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); 5062 5063 if (VecSize < 128) { 5064 // If the vector we're inserting into is smaller than 128 bits, widen it 5065 // to 128 to do the insert. 5066 MachineInstr *ScalarToVec = 5067 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB); 5068 if (!ScalarToVec) 5069 return false; 5070 SrcReg = ScalarToVec->getOperand(0).getReg(); 5071 } 5072 5073 // Create an insert into a new FPR128 register. 5074 // Note that if our vector is already 128 bits, we end up emitting an extra 5075 // register. 5076 MachineInstr *InsMI = 5077 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIB); 5078 5079 if (VecSize < 128) { 5080 // If we had to widen to perform the insert, then we have to demote back to 5081 // the original size to get the result we want. 5082 Register DemoteVec = InsMI->getOperand(0).getReg(); 5083 const TargetRegisterClass *RC = 5084 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize); 5085 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { 5086 LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); 5087 return false; 5088 } 5089 unsigned SubReg = 0; 5090 if (!getSubRegForClass(RC, TRI, SubReg)) 5091 return false; 5092 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { 5093 LLVM_DEBUG(dbgs() << "Unsupported destination size! 
(" << VecSize 5094 << "\n"); 5095 return false; 5096 } 5097 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 5098 .addReg(DemoteVec, 0, SubReg); 5099 RBI.constrainGenericRegister(DstReg, *RC, MRI); 5100 } else { 5101 // No widening needed. 5102 InsMI->getOperand(0).setReg(DstReg); 5103 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); 5104 } 5105 5106 I.eraseFromParent(); 5107 return true; 5108 } 5109 5110 MachineInstr * 5111 AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, 5112 MachineIRBuilder &MIRBuilder, 5113 MachineRegisterInfo &MRI) { 5114 LLT DstTy = MRI.getType(Dst); 5115 unsigned DstSize = DstTy.getSizeInBits(); 5116 if (CV->isNullValue()) { 5117 if (DstSize == 128) { 5118 auto Mov = 5119 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0); 5120 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); 5121 return &*Mov; 5122 } 5123 5124 if (DstSize == 64) { 5125 auto Mov = 5126 MIRBuilder 5127 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) 5128 .addImm(0); 5129 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {}) 5130 .addReg(Mov.getReg(0), 0, AArch64::dsub); 5131 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI); 5132 return &*Copy; 5133 } 5134 } 5135 5136 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder); 5137 if (!CPLoad) { 5138 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!"); 5139 return nullptr; 5140 } 5141 5142 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0)); 5143 RBI.constrainGenericRegister( 5144 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI); 5145 return &*Copy; 5146 } 5147 5148 bool AArch64InstructionSelector::tryOptConstantBuildVec( 5149 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) { 5150 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 5151 unsigned DstSize = DstTy.getSizeInBits(); 5152 assert(DstSize <= 128 && "Unexpected build_vec type!"); 5153 if (DstSize < 32) 5154 return false; 5155 // Check if we're building a constant vector, in which case we want to 5156 // generate a constant pool load instead of a vector insert sequence. 5157 SmallVector<Constant *, 16> Csts; 5158 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) { 5159 // Try to find G_CONSTANT or G_FCONSTANT 5160 auto *OpMI = 5161 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI); 5162 if (OpMI) 5163 Csts.emplace_back( 5164 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm())); 5165 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT, 5166 I.getOperand(Idx).getReg(), MRI))) 5167 Csts.emplace_back( 5168 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm())); 5169 else 5170 return false; 5171 } 5172 Constant *CV = ConstantVector::get(Csts); 5173 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI)) 5174 return false; 5175 I.eraseFromParent(); 5176 return true; 5177 } 5178 5179 bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg( 5180 MachineInstr &I, MachineRegisterInfo &MRI) { 5181 // Given: 5182 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef 5183 // 5184 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt. 5185 Register Dst = I.getOperand(0).getReg(); 5186 Register EltReg = I.getOperand(1).getReg(); 5187 LLT EltTy = MRI.getType(EltReg); 5188 // If the index isn't on the same bank as its elements, then this can't be a 5189 // SUBREG_TO_REG. 
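// (E.g. a GPR element feeding an FPR destination would need a cross-bank copy
// first, so it would not be a plain subregister insert.) When the banks do
// match, the selected form is just something like (illustrative):
//
//   %vec:fpr64 = SUBREG_TO_REG 0, %elt:fpr32, ssub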
5190 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); 5191 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI); 5192 if (EltRB != DstRB) 5193 return false; 5194 if (any_of(make_range(I.operands_begin() + 2, I.operands_end()), 5195 [&MRI](const MachineOperand &Op) { 5196 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), 5197 MRI); 5198 })) 5199 return false; 5200 unsigned SubReg; 5201 const TargetRegisterClass *EltRC = 5202 getMinClassForRegBank(EltRB, EltTy.getSizeInBits()); 5203 if (!EltRC) 5204 return false; 5205 const TargetRegisterClass *DstRC = 5206 getMinClassForRegBank(DstRB, MRI.getType(Dst).getSizeInBits()); 5207 if (!DstRC) 5208 return false; 5209 if (!getSubRegForClass(EltRC, TRI, SubReg)) 5210 return false; 5211 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {}) 5212 .addImm(0) 5213 .addUse(EltReg) 5214 .addImm(SubReg); 5215 I.eraseFromParent(); 5216 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI); 5217 return RBI.constrainGenericRegister(Dst, *DstRC, MRI); 5218 } 5219 5220 bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I, 5221 MachineRegisterInfo &MRI) { 5222 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 5223 // Until we port more of the optimized selections, for now just use a vector 5224 // insert sequence. 5225 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 5226 const LLT EltTy = MRI.getType(I.getOperand(1).getReg()); 5227 unsigned EltSize = EltTy.getSizeInBits(); 5228 5229 if (tryOptConstantBuildVec(I, DstTy, MRI)) 5230 return true; 5231 if (tryOptBuildVecToSubregToReg(I, MRI)) 5232 return true; 5233 5234 if (EltSize < 16 || EltSize > 64) 5235 return false; // Don't support all element types yet. 5236 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); 5237 5238 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; 5239 MachineInstr *ScalarToVec = 5240 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC, 5241 I.getOperand(1).getReg(), MIB); 5242 if (!ScalarToVec) 5243 return false; 5244 5245 Register DstVec = ScalarToVec->getOperand(0).getReg(); 5246 unsigned DstSize = DstTy.getSizeInBits(); 5247 5248 // Keep track of the last MI we inserted. Later on, we might be able to save 5249 // a copy using it. 5250 MachineInstr *PrevMI = nullptr; 5251 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) { 5252 // Note that if we don't do a subregister copy, we can end up making an 5253 // extra register. 5254 PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB, 5255 MIB); 5256 DstVec = PrevMI->getOperand(0).getReg(); 5257 } 5258 5259 // If DstTy's size in bits is less than 128, then emit a subregister copy 5260 // from DstVec to the last register we've defined. 5261 if (DstSize < 128) { 5262 // Force this to be FPR using the destination vector. 5263 const TargetRegisterClass *RC = 5264 getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize); 5265 if (!RC) 5266 return false; 5267 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { 5268 LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); 5269 return false; 5270 } 5271 5272 unsigned SubReg = 0; 5273 if (!getSubRegForClass(RC, TRI, SubReg)) 5274 return false; 5275 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { 5276 LLVM_DEBUG(dbgs() << "Unsupported destination size! 
(" << DstSize 5277 << "\n"); 5278 return false; 5279 } 5280 5281 Register Reg = MRI.createVirtualRegister(RC); 5282 Register DstReg = I.getOperand(0).getReg(); 5283 5284 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg); 5285 MachineOperand &RegOp = I.getOperand(1); 5286 RegOp.setReg(Reg); 5287 RBI.constrainGenericRegister(DstReg, *RC, MRI); 5288 } else { 5289 // We don't need a subregister copy. Save a copy by re-using the 5290 // destination register on the final insert. 5291 assert(PrevMI && "PrevMI was null?"); 5292 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg()); 5293 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI); 5294 } 5295 5296 I.eraseFromParent(); 5297 return true; 5298 } 5299 5300 bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc, 5301 unsigned NumVecs, 5302 MachineInstr &I) { 5303 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); 5304 assert(Opc && "Expected an opcode?"); 5305 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors"); 5306 auto &MRI = *MIB.getMRI(); 5307 LLT Ty = MRI.getType(I.getOperand(0).getReg()); 5308 unsigned Size = Ty.getSizeInBits(); 5309 assert((Size == 64 || Size == 128) && 5310 "Destination must be 64 bits or 128 bits?"); 5311 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0; 5312 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg(); 5313 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?"); 5314 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr}); 5315 Load.cloneMemRefs(I); 5316 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI); 5317 Register SelectedLoadDst = Load->getOperand(0).getReg(); 5318 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { 5319 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {}) 5320 .addReg(SelectedLoadDst, 0, SubReg + Idx); 5321 // Emit the subreg copies and immediately select them. 5322 // FIXME: We should refactor our copy code into an emitCopy helper and 5323 // clean up uses of this pattern elsewhere in the selector. 5324 selectCopy(*Vec, TII, MRI, TRI, RBI); 5325 } 5326 return true; 5327 } 5328 5329 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( 5330 MachineInstr &I, MachineRegisterInfo &MRI) { 5331 // Find the intrinsic ID. 5332 unsigned IntrinID = I.getIntrinsicID(); 5333 5334 const LLT S8 = LLT::scalar(8); 5335 const LLT S16 = LLT::scalar(16); 5336 const LLT S32 = LLT::scalar(32); 5337 const LLT S64 = LLT::scalar(64); 5338 const LLT P0 = LLT::pointer(0, 64); 5339 // Select the instruction. 5340 switch (IntrinID) { 5341 default: 5342 return false; 5343 case Intrinsic::aarch64_ldxp: 5344 case Intrinsic::aarch64_ldaxp: { 5345 auto NewI = MIB.buildInstr( 5346 IntrinID == Intrinsic::aarch64_ldxp ? 
AArch64::LDXPX : AArch64::LDAXPX, 5347 {I.getOperand(0).getReg(), I.getOperand(1).getReg()}, 5348 {I.getOperand(3)}); 5349 NewI.cloneMemRefs(I); 5350 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); 5351 break; 5352 } 5353 case Intrinsic::trap: 5354 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1); 5355 break; 5356 case Intrinsic::debugtrap: 5357 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); 5358 break; 5359 case Intrinsic::ubsantrap: 5360 MIB.buildInstr(AArch64::BRK, {}, {}) 5361 .addImm(I.getOperand(1).getImm() | ('U' << 8)); 5362 break; 5363 case Intrinsic::aarch64_neon_ld2: { 5364 LLT Ty = MRI.getType(I.getOperand(0).getReg()); 5365 unsigned Opc = 0; 5366 if (Ty == LLT::fixed_vector(8, S8)) 5367 Opc = AArch64::LD2Twov8b; 5368 else if (Ty == LLT::fixed_vector(16, S8)) 5369 Opc = AArch64::LD2Twov16b; 5370 else if (Ty == LLT::fixed_vector(4, S16)) 5371 Opc = AArch64::LD2Twov4h; 5372 else if (Ty == LLT::fixed_vector(8, S16)) 5373 Opc = AArch64::LD2Twov8h; 5374 else if (Ty == LLT::fixed_vector(2, S32)) 5375 Opc = AArch64::LD2Twov2s; 5376 else if (Ty == LLT::fixed_vector(4, S32)) 5377 Opc = AArch64::LD2Twov4s; 5378 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) 5379 Opc = AArch64::LD2Twov2d; 5380 else if (Ty == S64 || Ty == P0) 5381 Opc = AArch64::LD1Twov1d; 5382 else 5383 llvm_unreachable("Unexpected type for ld2!"); 5384 selectVectorLoadIntrinsic(Opc, 2, I); 5385 break; 5386 } 5387 case Intrinsic::aarch64_neon_ld4: { 5388 LLT Ty = MRI.getType(I.getOperand(0).getReg()); 5389 unsigned Opc = 0; 5390 if (Ty == LLT::fixed_vector(8, S8)) 5391 Opc = AArch64::LD4Fourv8b; 5392 else if (Ty == LLT::fixed_vector(16, S8)) 5393 Opc = AArch64::LD4Fourv16b; 5394 else if (Ty == LLT::fixed_vector(4, S16)) 5395 Opc = AArch64::LD4Fourv4h; 5396 else if (Ty == LLT::fixed_vector(8, S16)) 5397 Opc = AArch64::LD4Fourv8h; 5398 else if (Ty == LLT::fixed_vector(2, S32)) 5399 Opc = AArch64::LD4Fourv2s; 5400 else if (Ty == LLT::fixed_vector(4, S32)) 5401 Opc = AArch64::LD4Fourv4s; 5402 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) 5403 Opc = AArch64::LD4Fourv2d; 5404 else if (Ty == S64 || Ty == P0) 5405 Opc = AArch64::LD1Fourv1d; 5406 else 5407 llvm_unreachable("Unexpected type for ld4!"); 5408 selectVectorLoadIntrinsic(Opc, 4, I); 5409 break; 5410 } 5411 case Intrinsic::aarch64_neon_st2: { 5412 Register Src1 = I.getOperand(1).getReg(); 5413 Register Src2 = I.getOperand(2).getReg(); 5414 Register Ptr = I.getOperand(3).getReg(); 5415 LLT Ty = MRI.getType(Src1); 5416 unsigned Opc; 5417 if (Ty == LLT::fixed_vector(8, S8)) 5418 Opc = AArch64::ST2Twov8b; 5419 else if (Ty == LLT::fixed_vector(16, S8)) 5420 Opc = AArch64::ST2Twov16b; 5421 else if (Ty == LLT::fixed_vector(4, S16)) 5422 Opc = AArch64::ST2Twov4h; 5423 else if (Ty == LLT::fixed_vector(8, S16)) 5424 Opc = AArch64::ST2Twov8h; 5425 else if (Ty == LLT::fixed_vector(2, S32)) 5426 Opc = AArch64::ST2Twov2s; 5427 else if (Ty == LLT::fixed_vector(4, S32)) 5428 Opc = AArch64::ST2Twov4s; 5429 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) 5430 Opc = AArch64::ST2Twov2d; 5431 else if (Ty == S64 || Ty == P0) 5432 Opc = AArch64::ST1Twov1d; 5433 else 5434 llvm_unreachable("Unexpected type for st2!"); 5435 SmallVector<Register, 2> Regs = {Src1, Src2}; 5436 Register Tuple = Ty.getSizeInBits() == 128 ? 
createQTuple(Regs, MIB)
5437 : createDTuple(Regs, MIB);
5438 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5439 Store.cloneMemRefs(I);
5440 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5441 break;
5442 }
5443 case Intrinsic::aarch64_mops_memset_tag: {
5444 // Transform
5445 // %dst:gpr(p0) = \
5446 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
5447 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
5448 // where %dst is updated, into
5449 // %Rd:GPR64common, %Rn:GPR64 = \
5450 // MOPSMemorySetTaggingPseudo \
5451 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
5452 // where Rd and Rn are tied.
5453 // It is expected that %val has been extended to s64 in legalization.
5454 // Note that the order of the size/value operands is swapped.
5455
5456 Register DstDef = I.getOperand(0).getReg();
5457 // I.getOperand(1) is the intrinsic function
5458 Register DstUse = I.getOperand(2).getReg();
5459 Register ValUse = I.getOperand(3).getReg();
5460 Register SizeUse = I.getOperand(4).getReg();
5461
5462 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
5463 // Therefore an additional virtual register is required for the updated size
5464 // operand. This value is not accessible via the semantics of the intrinsic.
5465 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
5466
5467 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
5468 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
5469 Memset.cloneMemRefs(I);
5470 constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
5471 break;
5472 }
5473 }
5474
5475 I.eraseFromParent();
5476 return true;
5477 }
5478
5479 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
5480 MachineRegisterInfo &MRI) {
5481 unsigned IntrinID = I.getIntrinsicID();
5482
5483 switch (IntrinID) {
5484 default:
5485 break;
5486 case Intrinsic::aarch64_crypto_sha1h: {
5487 Register DstReg = I.getOperand(0).getReg();
5488 Register SrcReg = I.getOperand(2).getReg();
5489
5490 // FIXME: Should this be an assert?
5491 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
5492 MRI.getType(SrcReg).getSizeInBits() != 32)
5493 return false;
5494
5495 // The operation has to happen on FPRs. Set up some new FPR registers for
5496 // the source and destination if they are on GPRs.
5497 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
5498 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5499 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
5500
5501 // Make sure the copy ends up getting constrained properly.
5502 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
5503 AArch64::GPR32RegClass, MRI);
5504 }
5505
5506 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
5507 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5508
5509 // Actually insert the instruction.
5510 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
5511 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
5512
5513 // Did we create a new register for the destination?
5514 if (DstReg != I.getOperand(0).getReg()) {
5515 // Yep. Copy the result of the instruction back into the original
5516 // destination.
5517 MIB.buildCopy({I.getOperand(0)}, {DstReg}); 5518 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 5519 AArch64::GPR32RegClass, MRI); 5520 } 5521 5522 I.eraseFromParent(); 5523 return true; 5524 } 5525 case Intrinsic::ptrauth_sign: { 5526 Register DstReg = I.getOperand(0).getReg(); 5527 Register ValReg = I.getOperand(2).getReg(); 5528 uint64_t Key = I.getOperand(3).getImm(); 5529 Register DiscReg = I.getOperand(4).getReg(); 5530 auto DiscVal = getIConstantVRegVal(DiscReg, MRI); 5531 bool IsDiscZero = DiscVal.hasValue() && DiscVal->isNullValue(); 5532 5533 if (Key > 3) 5534 return false; 5535 5536 unsigned Opcodes[][4] = { 5537 {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB}, 5538 {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}}; 5539 unsigned Opcode = Opcodes[IsDiscZero][Key]; 5540 5541 auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg}); 5542 5543 if (!IsDiscZero) { 5544 PAC.addUse(DiscReg); 5545 RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI); 5546 } 5547 5548 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); 5549 I.eraseFromParent(); 5550 return true; 5551 } 5552 case Intrinsic::frameaddress: 5553 case Intrinsic::returnaddress: { 5554 MachineFunction &MF = *I.getParent()->getParent(); 5555 MachineFrameInfo &MFI = MF.getFrameInfo(); 5556 5557 unsigned Depth = I.getOperand(2).getImm(); 5558 Register DstReg = I.getOperand(0).getReg(); 5559 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); 5560 5561 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) { 5562 if (!MFReturnAddr) { 5563 // Insert the copy from LR/X30 into the entry block, before it can be 5564 // clobbered by anything. 5565 MFI.setReturnAddressIsTaken(true); 5566 MFReturnAddr = getFunctionLiveInPhysReg( 5567 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc()); 5568 } 5569 5570 if (STI.hasPAuth()) { 5571 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr}); 5572 } else { 5573 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr}); 5574 MIB.buildInstr(AArch64::XPACLRI); 5575 MIB.buildCopy({DstReg}, {Register(AArch64::LR)}); 5576 } 5577 5578 I.eraseFromParent(); 5579 return true; 5580 } 5581 5582 MFI.setFrameAddressIsTaken(true); 5583 Register FrameAddr(AArch64::FP); 5584 while (Depth--) { 5585 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); 5586 auto Ldr = 5587 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0); 5588 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI); 5589 FrameAddr = NextFrame; 5590 } 5591 5592 if (IntrinID == Intrinsic::frameaddress) 5593 MIB.buildCopy({DstReg}, {FrameAddr}); 5594 else { 5595 MFI.setReturnAddressIsTaken(true); 5596 5597 if (STI.hasPAuth()) { 5598 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 5599 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1); 5600 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg}); 5601 } else { 5602 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}) 5603 .addImm(1); 5604 MIB.buildInstr(AArch64::XPACLRI); 5605 MIB.buildCopy({DstReg}, {Register(AArch64::LR)}); 5606 } 5607 } 5608 5609 I.eraseFromParent(); 5610 return true; 5611 } 5612 case Intrinsic::swift_async_context_addr: 5613 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()}, 5614 {Register(AArch64::FP)}) 5615 .addImm(8) 5616 .addImm(0); 5617 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI); 5618 5619 MF->getFrameInfo().setFrameAddressIsTaken(true); 5620 
MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); 5621 I.eraseFromParent(); 5622 return true; 5623 } 5624 return false; 5625 } 5626 5627 InstructionSelector::ComplexRendererFns 5628 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { 5629 auto MaybeImmed = getImmedFromMO(Root); 5630 if (MaybeImmed == None || *MaybeImmed > 31) 5631 return None; 5632 uint64_t Enc = (32 - *MaybeImmed) & 0x1f; 5633 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 5634 } 5635 5636 InstructionSelector::ComplexRendererFns 5637 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { 5638 auto MaybeImmed = getImmedFromMO(Root); 5639 if (MaybeImmed == None || *MaybeImmed > 31) 5640 return None; 5641 uint64_t Enc = 31 - *MaybeImmed; 5642 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 5643 } 5644 5645 InstructionSelector::ComplexRendererFns 5646 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { 5647 auto MaybeImmed = getImmedFromMO(Root); 5648 if (MaybeImmed == None || *MaybeImmed > 63) 5649 return None; 5650 uint64_t Enc = (64 - *MaybeImmed) & 0x3f; 5651 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 5652 } 5653 5654 InstructionSelector::ComplexRendererFns 5655 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { 5656 auto MaybeImmed = getImmedFromMO(Root); 5657 if (MaybeImmed == None || *MaybeImmed > 63) 5658 return None; 5659 uint64_t Enc = 63 - *MaybeImmed; 5660 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 5661 } 5662 5663 /// Helper to select an immediate value that can be represented as a 12-bit 5664 /// value shifted left by either 0 or 12. If it is possible to do so, return 5665 /// the immediate and shift value. If not, return None. 5666 /// 5667 /// Used by selectArithImmed and selectNegArithImmed. 5668 InstructionSelector::ComplexRendererFns 5669 AArch64InstructionSelector::select12BitValueWithLeftShift( 5670 uint64_t Immed) const { 5671 unsigned ShiftAmt; 5672 if (Immed >> 12 == 0) { 5673 ShiftAmt = 0; 5674 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { 5675 ShiftAmt = 12; 5676 Immed = Immed >> 12; 5677 } else 5678 return None; 5679 5680 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); 5681 return {{ 5682 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); }, 5683 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); }, 5684 }}; 5685 } 5686 5687 /// SelectArithImmed - Select an immediate value that can be represented as 5688 /// a 12-bit value shifted left by either 0 or 12. If so, return true with 5689 /// Val set to the 12-bit value and Shift set to the shifter operand. 5690 InstructionSelector::ComplexRendererFns 5691 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const { 5692 // This function is called from the addsub_shifted_imm ComplexPattern, 5693 // which lists [imm] as the list of opcode it's interested in, however 5694 // we still need to check whether the operand is actually an immediate 5695 // here because the ComplexPattern opcode list is only used in 5696 // root-level opcode matching. 5697 auto MaybeImmed = getImmedFromMO(Root); 5698 if (MaybeImmed == None) 5699 return None; 5700 return select12BitValueWithLeftShift(*MaybeImmed); 5701 } 5702 5703 /// SelectNegArithImmed - As above, but negates the value before trying to 5704 /// select it. 
5705 InstructionSelector::ComplexRendererFns 5706 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const { 5707 // We need a register here, because we need to know if we have a 64 or 32 5708 // bit immediate. 5709 if (!Root.isReg()) 5710 return None; 5711 auto MaybeImmed = getImmedFromMO(Root); 5712 if (MaybeImmed == None) 5713 return None; 5714 uint64_t Immed = *MaybeImmed; 5715 5716 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" 5717 // have the opposite effect on the C flag, so this pattern mustn't match under 5718 // those circumstances. 5719 if (Immed == 0) 5720 return None; 5721 5722 // Check if we're dealing with a 32-bit type on the root or a 64-bit type on 5723 // the root. 5724 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5725 if (MRI.getType(Root.getReg()).getSizeInBits() == 32) 5726 Immed = ~((uint32_t)Immed) + 1; 5727 else 5728 Immed = ~Immed + 1ULL; 5729 5730 if (Immed & 0xFFFFFFFFFF000000ULL) 5731 return None; 5732 5733 Immed &= 0xFFFFFFULL; 5734 return select12BitValueWithLeftShift(Immed); 5735 } 5736 5737 /// Return true if it is worth folding MI into an extended register. That is, 5738 /// if it's safe to pull it into the addressing mode of a load or store as a 5739 /// shift. 5740 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( 5741 MachineInstr &MI, const MachineRegisterInfo &MRI) const { 5742 // Always fold if there is one use, or if we're optimizing for size. 5743 Register DefReg = MI.getOperand(0).getReg(); 5744 if (MRI.hasOneNonDBGUse(DefReg) || 5745 MI.getParent()->getParent()->getFunction().hasOptSize()) 5746 return true; 5747 5748 // It's better to avoid folding and recomputing shifts when we don't have a 5749 // fastpath. 5750 if (!STI.hasLSLFast()) 5751 return false; 5752 5753 // We have a fastpath, so folding a shift in and potentially computing it 5754 // many times may be beneficial. Check if this is only used in memory ops. 5755 // If it is, then we should fold. 5756 return all_of(MRI.use_nodbg_instructions(DefReg), 5757 [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); 5758 } 5759 5760 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { 5761 switch (Type) { 5762 case AArch64_AM::SXTB: 5763 case AArch64_AM::SXTH: 5764 case AArch64_AM::SXTW: 5765 return true; 5766 default: 5767 return false; 5768 } 5769 } 5770 5771 InstructionSelector::ComplexRendererFns 5772 AArch64InstructionSelector::selectExtendedSHL( 5773 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset, 5774 unsigned SizeInBytes, bool WantsExt) const { 5775 assert(Base.isReg() && "Expected base to be a register operand"); 5776 assert(Offset.isReg() && "Expected offset to be a register operand"); 5777 5778 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5779 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg()); 5780 if (!OffsetInst) 5781 return None; 5782 5783 unsigned OffsetOpc = OffsetInst->getOpcode(); 5784 bool LookedThroughZExt = false; 5785 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) { 5786 // Try to look through a ZEXT. 5787 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt) 5788 return None; 5789 5790 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg()); 5791 OffsetOpc = OffsetInst->getOpcode(); 5792 LookedThroughZExt = true; 5793 5794 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) 5795 return None; 5796 } 5797 // Make sure that the memory op is a valid size. 
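// e.g. (illustrative) for an 8-byte access only a shift by exactly 3 (or an
// equivalent multiply by 8) can be folded, giving an address like:
//
//   ldr x0, [base, offset, lsl #3]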
5798 int64_t LegalShiftVal = Log2_32(SizeInBytes); 5799 if (LegalShiftVal == 0) 5800 return None; 5801 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) 5802 return None; 5803 5804 // Now, try to find the specific G_CONSTANT. Start by assuming that the 5805 // register we will offset is the LHS, and the register containing the 5806 // constant is the RHS. 5807 Register OffsetReg = OffsetInst->getOperand(1).getReg(); 5808 Register ConstantReg = OffsetInst->getOperand(2).getReg(); 5809 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI); 5810 if (!ValAndVReg) { 5811 // We didn't get a constant on the RHS. If the opcode is a shift, then 5812 // we're done. 5813 if (OffsetOpc == TargetOpcode::G_SHL) 5814 return None; 5815 5816 // If we have a G_MUL, we can use either register. Try looking at the RHS. 5817 std::swap(OffsetReg, ConstantReg); 5818 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI); 5819 if (!ValAndVReg) 5820 return None; 5821 } 5822 5823 // The value must fit into 3 bits, and must be positive. Make sure that is 5824 // true. 5825 int64_t ImmVal = ValAndVReg->Value.getSExtValue(); 5826 5827 // Since we're going to pull this into a shift, the constant value must be 5828 // a power of 2. If we got a multiply, then we need to check this. 5829 if (OffsetOpc == TargetOpcode::G_MUL) { 5830 if (!isPowerOf2_32(ImmVal)) 5831 return None; 5832 5833 // Got a power of 2. So, the amount we'll shift is the log base-2 of that. 5834 ImmVal = Log2_32(ImmVal); 5835 } 5836 5837 if ((ImmVal & 0x7) != ImmVal) 5838 return None; 5839 5840 // We are only allowed to shift by LegalShiftVal. This shift value is built 5841 // into the instruction, so we can't just use whatever we want. 5842 if (ImmVal != LegalShiftVal) 5843 return None; 5844 5845 unsigned SignExtend = 0; 5846 if (WantsExt) { 5847 // Check if the offset is defined by an extend, unless we looked through a 5848 // G_ZEXT earlier. 5849 if (!LookedThroughZExt) { 5850 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI); 5851 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true); 5852 if (Ext == AArch64_AM::InvalidShiftExtend) 5853 return None; 5854 5855 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0; 5856 // We only support SXTW for signed extension here. 5857 if (SignExtend && Ext != AArch64_AM::SXTW) 5858 return None; 5859 OffsetReg = ExtInst->getOperand(1).getReg(); 5860 } 5861 5862 // Need a 32-bit wide register here. 5863 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg())); 5864 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB); 5865 } 5866 5867 // We can use the LHS of the GEP as the base, and the LHS of the shift as an 5868 // offset. Signify that we are shifting by setting the shift flag to 1. 5869 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); }, 5870 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); }, 5871 [=](MachineInstrBuilder &MIB) { 5872 // Need to add both immediates here to make sure that they are both 5873 // added to the instruction. 5874 MIB.addImm(SignExtend); 5875 MIB.addImm(1); 5876 }}}; 5877 } 5878 5879 /// This is used for computing addresses like this: 5880 /// 5881 /// ldr x1, [x2, x3, lsl #3] 5882 /// 5883 /// Where x2 is the base register, and x3 is an offset register. The shift-left 5884 /// is a constant value specific to this load instruction. That is, we'll never 5885 /// see anything other than a 3 here (which corresponds to the size of the 5886 /// element being loaded.) 
5887 InstructionSelector::ComplexRendererFns 5888 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( 5889 MachineOperand &Root, unsigned SizeInBytes) const { 5890 if (!Root.isReg()) 5891 return None; 5892 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5893 5894 // We want to find something like this: 5895 // 5896 // val = G_CONSTANT LegalShiftVal 5897 // shift = G_SHL off_reg val 5898 // ptr = G_PTR_ADD base_reg shift 5899 // x = G_LOAD ptr 5900 // 5901 // And fold it into this addressing mode: 5902 // 5903 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal] 5904 5905 // Check if we can find the G_PTR_ADD. 5906 MachineInstr *PtrAdd = 5907 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); 5908 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) 5909 return None; 5910 5911 // Now, try to match an opcode which will match our specific offset. 5912 // We want a G_SHL or a G_MUL. 5913 MachineInstr *OffsetInst = 5914 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI); 5915 return selectExtendedSHL(Root, PtrAdd->getOperand(1), 5916 OffsetInst->getOperand(0), SizeInBytes, 5917 /*WantsExt=*/false); 5918 } 5919 5920 /// This is used for computing addresses like this: 5921 /// 5922 /// ldr x1, [x2, x3] 5923 /// 5924 /// Where x2 is the base register, and x3 is an offset register. 5925 /// 5926 /// When possible (or profitable) to fold a G_PTR_ADD into the address calculation, 5927 /// this will do so. Otherwise, it will return None. 5928 InstructionSelector::ComplexRendererFns 5929 AArch64InstructionSelector::selectAddrModeRegisterOffset( 5930 MachineOperand &Root) const { 5931 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5932 5933 // We need a GEP. 5934 MachineInstr *Gep = MRI.getVRegDef(Root.getReg()); 5935 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD) 5936 return None; 5937 5938 // If this is used more than once, let's not bother folding. 5939 // TODO: Check if they are memory ops. If they are, then we can still fold 5940 // without having to recompute anything. 5941 if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg())) 5942 return None; 5943 5944 // Base is the GEP's LHS, offset is its RHS. 5945 return {{[=](MachineInstrBuilder &MIB) { 5946 MIB.addUse(Gep->getOperand(1).getReg()); 5947 }, 5948 [=](MachineInstrBuilder &MIB) { 5949 MIB.addUse(Gep->getOperand(2).getReg()); 5950 }, 5951 [=](MachineInstrBuilder &MIB) { 5952 // Need to add both immediates here to make sure that they are both 5953 // added to the instruction. 5954 MIB.addImm(0); 5955 MIB.addImm(0); 5956 }}}; 5957 } 5958 5959 /// This is intended to be equivalent to selectAddrModeXRO in 5960 /// AArch64ISelDAGtoDAG. It's used for selecting X register offset loads. 5961 InstructionSelector::ComplexRendererFns 5962 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root, 5963 unsigned SizeInBytes) const { 5964 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5965 if (!Root.isReg()) 5966 return None; 5967 MachineInstr *PtrAdd = 5968 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); 5969 if (!PtrAdd) 5970 return None; 5971 5972 // Check for an immediates which cannot be encoded in the [base + imm] 5973 // addressing mode, and can't be encoded in an add/sub. 
If this happens, we'll
5974 // end up with code like:
5975 //
5976 // mov x0, wide
5977 // add x1, base, x0
5978 // ldr x2, [x1, x0]
5979 //
5980 // In this situation, we can use the [base, xreg] addressing mode to save an
5981 // add/sub:
5982 //
5983 // mov x0, wide
5984 // ldr x2, [base, x0]
5985 auto ValAndVReg =
5986 getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
5987 if (ValAndVReg) {
5988 unsigned Scale = Log2_32(SizeInBytes);
5989 int64_t ImmOff = ValAndVReg->Value.getSExtValue();
5990
5991 // Skip immediates that can be selected in the load/store addressing
5992 // mode.
5993 if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
5994 ImmOff < (0x1000 << Scale))
5995 return None;
5996
5997 // Helper lambda to decide whether or not it is preferable to emit an add.
5998 auto isPreferredADD = [](int64_t ImmOff) {
5999 // Constants in [0x0, 0xfff] can be encoded in an add.
6000 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
6001 return true;
6002
6003 // Can it be encoded in an add lsl #12?
6004 if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
6005 return false;
6006
6007 // It can be encoded in an add lsl #12, but we may not want to. If it is
6008 // possible to select this as a single movz, then prefer that. A single
6009 // movz is faster than an add with a shift.
6010 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
6011 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
6012 };
6013
6014 // If the immediate can be encoded in a single add/sub, then bail out.
6015 if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
6016 return None;
6017 }
6018
6019 // Try to fold shifts into the addressing mode.
6020 auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
6021 if (AddrModeFns)
6022 return AddrModeFns;
6023
6024 // If that doesn't work, see if it's possible to fold in registers from
6025 // a GEP.
6026 return selectAddrModeRegisterOffset(Root);
6027 }
6028
6029 /// This is used for computing addresses like this:
6030 ///
6031 /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
6032 ///
6033 /// Where we have a 64-bit base register, a 32-bit offset register, and an
6034 /// extend (which may or may not be signed).
6035 InstructionSelector::ComplexRendererFns
6036 AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
6037 unsigned SizeInBytes) const {
6038 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6039
6040 MachineInstr *PtrAdd =
6041 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
6042 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
6043 return None;
6044
6045 MachineOperand &LHS = PtrAdd->getOperand(1);
6046 MachineOperand &RHS = PtrAdd->getOperand(2);
6047 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
6048
6049 // The first case is the same as selectAddrModeXRO, except we need an extend.
6050 // In this case, we try to find a shift and extend, and fold them into the
6051 // addressing mode.
6052 //
6053 // E.g.
6054 //
6055 // off_reg = G_Z/S/ANYEXT ext_reg
6056 // val = G_CONSTANT LegalShiftVal
6057 // shift = G_SHL off_reg val
6058 // ptr = G_PTR_ADD base_reg shift
6059 // x = G_LOAD ptr
6060 //
6061 // In this case we can get a load like this:
6062 //
6063 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
6064 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
6065 SizeInBytes, /*WantsExt=*/true);
6066 if (ExtendedShl)
6067 return ExtendedShl;
6068
6069 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
6070 // 6071 // e.g. 6072 // ldr something, [base_reg, ext_reg, sxtw] 6073 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) 6074 return None; 6075 6076 // Check if this is an extend. We'll get an extend type if it is. 6077 AArch64_AM::ShiftExtendType Ext = 6078 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true); 6079 if (Ext == AArch64_AM::InvalidShiftExtend) 6080 return None; 6081 6082 // Need a 32-bit wide register. 6083 MachineIRBuilder MIB(*PtrAdd); 6084 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(), 6085 AArch64::GPR32RegClass, MIB); 6086 unsigned SignExtend = Ext == AArch64_AM::SXTW; 6087 6088 // Base is LHS, offset is ExtReg. 6089 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); }, 6090 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, 6091 [=](MachineInstrBuilder &MIB) { 6092 MIB.addImm(SignExtend); 6093 MIB.addImm(0); 6094 }}}; 6095 } 6096 6097 /// Select a "register plus unscaled signed 9-bit immediate" address. This 6098 /// should only match when there is an offset that is not valid for a scaled 6099 /// immediate addressing mode. The "Size" argument is the size in bytes of the 6100 /// memory reference, which is needed here to know what is valid for a scaled 6101 /// immediate. 6102 InstructionSelector::ComplexRendererFns 6103 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, 6104 unsigned Size) const { 6105 MachineRegisterInfo &MRI = 6106 Root.getParent()->getParent()->getParent()->getRegInfo(); 6107 6108 if (!Root.isReg()) 6109 return None; 6110 6111 if (!isBaseWithConstantOffset(Root, MRI)) 6112 return None; 6113 6114 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); 6115 if (!RootDef) 6116 return None; 6117 6118 MachineOperand &OffImm = RootDef->getOperand(2); 6119 if (!OffImm.isReg()) 6120 return None; 6121 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); 6122 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT) 6123 return None; 6124 int64_t RHSC; 6125 MachineOperand &RHSOp1 = RHS->getOperand(1); 6126 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) 6127 return None; 6128 RHSC = RHSOp1.getCImm()->getSExtValue(); 6129 6130 // If the offset is valid as a scaled immediate, don't match here. 6131 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) 6132 return None; 6133 if (RHSC >= -256 && RHSC < 256) { 6134 MachineOperand &Base = RootDef->getOperand(1); 6135 return {{ 6136 [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, 6137 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, 6138 }}; 6139 } 6140 return None; 6141 } 6142 6143 InstructionSelector::ComplexRendererFns 6144 AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef, 6145 unsigned Size, 6146 MachineRegisterInfo &MRI) const { 6147 if (RootDef.getOpcode() != AArch64::G_ADD_LOW) 6148 return None; 6149 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg()); 6150 if (Adrp.getOpcode() != AArch64::ADRP) 6151 return None; 6152 6153 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG. 
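// When the fold applies, the net effect is to turn something like this
// (illustrative):
//
//   adrp x8, sym
//   add x8, x8, :lo12:sym
//   ldr x0, [x8]
//
// into:
//
//   adrp x8, sym
//   ldr x0, [x8, :lo12:sym]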
6154 auto Offset = Adrp.getOperand(1).getOffset(); 6155 if (Offset % Size != 0) 6156 return None; 6157 6158 auto GV = Adrp.getOperand(1).getGlobal(); 6159 if (GV->isThreadLocal()) 6160 return None; 6161 6162 auto &MF = *RootDef.getParent()->getParent(); 6163 if (GV->getPointerAlignment(MF.getDataLayout()) < Size) 6164 return None; 6165 6166 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget()); 6167 MachineIRBuilder MIRBuilder(RootDef); 6168 Register AdrpReg = Adrp.getOperand(0).getReg(); 6169 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); }, 6170 [=](MachineInstrBuilder &MIB) { 6171 MIB.addGlobalAddress(GV, Offset, 6172 OpFlags | AArch64II::MO_PAGEOFF | 6173 AArch64II::MO_NC); 6174 }}}; 6175 } 6176 6177 /// Select a "register plus scaled unsigned 12-bit immediate" address. The 6178 /// "Size" argument is the size in bytes of the memory reference, which 6179 /// determines the scale. 6180 InstructionSelector::ComplexRendererFns 6181 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, 6182 unsigned Size) const { 6183 MachineFunction &MF = *Root.getParent()->getParent()->getParent(); 6184 MachineRegisterInfo &MRI = MF.getRegInfo(); 6185 6186 if (!Root.isReg()) 6187 return None; 6188 6189 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); 6190 if (!RootDef) 6191 return None; 6192 6193 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { 6194 return {{ 6195 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, 6196 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, 6197 }}; 6198 } 6199 6200 CodeModel::Model CM = MF.getTarget().getCodeModel(); 6201 // Check if we can fold in the ADD of small code model ADRP + ADD address. 6202 if (CM == CodeModel::Small) { 6203 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI); 6204 if (OpFns) 6205 return OpFns; 6206 } 6207 6208 if (isBaseWithConstantOffset(Root, MRI)) { 6209 MachineOperand &LHS = RootDef->getOperand(1); 6210 MachineOperand &RHS = RootDef->getOperand(2); 6211 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); 6212 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); 6213 if (LHSDef && RHSDef) { 6214 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue(); 6215 unsigned Scale = Log2_32(Size); 6216 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { 6217 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) 6218 return {{ 6219 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); }, 6220 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, 6221 }}; 6222 6223 return {{ 6224 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, 6225 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, 6226 }}; 6227 } 6228 } 6229 } 6230 6231 // Before falling back to our general case, check if the unscaled 6232 // instructions can handle this. If so, that's preferable. 6233 if (selectAddrModeUnscaled(Root, Size).hasValue()) 6234 return None; 6235 6236 return {{ 6237 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, 6238 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, 6239 }}; 6240 } 6241 6242 /// Given a shift instruction, return the correct shift type for that 6243 /// instruction. 
/// Given a shift instruction, return the correct shift type for that
/// instruction.
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case TargetOpcode::G_SHL:
    return AArch64_AM::LSL;
  case TargetOpcode::G_LSHR:
    return AArch64_AM::LSR;
  case TargetOpcode::G_ASHR:
    return AArch64_AM::ASR;
  case TargetOpcode::G_ROTR:
    return AArch64_AM::ROR;
  }
}

/// Select a "shifted register" operand. If the value is not shifted, set the
/// shift operand to a default value of "lsl 0".
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
                                                  bool AllowROR) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // Check if the operand is defined by an instruction which corresponds to
  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
  if (!ShiftInst)
    return None;
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return None;
  if (ShType == AArch64_AM::ROR && !AllowROR)
    return None;
  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
    return None;

  // Need an immediate on the RHS.
  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
  auto Immed = getImmedFromMO(ShiftRHS);
  if (!Immed)
    return None;

  // We have something that we can fold. Fold the shift's LHS and RHS into
  // the instruction.
  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
  Register ShiftReg = ShiftLHS.getReg();

  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
  unsigned Val = *Immed & (NumBits - 1);
  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
}
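
// For example, a G_ADD whose RHS is defined by "G_SHL %x, G_CONSTANT 3" can
// fold the shift into the shifted-register form (illustrative):
//   add x0, x1, x2, lsl #3
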
AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  unsigned Opc = MI.getOpcode();

  // Handle explicit extend instructions first.
  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
    unsigned Size;
    if (Opc == TargetOpcode::G_SEXT)
      Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    else
      Size = MI.getOperand(2).getImm();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
    case 32:
      return AArch64_AM::SXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
    case 32:
      return AArch64_AM::UXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
  // on the RHS.
  if (Opc != TargetOpcode::G_AND)
    return AArch64_AM::InvalidShiftExtend;

  Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
  if (!MaybeAndMask)
    return AArch64_AM::InvalidShiftExtend;
  uint64_t AndMask = *MaybeAndMask;
  switch (AndMask) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case 0xFF:
    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
  case 0xFFFF:
    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
  case 0xFFFFFFFF:
    return AArch64_AM::UXTW;
  }
}

Register AArch64InstructionSelector::moveScalarRegClass(
    Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Ty = MRI.getType(Reg);
  assert(!Ty.isVector() && "Expected scalars only!");
  if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
    return Reg;

  // Create a copy and immediately select it.
  // FIXME: We should have an emitCopy function?
  auto Copy = MIB.buildCopy({&RC}, {Reg});
  selectCopy(*Copy, TII, MRI, TRI, RBI);
  return Copy.getReg(0);
}

/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return None;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  uint64_t ShiftVal = 0;
  Register ExtReg;
  AArch64_AM::ShiftExtendType Ext;
  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
  if (!RootDef)
    return None;

  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
    return None;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    MachineOperand &RHS = RootDef->getOperand(2);
    Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
    if (!MaybeShiftVal)
      return None;
    ShiftVal = *MaybeShiftVal;
    if (ShiftVal > 4)
      return None;
    // Look for a valid extend instruction on the LHS of the shift.
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
    if (!ExtDef)
      return None;
    Ext = getExtendTypeForInst(*ExtDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;
    ExtReg = ExtDef->getOperand(1).getReg();
  } else {
    // Didn't get a shift. Try just folding an extend.
    Ext = getExtendTypeForInst(*RootDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return None;
    ExtReg = RootDef->getOperand(1).getReg();

    // If we have a 32-bit instruction which zeroes out the high half of a
    // register, we get an implicit zero extend for free. Check if we have one.
    // FIXME: We actually emit the extend right now even though we don't have
    // to.
    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
      if (ExtInst && isDef32(*ExtInst))
        return None;
    }
  }

  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
  // copy.
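  // The rendered operands then feed an arith extended-register instruction,
  // e.g. (illustrative): add x0, x1, w2, sxtw #2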
  MachineIRBuilder MIB(*RootDef);
  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
           }}};
}

void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  Optional<int64_t> CstVal =
      getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(CstVal.getValue());
}

void AArch64InstructionSelector::renderLogicalImm32(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
  MIB.addImm(Enc);
}

void AArch64InstructionSelector::renderLogicalImm64(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
  MIB.addImm(Enc);
}

void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
    const MachineInstr &MI, unsigned NumBytes) const {
  if (!MI.mayLoadOrStore())
    return false;
  assert(MI.hasOneMemOperand() &&
         "Expected load/store to have only one mem op!");
  return (*MI.memoperands_begin())->getSize() == NumBytes;
}

bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
    return false;

  // Only return true if we know the operation will zero-out the high half of
  // the 64-bit register. Truncates can be subregister copies, which don't
  // zero out the high bits. Copies and other copy-like instructions can be
  // fed by truncates, or could be lowered as subregister copies.
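  // E.g. %t:gpr(s32) = G_TRUNC %x:gpr(s64) may be selected as a subregister
  // copy, which leaves the high 32 bits of the underlying 64-bit register
  // unchanged.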
  switch (MI.getOpcode()) {
  default:
    return true;
  case TargetOpcode::COPY:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_PHI:
    return false;
  }
}


// Perform fixups on the given PHI instruction's operands to force them all
// to be the same as the destination regbank.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
  Register DstReg = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
  assert(DstRB && "Expected PHI dst to have regbank assigned");
  MachineIRBuilder MIB(MI);

  // Go through each operand and ensure it has the same regbank.
  for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
    if (!MO.isReg())
      continue;
    Register OpReg = MO.getReg();
    const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
    if (RB != DstRB) {
      // Insert a cross-bank copy.
      auto *OpDef = MRI.getVRegDef(OpReg);
      const LLT &Ty = MRI.getType(OpReg);
      MachineBasicBlock &OpDefBB = *OpDef->getParent();

      // Any instruction we insert must appear after all PHIs in the block
      // for the block to be valid MIR.
      MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
      if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
        InsertPt = OpDefBB.getFirstNonPHI();
      MIB.setInsertPt(*OpDef->getParent(), InsertPt);
      auto Copy = MIB.buildCopy(Ty, OpReg);
      MRI.setRegBank(Copy.getReg(0), *DstRB);
      MO.setReg(Copy.getReg(0));
    }
  }
}

void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  // We're looking for PHIs; build a list so we don't invalidate iterators.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<MachineInstr *, 32> Phis;
  for (auto &BB : MF) {
    for (auto &MI : BB) {
      if (MI.getOpcode() == TargetOpcode::G_PHI)
        Phis.emplace_back(&MI);
    }
  }

  for (auto *MI : Phis) {
    // We need to do some work here if the operand types are < 16 bit and they
    // are split across fpr/gpr banks. Since all types <32b on gpr
    // end up being assigned gpr32 regclasses, we can end up with PHIs here
    // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
    // be selecting heterogeneous regbanks for operands if possible, but we
    // still need to be able to deal with it here.
    //
    // To fix this, if we have a gpr-bank operand < 32b in size and at least
    // one other operand is on the fpr bank, then we add cross-bank copies
    // to homogenize the operand banks. For simplicity the bank that we choose
    // to settle on is whatever bank the def operand has. For example:
    //
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
    //   =>
    // %bb2:
    //   ...
    //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
    //   ...
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
    bool HasGPROp = false, HasFPROp = false;
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
      if (!MO.isReg())
        continue;
      const LLT &Ty = MRI.getType(MO.getReg());
      if (!Ty.isValid() || !Ty.isScalar())
        break;
      if (Ty.getSizeInBits() >= 32)
        break;
      const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
      // If for some reason we don't have a regbank yet, don't try anything.
      if (!RB)
        break;

      if (RB->getID() == AArch64::GPRRegBankID)
        HasGPROp = true;
      else
        HasFPROp = true;
    }
    // We have heterogeneous regbanks; fix them up.
    if (HasGPROp && HasFPROp)
      fixupPHIOpBanks(*MI, MRI, RBI);
  }
}

namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 AArch64Subtarget &Subtarget,
                                 AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
}