1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements the targeting of the InstructionSelector class for 10 /// AArch64. 11 /// \todo This should be generated by TableGen. 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64InstrInfo.h" 15 #include "AArch64MachineFunctionInfo.h" 16 #include "AArch64RegisterBankInfo.h" 17 #include "AArch64RegisterInfo.h" 18 #include "AArch64Subtarget.h" 19 #include "AArch64TargetMachine.h" 20 #include "MCTargetDesc/AArch64AddressingModes.h" 21 #include "MCTargetDesc/AArch64MCTargetDesc.h" 22 #include "llvm/ADT/Optional.h" 23 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 24 #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" 25 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 26 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 27 #include "llvm/CodeGen/GlobalISel/Utils.h" 28 #include "llvm/CodeGen/MachineBasicBlock.h" 29 #include "llvm/CodeGen/MachineConstantPool.h" 30 #include "llvm/CodeGen/MachineFunction.h" 31 #include "llvm/CodeGen/MachineInstr.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineOperand.h" 34 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/CodeGen/TargetOpcodes.h" 36 #include "llvm/IR/Constants.h" 37 #include "llvm/IR/Instructions.h" 38 #include "llvm/IR/PatternMatch.h" 39 #include "llvm/IR/Type.h" 40 #include "llvm/IR/IntrinsicsAArch64.h" 41 #include "llvm/Pass.h" 42 #include "llvm/Support/Debug.h" 43 #include "llvm/Support/raw_ostream.h" 44 45 #define DEBUG_TYPE "aarch64-isel" 46 47 using namespace llvm; 48 using namespace MIPatternMatch; 49 50 namespace { 51 52 #define GET_GLOBALISEL_PREDICATE_BITSET 53 #include "AArch64GenGlobalISel.inc" 54 #undef GET_GLOBALISEL_PREDICATE_BITSET 55 56 class AArch64InstructionSelector : public InstructionSelector { 57 public: 58 AArch64InstructionSelector(const AArch64TargetMachine &TM, 59 const AArch64Subtarget &STI, 60 const AArch64RegisterBankInfo &RBI); 61 62 bool select(MachineInstr &I) override; 63 static const char *getName() { return DEBUG_TYPE; } 64 65 void setupMF(MachineFunction &MF, GISelKnownBits &KB, 66 CodeGenCoverage &CoverageInfo) override { 67 InstructionSelector::setupMF(MF, KB, CoverageInfo); 68 69 // hasFnAttribute() is expensive to call on every BRCOND selection, so 70 // cache it here for each run of the selector. 71 ProduceNonFlagSettingCondBr = 72 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); 73 MFReturnAddr = Register(); 74 75 processPHIs(MF); 76 } 77 78 private: 79 /// tblgen-erated 'select' implementation, used as the initial selector for 80 /// the patterns that don't require complex C++. 81 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; 82 83 // A lowering phase that runs before any selection attempts. 84 // Returns true if the instruction was modified. 85 bool preISelLower(MachineInstr &I); 86 87 // An early selection function that runs before the selectImpl() call. 88 bool earlySelect(MachineInstr &I) const; 89 90 // Do some preprocessing of G_PHIs before we begin selection. 
91 void processPHIs(MachineFunction &MF); 92 93 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const; 94 95 /// Eliminate same-sized cross-bank copies into stores before selectImpl(). 96 bool contractCrossBankCopyIntoStore(MachineInstr &I, 97 MachineRegisterInfo &MRI); 98 99 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI); 100 101 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, 102 MachineRegisterInfo &MRI) const; 103 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, 104 MachineRegisterInfo &MRI) const; 105 106 ///@{ 107 /// Helper functions for selectCompareBranch. 108 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, 109 MachineIRBuilder &MIB) const; 110 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, 111 MachineIRBuilder &MIB) const; 112 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, 113 MachineIRBuilder &MIB) const; 114 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, 115 MachineBasicBlock *DstMBB, 116 MachineIRBuilder &MIB) const; 117 ///@} 118 119 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, 120 MachineRegisterInfo &MRI) const; 121 122 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI) const; 123 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI) const; 124 125 // Helper to generate an equivalent of scalar_to_vector into a new register, 126 // returned via 'Dst'. 127 MachineInstr *emitScalarToVector(unsigned EltSize, 128 const TargetRegisterClass *DstRC, 129 Register Scalar, 130 MachineIRBuilder &MIRBuilder) const; 131 132 /// Emit a lane insert into \p DstReg, or a new vector register if None is 133 /// provided. 134 /// 135 /// The lane inserted into is defined by \p LaneIdx. The vector source 136 /// register is given by \p SrcReg. The register containing the element is 137 /// given by \p EltReg. 
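137 /// given by \p EltReg.
///
/// A typical call, with hypothetical register names, might look like:
///
/// \code
///   // Insert the value in EltReg into lane 1 of VecReg; passing None for
///   // the destination makes emitLaneInsert create a fresh vector register.
///   MachineInstr *Ins =
///       emitLaneInsert(None, VecReg, EltReg, /*LaneIdx=*/1, RB, MIRBuilder);
///   Register NewVec = Ins->getOperand(0).getReg();
/// \endcode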
138 MachineInstr *emitLaneInsert(Optional<Register> DstReg, Register SrcReg,
139 Register EltReg, unsigned LaneIdx,
140 const RegisterBank &RB,
141 MachineIRBuilder &MIRBuilder) const;
142 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
143 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy,
144 MachineRegisterInfo &MRI) const;
145 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
146 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
147 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI) const;
148
149 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI) const;
150 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI) const;
151 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI) const;
152 bool selectSplitVectorUnmerge(MachineInstr &I,
153 MachineRegisterInfo &MRI) const;
154 bool selectIntrinsicWithSideEffects(MachineInstr &I,
155 MachineRegisterInfo &MRI) const;
156 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI);
157 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI) const;
158 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const;
159 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const;
160 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI) const;
161 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI) const;
162 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI) const;
163 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI) const;
164
165 unsigned emitConstantPoolEntry(const Constant *CPVal,
166 MachineFunction &MF) const;
167 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal,
168 MachineIRBuilder &MIRBuilder) const;
169
170 // Emit a vector concat operation.
171 MachineInstr *emitVectorConcat(Optional<Register> Dst, Register Op1,
172 Register Op2,
173 MachineIRBuilder &MIRBuilder) const;
174
175 // Emit an integer compare between LHS and RHS, which checks for Predicate.
176 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS,
177 MachineOperand &Predicate,
178 MachineIRBuilder &MIRBuilder) const;
179
180 /// Emit a floating point comparison between \p LHS and \p RHS.
181 /// \p Pred, if given, is the intended predicate to use.
182 MachineInstr *emitFPCompare(Register LHS, Register RHS,
183 MachineIRBuilder &MIRBuilder,
184 Optional<CmpInst::Predicate> = None) const;
185
186 MachineInstr *emitInstr(unsigned Opcode,
187 std::initializer_list<llvm::DstOp> DstOps,
188 std::initializer_list<llvm::SrcOp> SrcOps,
189 MachineIRBuilder &MIRBuilder,
190 const ComplexRendererFns &RenderFns = None) const;
191 /// Helper function to emit an add or sub instruction.
192 ///
193 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants listed
194 /// below in a specific order.
195 ///
196 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode.
197 ///
198 /// \code
199 /// const std::array<std::array<unsigned, 2>, 5> Table {
200 /// {{AArch64::ADDXri, AArch64::ADDWri},
201 /// {AArch64::ADDXrs, AArch64::ADDWrs},
202 /// {AArch64::ADDXrr, AArch64::ADDWrr},
203 /// {AArch64::SUBXri, AArch64::SUBWri},
204 /// {AArch64::ADDXrx, AArch64::ADDWrx}}};
205 /// \endcode
206 ///
207 /// Each row in the table corresponds to a different addressing mode. Each
208 /// column corresponds to a different register size.
209 /// 210 /// \attention Rows must be structured as follows: 211 /// - Row 0: The ri opcode variants 212 /// - Row 1: The rs opcode variants 213 /// - Row 2: The rr opcode variants 214 /// - Row 3: The ri opcode variants for negative immediates 215 /// - Row 4: The rx opcode variants 216 /// 217 /// \attention Columns must be structured as follows: 218 /// - Column 0: The 64-bit opcode variants 219 /// - Column 1: The 32-bit opcode variants 220 /// 221 /// \p Dst is the destination register of the binop to emit. 222 /// \p LHS is the left-hand operand of the binop to emit. 223 /// \p RHS is the right-hand operand of the binop to emit. 224 MachineInstr *emitAddSub( 225 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, 226 Register Dst, MachineOperand &LHS, MachineOperand &RHS, 227 MachineIRBuilder &MIRBuilder) const; 228 MachineInstr *emitADD(Register DefReg, MachineOperand &LHS, 229 MachineOperand &RHS, 230 MachineIRBuilder &MIRBuilder) const; 231 MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, 232 MachineIRBuilder &MIRBuilder) const; 233 MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS, 234 MachineIRBuilder &MIRBuilder) const; 235 MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS, 236 MachineIRBuilder &MIRBuilder) const; 237 MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS, 238 MachineIRBuilder &MIRBuilder) const; 239 MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS, 240 AArch64CC::CondCode CC, 241 MachineIRBuilder &MIRBuilder) const; 242 MachineInstr *emitExtractVectorElt(Optional<Register> DstReg, 243 const RegisterBank &DstRB, LLT ScalarTy, 244 Register VecReg, unsigned LaneIdx, 245 MachineIRBuilder &MIRBuilder) const; 246 247 /// Helper function for selecting G_FCONSTANT. If the G_FCONSTANT can be 248 /// materialized using a FMOV instruction, then update MI and return it. 249 /// Otherwise, do nothing and return a nullptr. 250 MachineInstr *emitFMovForFConstant(MachineInstr &MI, 251 MachineRegisterInfo &MRI) const; 252 253 /// Emit a CSet for an integer compare. 254 /// 255 /// \p DefReg is expected to be a 32-bit scalar register. 256 MachineInstr *emitCSetForICMP(Register DefReg, unsigned Pred, 257 MachineIRBuilder &MIRBuilder) const; 258 /// Emit a CSet for a FP compare. 259 /// 260 /// \p Dst is expected to be a 32-bit scalar register. 261 MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred, 262 MachineIRBuilder &MIRBuilder) const; 263 264 /// Emit the overflow op for \p Opcode. 265 /// 266 /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO, 267 /// G_USUBO, etc. 268 std::pair<MachineInstr *, AArch64CC::CondCode> 269 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, 270 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; 271 272 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. 273 /// \p IsNegative is true if the test should be "not zero". 274 /// This will also optimize the test bit instruction when possible. 275 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative, 276 MachineBasicBlock *DstMBB, 277 MachineIRBuilder &MIB) const; 278 279 /// Emit a CB(N)Z instruction which branches to \p DestMBB. 280 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, 281 MachineBasicBlock *DestMBB, 282 MachineIRBuilder &MIB) const; 283 284 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td. 
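// (For reference, i32shift_a maps a left-shift amount to the UBFM "immr"
// field, roughly (32 - imm) & 0x1f, and i32shift_b maps it to the "imms"
// field, 31 - imm; the _64 variants do the same modulo 64.)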
285 // We use these manually instead of using the importer since it doesn't 286 // support SDNodeXForm. 287 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const; 288 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const; 289 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const; 290 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const; 291 292 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const; 293 ComplexRendererFns selectArithImmed(MachineOperand &Root) const; 294 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const; 295 296 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, 297 unsigned Size) const; 298 299 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const { 300 return selectAddrModeUnscaled(Root, 1); 301 } 302 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const { 303 return selectAddrModeUnscaled(Root, 2); 304 } 305 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const { 306 return selectAddrModeUnscaled(Root, 4); 307 } 308 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const { 309 return selectAddrModeUnscaled(Root, 8); 310 } 311 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const { 312 return selectAddrModeUnscaled(Root, 16); 313 } 314 315 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used 316 /// from complex pattern matchers like selectAddrModeIndexed(). 317 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size, 318 MachineRegisterInfo &MRI) const; 319 320 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root, 321 unsigned Size) const; 322 template <int Width> 323 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const { 324 return selectAddrModeIndexed(Root, Width / 8); 325 } 326 327 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI, 328 const MachineRegisterInfo &MRI) const; 329 ComplexRendererFns 330 selectAddrModeShiftedExtendXReg(MachineOperand &Root, 331 unsigned SizeInBytes) const; 332 333 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether 334 /// or not a shift + extend should be folded into an addressing mode. Returns 335 /// None when this is not profitable or possible. 336 ComplexRendererFns 337 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base, 338 MachineOperand &Offset, unsigned SizeInBytes, 339 bool WantsExt) const; 340 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; 341 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, 342 unsigned SizeInBytes) const; 343 template <int Width> 344 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const { 345 return selectAddrModeXRO(Root, Width / 8); 346 } 347 348 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root, 349 unsigned SizeInBytes) const; 350 template <int Width> 351 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const { 352 return selectAddrModeWRO(Root, Width / 8); 353 } 354 355 ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const; 356 357 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const { 358 return selectShiftedRegister(Root); 359 } 360 361 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const { 362 // TODO: selectShiftedRegister should allow for rotates on logical shifts. 363 // For now, make them the same. 
The only difference between the two is that 364 // logical shifts are allowed to fold in rotates. Otherwise, these are 365 // functionally the same. 366 return selectShiftedRegister(Root); 367 } 368 369 /// Given an extend instruction, determine the correct shift-extend type for 370 /// that instruction. 371 /// 372 /// If the instruction is going to be used in a load or store, pass 373 /// \p IsLoadStore = true. 374 AArch64_AM::ShiftExtendType 375 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI, 376 bool IsLoadStore = false) const; 377 378 /// Move \p Reg to \p RC if \p Reg is not already on \p RC. 379 /// 380 /// \returns Either \p Reg if no change was necessary, or the new register 381 /// created by moving \p Reg. 382 /// 383 /// Note: This uses emitCopy right now. 384 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC, 385 MachineIRBuilder &MIB) const; 386 387 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; 388 389 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI, 390 int OpIdx = -1) const; 391 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I, 392 int OpIdx = -1) const; 393 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I, 394 int OpIdx = -1) const; 395 396 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. 397 void materializeLargeCMVal(MachineInstr &I, const Value *V, 398 unsigned OpFlags) const; 399 400 // Optimization methods. 401 bool tryOptSelect(MachineInstr &MI) const; 402 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, 403 MachineOperand &Predicate, 404 MachineIRBuilder &MIRBuilder) const; 405 406 /// Return true if \p MI is a load or store of \p NumBytes bytes. 407 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; 408 409 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit 410 /// register zeroed out. In other words, the result of MI has been explicitly 411 /// zero extended. 412 bool isDef32(const MachineInstr &MI) const; 413 414 const AArch64TargetMachine &TM; 415 const AArch64Subtarget &STI; 416 const AArch64InstrInfo &TII; 417 const AArch64RegisterInfo &TRI; 418 const AArch64RegisterBankInfo &RBI; 419 420 bool ProduceNonFlagSettingCondBr = false; 421 422 // Some cached values used during selection. 423 // We use LR as a live-in register, and we keep track of it here as it can be 424 // clobbered by calls. 425 Register MFReturnAddr; 426 427 #define GET_GLOBALISEL_PREDICATES_DECL 428 #include "AArch64GenGlobalISel.inc" 429 #undef GET_GLOBALISEL_PREDICATES_DECL 430 431 // We declare the temporaries used by selectImpl() in the class to minimize the 432 // cost of constructing placeholder values. 
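// The GET_GLOBALISEL_* blocks here are expanded from AArch64GenGlobalISel.inc,
// which TableGen generates from the target's instruction-selection patterns.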
433 #define GET_GLOBALISEL_TEMPORARIES_DECL 434 #include "AArch64GenGlobalISel.inc" 435 #undef GET_GLOBALISEL_TEMPORARIES_DECL 436 }; 437 438 } // end anonymous namespace 439 440 #define GET_GLOBALISEL_IMPL 441 #include "AArch64GenGlobalISel.inc" 442 #undef GET_GLOBALISEL_IMPL 443 444 AArch64InstructionSelector::AArch64InstructionSelector( 445 const AArch64TargetMachine &TM, const AArch64Subtarget &STI, 446 const AArch64RegisterBankInfo &RBI) 447 : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()), 448 TRI(*STI.getRegisterInfo()), RBI(RBI), 449 #define GET_GLOBALISEL_PREDICATES_INIT 450 #include "AArch64GenGlobalISel.inc" 451 #undef GET_GLOBALISEL_PREDICATES_INIT 452 #define GET_GLOBALISEL_TEMPORARIES_INIT 453 #include "AArch64GenGlobalISel.inc" 454 #undef GET_GLOBALISEL_TEMPORARIES_INIT 455 { 456 } 457 458 // FIXME: This should be target-independent, inferred from the types declared 459 // for each class in the bank. 460 static const TargetRegisterClass * 461 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, 462 const RegisterBankInfo &RBI, 463 bool GetAllRegSet = false) { 464 if (RB.getID() == AArch64::GPRRegBankID) { 465 if (Ty.getSizeInBits() <= 32) 466 return GetAllRegSet ? &AArch64::GPR32allRegClass 467 : &AArch64::GPR32RegClass; 468 if (Ty.getSizeInBits() == 64) 469 return GetAllRegSet ? &AArch64::GPR64allRegClass 470 : &AArch64::GPR64RegClass; 471 return nullptr; 472 } 473 474 if (RB.getID() == AArch64::FPRRegBankID) { 475 if (Ty.getSizeInBits() <= 16) 476 return &AArch64::FPR16RegClass; 477 if (Ty.getSizeInBits() == 32) 478 return &AArch64::FPR32RegClass; 479 if (Ty.getSizeInBits() == 64) 480 return &AArch64::FPR64RegClass; 481 if (Ty.getSizeInBits() == 128) 482 return &AArch64::FPR128RegClass; 483 return nullptr; 484 } 485 486 return nullptr; 487 } 488 489 /// Given a register bank, and size in bits, return the smallest register class 490 /// that can represent that combination. 491 static const TargetRegisterClass * 492 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, 493 bool GetAllRegSet = false) { 494 unsigned RegBankID = RB.getID(); 495 496 if (RegBankID == AArch64::GPRRegBankID) { 497 if (SizeInBits <= 32) 498 return GetAllRegSet ? &AArch64::GPR32allRegClass 499 : &AArch64::GPR32RegClass; 500 if (SizeInBits == 64) 501 return GetAllRegSet ? &AArch64::GPR64allRegClass 502 : &AArch64::GPR64RegClass; 503 } 504 505 if (RegBankID == AArch64::FPRRegBankID) { 506 switch (SizeInBits) { 507 default: 508 return nullptr; 509 case 8: 510 return &AArch64::FPR8RegClass; 511 case 16: 512 return &AArch64::FPR16RegClass; 513 case 32: 514 return &AArch64::FPR32RegClass; 515 case 64: 516 return &AArch64::FPR64RegClass; 517 case 128: 518 return &AArch64::FPR128RegClass; 519 } 520 } 521 522 return nullptr; 523 } 524 525 /// Returns the correct subregister to use for a given register class. 526 static bool getSubRegForClass(const TargetRegisterClass *RC, 527 const TargetRegisterInfo &TRI, unsigned &SubReg) { 528 switch (TRI.getRegSizeInBits(*RC)) { 529 case 8: 530 SubReg = AArch64::bsub; 531 break; 532 case 16: 533 SubReg = AArch64::hsub; 534 break; 535 case 32: 536 if (RC != &AArch64::FPR32RegClass) 537 SubReg = AArch64::sub_32; 538 else 539 SubReg = AArch64::ssub; 540 break; 541 case 64: 542 SubReg = AArch64::dsub; 543 break; 544 default: 545 LLVM_DEBUG( 546 dbgs() << "Couldn't find appropriate subregister for register class."); 547 return false; 548 } 549 550 return true; 551 } 552 553 /// Returns the minimum size the given register bank can hold. 
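/// E.g. nothing smaller than 32 bits can live in a GPR, while the FPR bank
/// has register classes all the way down to FPR8.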
554 static unsigned getMinSizeForRegBank(const RegisterBank &RB) {
555 switch (RB.getID()) {
556 case AArch64::GPRRegBankID:
557 return 32;
558 case AArch64::FPRRegBankID:
559 return 8;
560 default:
561 llvm_unreachable("Tried to get minimum size for unknown register bank.");
562 }
563 }
564
565 static Optional<uint64_t> getImmedFromMO(const MachineOperand &Root) {
566 auto &MI = *Root.getParent();
567 auto &MBB = *MI.getParent();
568 auto &MF = *MBB.getParent();
569 auto &MRI = MF.getRegInfo();
570 uint64_t Immed;
571 if (Root.isImm())
572 Immed = Root.getImm();
573 else if (Root.isCImm())
574 Immed = Root.getCImm()->getZExtValue();
575 else if (Root.isReg()) {
576 auto ValAndVReg =
577 getConstantVRegValWithLookThrough(Root.getReg(), MRI, true);
578 if (!ValAndVReg)
579 return None;
580 Immed = ValAndVReg->Value.getSExtValue();
581 } else
582 return None;
583 return Immed;
584 }
585
586 /// Check whether \p I is a currently unsupported binary operation:
587 /// - it has an unsized type
588 /// - an operand is not a vreg
589 /// - its operands are not all in the same bank
590 /// These are checks that should someday live in the verifier, but right now,
591 /// these are mostly limitations of the AArch64 selector.
592 static bool unsupportedBinOp(const MachineInstr &I,
593 const AArch64RegisterBankInfo &RBI,
594 const MachineRegisterInfo &MRI,
595 const AArch64RegisterInfo &TRI) {
596 LLT Ty = MRI.getType(I.getOperand(0).getReg());
597 if (!Ty.isValid()) {
598 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n");
599 return true;
600 }
601
602 const RegisterBank *PrevOpBank = nullptr;
603 for (auto &MO : I.operands()) {
604 // FIXME: Support non-register operands.
605 if (!MO.isReg()) {
606 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
607 return true;
608 }
609
610 // FIXME: Can generic operations have physical register operands? If
611 // so, this will need to be taught about that, and we'll need to get the
612 // bank out of the minimal class for the register.
613 // Either way, this needs to be documented (and possibly verified).
614 if (!Register::isVirtualRegister(MO.getReg())) {
615 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n");
616 return true;
617 }
618
619 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
620 if (!OpBank) {
621 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n");
622 return true;
623 }
624
625 if (PrevOpBank && OpBank != PrevOpBank) {
626 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n");
627 return true;
628 }
629 PrevOpBank = OpBank;
630 }
631 return false;
632 }
633
634 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc
635 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID
636 /// and of size \p OpSize.
637 /// \returns \p GenericOpc if the combination is unsupported.
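/// For example (illustrative only):
/// \code
///   unsigned Opc = selectBinaryOp(TargetOpcode::G_SHL,
///                                 AArch64::GPRRegBankID, 32);
///   // Opc is now AArch64::LSLVWr; a result equal to the generic opcode
///   // means the combination is not handled here.
/// \endcode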
638 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, 639 unsigned OpSize) { 640 switch (RegBankID) { 641 case AArch64::GPRRegBankID: 642 if (OpSize == 32) { 643 switch (GenericOpc) { 644 case TargetOpcode::G_SHL: 645 return AArch64::LSLVWr; 646 case TargetOpcode::G_LSHR: 647 return AArch64::LSRVWr; 648 case TargetOpcode::G_ASHR: 649 return AArch64::ASRVWr; 650 default: 651 return GenericOpc; 652 } 653 } else if (OpSize == 64) { 654 switch (GenericOpc) { 655 case TargetOpcode::G_PTR_ADD: 656 return AArch64::ADDXrr; 657 case TargetOpcode::G_SHL: 658 return AArch64::LSLVXr; 659 case TargetOpcode::G_LSHR: 660 return AArch64::LSRVXr; 661 case TargetOpcode::G_ASHR: 662 return AArch64::ASRVXr; 663 default: 664 return GenericOpc; 665 } 666 } 667 break; 668 case AArch64::FPRRegBankID: 669 switch (OpSize) { 670 case 32: 671 switch (GenericOpc) { 672 case TargetOpcode::G_FADD: 673 return AArch64::FADDSrr; 674 case TargetOpcode::G_FSUB: 675 return AArch64::FSUBSrr; 676 case TargetOpcode::G_FMUL: 677 return AArch64::FMULSrr; 678 case TargetOpcode::G_FDIV: 679 return AArch64::FDIVSrr; 680 default: 681 return GenericOpc; 682 } 683 case 64: 684 switch (GenericOpc) { 685 case TargetOpcode::G_FADD: 686 return AArch64::FADDDrr; 687 case TargetOpcode::G_FSUB: 688 return AArch64::FSUBDrr; 689 case TargetOpcode::G_FMUL: 690 return AArch64::FMULDrr; 691 case TargetOpcode::G_FDIV: 692 return AArch64::FDIVDrr; 693 case TargetOpcode::G_OR: 694 return AArch64::ORRv8i8; 695 default: 696 return GenericOpc; 697 } 698 } 699 break; 700 } 701 return GenericOpc; 702 } 703 704 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc, 705 /// appropriate for the (value) register bank \p RegBankID and of memory access 706 /// size \p OpSize. This returns the variant with the base+unsigned-immediate 707 /// addressing mode (e.g., LDRXui). 708 /// \returns \p GenericOpc if the combination is unsupported. 709 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, 710 unsigned OpSize) { 711 const bool isStore = GenericOpc == TargetOpcode::G_STORE; 712 switch (RegBankID) { 713 case AArch64::GPRRegBankID: 714 switch (OpSize) { 715 case 8: 716 return isStore ? AArch64::STRBBui : AArch64::LDRBBui; 717 case 16: 718 return isStore ? AArch64::STRHHui : AArch64::LDRHHui; 719 case 32: 720 return isStore ? AArch64::STRWui : AArch64::LDRWui; 721 case 64: 722 return isStore ? AArch64::STRXui : AArch64::LDRXui; 723 } 724 break; 725 case AArch64::FPRRegBankID: 726 switch (OpSize) { 727 case 8: 728 return isStore ? AArch64::STRBui : AArch64::LDRBui; 729 case 16: 730 return isStore ? AArch64::STRHui : AArch64::LDRHui; 731 case 32: 732 return isStore ? AArch64::STRSui : AArch64::LDRSui; 733 case 64: 734 return isStore ? AArch64::STRDui : AArch64::LDRDui; 735 } 736 break; 737 } 738 return GenericOpc; 739 } 740 741 #ifndef NDEBUG 742 /// Helper function that verifies that we have a valid copy at the end of 743 /// selectCopy. Verifies that the source and dest have the expected sizes and 744 /// then returns true. 
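/// Only compiled in !NDEBUG builds; selectCopy() uses it to assert that the
/// copies it produces are well formed.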
745 static bool isValidCopy(const MachineInstr &I, const RegisterBank &DstBank,
746 const MachineRegisterInfo &MRI,
747 const TargetRegisterInfo &TRI,
748 const RegisterBankInfo &RBI) {
749 const Register DstReg = I.getOperand(0).getReg();
750 const Register SrcReg = I.getOperand(1).getReg();
751 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
752 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
753
754 // Make sure the size of the source and dest line up.
755 assert(
756 (DstSize == SrcSize ||
757 // Copies are a means to set up initial types; the number of
758 // bits may not exactly match.
759 (Register::isPhysicalRegister(SrcReg) && DstSize <= SrcSize) ||
760 // Copies are a means to move bits around; as long as we are
761 // in the same register class, that's fine. Otherwise, that
762 // means we need some SUBREG_TO_REG or AND & co.
763 (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
764 "Copy with different width?!");
765
766 // Check the size of the destination.
767 assert((DstSize <= 64 || DstBank.getID() == AArch64::FPRRegBankID) &&
768 "GPRs cannot get more than 64-bit width values");
769
770 return true;
771 }
772 #endif
773
774 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg
775 /// to \p *To.
776 ///
777 /// E.g. "To = COPY SrcReg:SubReg"
778 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI,
779 const RegisterBankInfo &RBI, Register SrcReg,
780 const TargetRegisterClass *To, unsigned SubReg) {
781 assert(SrcReg.isValid() && "Expected a valid source register?");
782 assert(To && "Destination register class cannot be null");
783 assert(SubReg && "Expected a valid subregister");
784
785 MachineIRBuilder MIB(I);
786 auto SubRegCopy =
787 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg);
788 MachineOperand &RegOp = I.getOperand(1);
789 RegOp.setReg(SubRegCopy.getReg(0));
790
791 // It's possible that the destination register won't be constrained. Make
792 // sure that happens.
793 if (!Register::isPhysicalRegister(I.getOperand(0).getReg()))
794 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI);
795
796 return true;
797 }
798
799 /// Helper function to get the source and destination register classes for a
800 /// copy. Returns a std::pair containing the source register class for the
801 /// copy, and the destination register class for the copy. If a register class
802 /// cannot be determined, then it will be nullptr.
803 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
804 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII,
805 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
806 const RegisterBankInfo &RBI) {
807 Register DstReg = I.getOperand(0).getReg();
808 Register SrcReg = I.getOperand(1).getReg();
809 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);
810 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
811 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
812 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
813
814 // Special casing for cross-bank copies of s1s. We can technically represent
815 // a 1-bit value with any size of register. The minimum size for a GPR is 32
816 // bits. So, we need to put the FPR on 32 bits as well.
817 //
818 // FIXME: I'm not sure if this case holds true outside of copies. If it does,
819 // then we can pull it into the helpers that get the appropriate class for a
820 // register bank.
Or make a new helper that carries along some constraint 821 // information. 822 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1)) 823 SrcSize = DstSize = 32; 824 825 return {getMinClassForRegBank(SrcRegBank, SrcSize, true), 826 getMinClassForRegBank(DstRegBank, DstSize, true)}; 827 } 828 829 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, 830 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, 831 const RegisterBankInfo &RBI) { 832 Register DstReg = I.getOperand(0).getReg(); 833 Register SrcReg = I.getOperand(1).getReg(); 834 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); 835 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); 836 837 // Find the correct register classes for the source and destination registers. 838 const TargetRegisterClass *SrcRC; 839 const TargetRegisterClass *DstRC; 840 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI); 841 842 if (!DstRC) { 843 LLVM_DEBUG(dbgs() << "Unexpected dest size " 844 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'); 845 return false; 846 } 847 848 // A couple helpers below, for making sure that the copy we produce is valid. 849 850 // Set to true if we insert a SUBREG_TO_REG. If we do this, then we don't want 851 // to verify that the src and dst are the same size, since that's handled by 852 // the SUBREG_TO_REG. 853 bool KnownValid = false; 854 855 // Returns true, or asserts if something we don't expect happens. Instead of 856 // returning true, we return isValidCopy() to ensure that we verify the 857 // result. 858 auto CheckCopy = [&]() { 859 // If we have a bitcast or something, we can't have physical registers. 860 assert((I.isCopy() || 861 (!Register::isPhysicalRegister(I.getOperand(0).getReg()) && 862 !Register::isPhysicalRegister(I.getOperand(1).getReg()))) && 863 "No phys reg on generic operator!"); 864 bool ValidCopy = true; 865 #ifndef NDEBUG 866 ValidCopy = KnownValid || isValidCopy(I, DstRegBank, MRI, TRI, RBI); 867 assert(ValidCopy && "Invalid copy."); 868 (void)KnownValid; 869 #endif 870 return ValidCopy; 871 }; 872 873 // Is this a copy? If so, then we may need to insert a subregister copy. 874 if (I.isCopy()) { 875 // Yes. Check if there's anything to fix up. 876 if (!SrcRC) { 877 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n"); 878 return false; 879 } 880 881 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC); 882 unsigned DstSize = TRI.getRegSizeInBits(*DstRC); 883 unsigned SubReg; 884 885 // If the source bank doesn't support a subregister copy small enough, 886 // then we first need to copy to the destination bank. 887 if (getMinSizeForRegBank(SrcRegBank) > DstSize) { 888 const TargetRegisterClass *DstTempRC = 889 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true); 890 getSubRegForClass(DstRC, TRI, SubReg); 891 892 MachineIRBuilder MIB(I); 893 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg}); 894 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg); 895 } else if (SrcSize > DstSize) { 896 // If the source register is bigger than the destination we need to 897 // perform a subregister copy. 898 const TargetRegisterClass *SubRegRC = 899 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true); 900 getSubRegForClass(SubRegRC, TRI, SubReg); 901 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg); 902 } else if (DstSize > SrcSize) { 903 // If the destination register is bigger than the source we need to do 904 // a promotion using SUBREG_TO_REG. 
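// For example, widening a copy from a 32-bit GPR into a 64-bit GPR
// destination ends up as something like
//   %promoted:gpr64all = SUBREG_TO_REG 0, %src:gpr32, %subreg.sub_32
// with the original COPY rewritten to read %promoted instead.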
905 const TargetRegisterClass *PromotionRC = 906 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true); 907 getSubRegForClass(SrcRC, TRI, SubReg); 908 909 Register PromoteReg = MRI.createVirtualRegister(PromotionRC); 910 BuildMI(*I.getParent(), I, I.getDebugLoc(), 911 TII.get(AArch64::SUBREG_TO_REG), PromoteReg) 912 .addImm(0) 913 .addUse(SrcReg) 914 .addImm(SubReg); 915 MachineOperand &RegOp = I.getOperand(1); 916 RegOp.setReg(PromoteReg); 917 918 // Promise that the copy is implicitly validated by the SUBREG_TO_REG. 919 KnownValid = true; 920 } 921 922 // If the destination is a physical register, then there's nothing to 923 // change, so we're done. 924 if (Register::isPhysicalRegister(DstReg)) 925 return CheckCopy(); 926 } 927 928 // No need to constrain SrcReg. It will get constrained when we hit another 929 // of its use or its defs. Copies do not have constraints. 930 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 931 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 932 << " operand\n"); 933 return false; 934 } 935 I.setDesc(TII.get(AArch64::COPY)); 936 return CheckCopy(); 937 } 938 939 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { 940 if (!DstTy.isScalar() || !SrcTy.isScalar()) 941 return GenericOpc; 942 943 const unsigned DstSize = DstTy.getSizeInBits(); 944 const unsigned SrcSize = SrcTy.getSizeInBits(); 945 946 switch (DstSize) { 947 case 32: 948 switch (SrcSize) { 949 case 32: 950 switch (GenericOpc) { 951 case TargetOpcode::G_SITOFP: 952 return AArch64::SCVTFUWSri; 953 case TargetOpcode::G_UITOFP: 954 return AArch64::UCVTFUWSri; 955 case TargetOpcode::G_FPTOSI: 956 return AArch64::FCVTZSUWSr; 957 case TargetOpcode::G_FPTOUI: 958 return AArch64::FCVTZUUWSr; 959 default: 960 return GenericOpc; 961 } 962 case 64: 963 switch (GenericOpc) { 964 case TargetOpcode::G_SITOFP: 965 return AArch64::SCVTFUXSri; 966 case TargetOpcode::G_UITOFP: 967 return AArch64::UCVTFUXSri; 968 case TargetOpcode::G_FPTOSI: 969 return AArch64::FCVTZSUWDr; 970 case TargetOpcode::G_FPTOUI: 971 return AArch64::FCVTZUUWDr; 972 default: 973 return GenericOpc; 974 } 975 default: 976 return GenericOpc; 977 } 978 case 64: 979 switch (SrcSize) { 980 case 32: 981 switch (GenericOpc) { 982 case TargetOpcode::G_SITOFP: 983 return AArch64::SCVTFUWDri; 984 case TargetOpcode::G_UITOFP: 985 return AArch64::UCVTFUWDri; 986 case TargetOpcode::G_FPTOSI: 987 return AArch64::FCVTZSUXSr; 988 case TargetOpcode::G_FPTOUI: 989 return AArch64::FCVTZUUXSr; 990 default: 991 return GenericOpc; 992 } 993 case 64: 994 switch (GenericOpc) { 995 case TargetOpcode::G_SITOFP: 996 return AArch64::SCVTFUXDri; 997 case TargetOpcode::G_UITOFP: 998 return AArch64::UCVTFUXDri; 999 case TargetOpcode::G_FPTOSI: 1000 return AArch64::FCVTZSUXDr; 1001 case TargetOpcode::G_FPTOUI: 1002 return AArch64::FCVTZUUXDr; 1003 default: 1004 return GenericOpc; 1005 } 1006 default: 1007 return GenericOpc; 1008 } 1009 default: 1010 return GenericOpc; 1011 }; 1012 return GenericOpc; 1013 } 1014 1015 MachineInstr * 1016 AArch64InstructionSelector::emitSelect(Register Dst, Register True, 1017 Register False, AArch64CC::CondCode CC, 1018 MachineIRBuilder &MIB) const { 1019 MachineRegisterInfo &MRI = *MIB.getMRI(); 1020 assert(RBI.getRegBank(False, MRI, TRI)->getID() == 1021 RBI.getRegBank(True, MRI, TRI)->getID() && 1022 "Expected both select operands to have the same regbank?"); 1023 LLT Ty = MRI.getType(True); 1024 if (Ty.isVector()) 1025 return nullptr; 1026 const unsigned Size = 
Ty.getSizeInBits(); 1027 assert((Size == 32 || Size == 64) && 1028 "Expected 32 bit or 64 bit select only?"); 1029 const bool Is32Bit = Size == 32; 1030 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) { 1031 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr; 1032 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); 1033 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI); 1034 return &*FCSel; 1035 } 1036 1037 // By default, we'll try and emit a CSEL. 1038 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; 1039 bool Optimized = false; 1040 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI, 1041 &Optimized](Register &Reg, Register &OtherReg, 1042 bool Invert) { 1043 if (Optimized) 1044 return false; 1045 1046 // Attempt to fold: 1047 // 1048 // %sub = G_SUB 0, %x 1049 // %select = G_SELECT cc, %reg, %sub 1050 // 1051 // Into: 1052 // %select = CSNEG %reg, %x, cc 1053 Register MatchReg; 1054 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) { 1055 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; 1056 Reg = MatchReg; 1057 if (Invert) { 1058 CC = AArch64CC::getInvertedCondCode(CC); 1059 std::swap(Reg, OtherReg); 1060 } 1061 return true; 1062 } 1063 1064 // Attempt to fold: 1065 // 1066 // %xor = G_XOR %x, -1 1067 // %select = G_SELECT cc, %reg, %xor 1068 // 1069 // Into: 1070 // %select = CSINV %reg, %x, cc 1071 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) { 1072 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; 1073 Reg = MatchReg; 1074 if (Invert) { 1075 CC = AArch64CC::getInvertedCondCode(CC); 1076 std::swap(Reg, OtherReg); 1077 } 1078 return true; 1079 } 1080 1081 // Attempt to fold: 1082 // 1083 // %add = G_ADD %x, 1 1084 // %select = G_SELECT cc, %reg, %add 1085 // 1086 // Into: 1087 // %select = CSINC %reg, %x, cc 1088 if (mi_match(Reg, MRI, m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)))) { 1089 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; 1090 Reg = MatchReg; 1091 if (Invert) { 1092 CC = AArch64CC::getInvertedCondCode(CC); 1093 std::swap(Reg, OtherReg); 1094 } 1095 return true; 1096 } 1097 1098 return false; 1099 }; 1100 1101 // Helper lambda which tries to use CSINC/CSINV for the instruction when its 1102 // true/false values are constants. 1103 // FIXME: All of these patterns already exist in tablegen. We should be 1104 // able to import these. 1105 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, 1106 &Optimized]() { 1107 if (Optimized) 1108 return false; 1109 auto TrueCst = getConstantVRegValWithLookThrough(True, MRI); 1110 auto FalseCst = getConstantVRegValWithLookThrough(False, MRI); 1111 if (!TrueCst && !FalseCst) 1112 return false; 1113 1114 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; 1115 if (TrueCst && FalseCst) { 1116 int64_t T = TrueCst->Value.getSExtValue(); 1117 int64_t F = FalseCst->Value.getSExtValue(); 1118 1119 if (T == 0 && F == 1) { 1120 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc 1121 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; 1122 True = ZReg; 1123 False = ZReg; 1124 return true; 1125 } 1126 1127 if (T == 0 && F == -1) { 1128 // G_SELECT cc 0, -1 -> CSINV zreg, zreg cc 1129 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; 1130 True = ZReg; 1131 False = ZReg; 1132 return true; 1133 } 1134 } 1135 1136 if (TrueCst) { 1137 int64_t T = TrueCst->Value.getSExtValue(); 1138 if (T == 1) { 1139 // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc 1140 Opc = Is32Bit ? 
AArch64::CSINCWr : AArch64::CSINCXr;
1141 True = False;
1142 False = ZReg;
1143 CC = AArch64CC::getInvertedCondCode(CC);
1144 return true;
1145 }
1146
1147 if (T == -1) {
1148 // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
1149 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1150 True = False;
1151 False = ZReg;
1152 CC = AArch64CC::getInvertedCondCode(CC);
1153 return true;
1154 }
1155 }
1156
1157 if (FalseCst) {
1158 int64_t F = FalseCst->Value.getSExtValue();
1159 if (F == 1) {
1160 // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
1161 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
1162 False = ZReg;
1163 return true;
1164 }
1165
1166 if (F == -1) {
1167 // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
1168 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
1169 False = ZReg;
1170 return true;
1171 }
1172 }
1173 return false;
1174 };
1175
1176 Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
1177 Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
1178 Optimized |= TryOptSelectCst();
1179 auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
1180 constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
1181 return &*SelectInst;
1182 }
1183
1184 static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
1185 switch (P) {
1186 default:
1187 llvm_unreachable("Unknown condition code!");
1188 case CmpInst::ICMP_NE:
1189 return AArch64CC::NE;
1190 case CmpInst::ICMP_EQ:
1191 return AArch64CC::EQ;
1192 case CmpInst::ICMP_SGT:
1193 return AArch64CC::GT;
1194 case CmpInst::ICMP_SGE:
1195 return AArch64CC::GE;
1196 case CmpInst::ICMP_SLT:
1197 return AArch64CC::LT;
1198 case CmpInst::ICMP_SLE:
1199 return AArch64CC::LE;
1200 case CmpInst::ICMP_UGT:
1201 return AArch64CC::HI;
1202 case CmpInst::ICMP_UGE:
1203 return AArch64CC::HS;
1204 case CmpInst::ICMP_ULT:
1205 return AArch64CC::LO;
1206 case CmpInst::ICMP_ULE:
1207 return AArch64CC::LS;
1208 }
1209 }
1210
1211 static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
1212 AArch64CC::CondCode &CondCode,
1213 AArch64CC::CondCode &CondCode2) {
1214 CondCode2 = AArch64CC::AL;
1215 switch (P) {
1216 default:
1217 llvm_unreachable("Unknown FP condition!");
1218 case CmpInst::FCMP_OEQ:
1219 CondCode = AArch64CC::EQ;
1220 break;
1221 case CmpInst::FCMP_OGT:
1222 CondCode = AArch64CC::GT;
1223 break;
1224 case CmpInst::FCMP_OGE:
1225 CondCode = AArch64CC::GE;
1226 break;
1227 case CmpInst::FCMP_OLT:
1228 CondCode = AArch64CC::MI;
1229 break;
1230 case CmpInst::FCMP_OLE:
1231 CondCode = AArch64CC::LS;
1232 break;
1233 case CmpInst::FCMP_ONE:
1234 CondCode = AArch64CC::MI;
1235 CondCode2 = AArch64CC::GT;
1236 break;
1237 case CmpInst::FCMP_ORD:
1238 CondCode = AArch64CC::VC;
1239 break;
1240 case CmpInst::FCMP_UNO:
1241 CondCode = AArch64CC::VS;
1242 break;
1243 case CmpInst::FCMP_UEQ:
1244 CondCode = AArch64CC::EQ;
1245 CondCode2 = AArch64CC::VS;
1246 break;
1247 case CmpInst::FCMP_UGT:
1248 CondCode = AArch64CC::HI;
1249 break;
1250 case CmpInst::FCMP_UGE:
1251 CondCode = AArch64CC::PL;
1252 break;
1253 case CmpInst::FCMP_ULT:
1254 CondCode = AArch64CC::LT;
1255 break;
1256 case CmpInst::FCMP_ULE:
1257 CondCode = AArch64CC::LE;
1258 break;
1259 case CmpInst::FCMP_UNE:
1260 CondCode = AArch64CC::NE;
1261 break;
1262 }
1263 }
1264
1265 /// Return a register which can be used as a bit to test in a TB(N)Z.
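/// For example, if \p Reg is defined by (G_SHL %x, 2) and \p Bit is 3, this
/// can look through the shift and return %x with \p Bit updated to 1, since
/// bit 3 of (%x << 2) is bit 1 of %x. Similar folds are applied for G_AND,
/// G_XOR, G_LSHR, G_ASHR, and extensions/truncations.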
1266 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, 1267 MachineRegisterInfo &MRI) { 1268 assert(Reg.isValid() && "Expected valid register!"); 1269 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) { 1270 unsigned Opc = MI->getOpcode(); 1271 1272 if (!MI->getOperand(0).isReg() || 1273 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 1274 break; 1275 1276 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits. 1277 // 1278 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number 1279 // on the truncated x is the same as the bit number on x. 1280 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT || 1281 Opc == TargetOpcode::G_TRUNC) { 1282 Register NextReg = MI->getOperand(1).getReg(); 1283 // Did we find something worth folding? 1284 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg)) 1285 break; 1286 1287 // NextReg is worth folding. Keep looking. 1288 Reg = NextReg; 1289 continue; 1290 } 1291 1292 // Attempt to find a suitable operation with a constant on one side. 1293 Optional<uint64_t> C; 1294 Register TestReg; 1295 switch (Opc) { 1296 default: 1297 break; 1298 case TargetOpcode::G_AND: 1299 case TargetOpcode::G_XOR: { 1300 TestReg = MI->getOperand(1).getReg(); 1301 Register ConstantReg = MI->getOperand(2).getReg(); 1302 auto VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI); 1303 if (!VRegAndVal) { 1304 // AND commutes, check the other side for a constant. 1305 // FIXME: Can we canonicalize the constant so that it's always on the 1306 // same side at some point earlier? 1307 std::swap(ConstantReg, TestReg); 1308 VRegAndVal = getConstantVRegValWithLookThrough(ConstantReg, MRI); 1309 } 1310 if (VRegAndVal) 1311 C = VRegAndVal->Value.getSExtValue(); 1312 break; 1313 } 1314 case TargetOpcode::G_ASHR: 1315 case TargetOpcode::G_LSHR: 1316 case TargetOpcode::G_SHL: { 1317 TestReg = MI->getOperand(1).getReg(); 1318 auto VRegAndVal = 1319 getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); 1320 if (VRegAndVal) 1321 C = VRegAndVal->Value.getSExtValue(); 1322 break; 1323 } 1324 } 1325 1326 // Didn't find a constant or viable register. Bail out of the loop. 1327 if (!C || !TestReg.isValid()) 1328 break; 1329 1330 // We found a suitable instruction with a constant. Check to see if we can 1331 // walk through the instruction. 1332 Register NextReg; 1333 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits(); 1334 switch (Opc) { 1335 default: 1336 break; 1337 case TargetOpcode::G_AND: 1338 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set. 1339 if ((*C >> Bit) & 1) 1340 NextReg = TestReg; 1341 break; 1342 case TargetOpcode::G_SHL: 1343 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in 1344 // the type of the register. 1345 if (*C <= Bit && (Bit - *C) < TestRegSize) { 1346 NextReg = TestReg; 1347 Bit = Bit - *C; 1348 } 1349 break; 1350 case TargetOpcode::G_ASHR: 1351 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits 1352 // in x 1353 NextReg = TestReg; 1354 Bit = Bit + *C; 1355 if (Bit >= TestRegSize) 1356 Bit = TestRegSize - 1; 1357 break; 1358 case TargetOpcode::G_LSHR: 1359 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x 1360 if ((Bit + *C) < TestRegSize) { 1361 NextReg = TestReg; 1362 Bit = Bit + *C; 1363 } 1364 break; 1365 case TargetOpcode::G_XOR: 1366 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when 1367 // appropriate. 1368 // 1369 // e.g. 
If x' = xor x, c, and the b-th bit is set in c then 1370 // 1371 // tbz x', b -> tbnz x, b 1372 // 1373 // Because x' only has the b-th bit set if x does not. 1374 if ((*C >> Bit) & 1) 1375 Invert = !Invert; 1376 NextReg = TestReg; 1377 break; 1378 } 1379 1380 // Check if we found anything worth folding. 1381 if (!NextReg.isValid()) 1382 return Reg; 1383 Reg = NextReg; 1384 } 1385 1386 return Reg; 1387 } 1388 1389 MachineInstr *AArch64InstructionSelector::emitTestBit( 1390 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB, 1391 MachineIRBuilder &MIB) const { 1392 assert(TestReg.isValid()); 1393 assert(ProduceNonFlagSettingCondBr && 1394 "Cannot emit TB(N)Z with speculation tracking!"); 1395 MachineRegisterInfo &MRI = *MIB.getMRI(); 1396 1397 // Attempt to optimize the test bit by walking over instructions. 1398 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI); 1399 LLT Ty = MRI.getType(TestReg); 1400 unsigned Size = Ty.getSizeInBits(); 1401 assert(!Ty.isVector() && "Expected a scalar!"); 1402 assert(Bit < 64 && "Bit is too large!"); 1403 1404 // When the test register is a 64-bit register, we have to narrow to make 1405 // TBNZW work. 1406 bool UseWReg = Bit < 32; 1407 unsigned NecessarySize = UseWReg ? 32 : 64; 1408 if (Size != NecessarySize) 1409 TestReg = moveScalarRegClass( 1410 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass, 1411 MIB); 1412 1413 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX}, 1414 {AArch64::TBZW, AArch64::TBNZW}}; 1415 unsigned Opc = OpcTable[UseWReg][IsNegative]; 1416 auto TestBitMI = 1417 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB); 1418 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI); 1419 return &*TestBitMI; 1420 } 1421 1422 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( 1423 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, 1424 MachineIRBuilder &MIB) const { 1425 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?"); 1426 // Given something like this: 1427 // 1428 // %x = ...Something... 1429 // %one = G_CONSTANT i64 1 1430 // %zero = G_CONSTANT i64 0 1431 // %and = G_AND %x, %one 1432 // %cmp = G_ICMP intpred(ne), %and, %zero 1433 // %cmp_trunc = G_TRUNC %cmp 1434 // G_BRCOND %cmp_trunc, %bb.3 1435 // 1436 // We want to try and fold the AND into the G_BRCOND and produce either a 1437 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)). 1438 // 1439 // In this case, we'd get 1440 // 1441 // TBNZ %x %bb.3 1442 // 1443 1444 // Check if the AND has a constant on its RHS which we can use as a mask. 1445 // If it's a power of 2, then it's the same as checking a specific bit. 1446 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set) 1447 auto MaybeBit = getConstantVRegValWithLookThrough( 1448 AndInst.getOperand(2).getReg(), *MIB.getMRI()); 1449 if (!MaybeBit) 1450 return false; 1451 1452 int32_t Bit = MaybeBit->Value.exactLogBase2(); 1453 if (Bit < 0) 1454 return false; 1455 1456 Register TestReg = AndInst.getOperand(1).getReg(); 1457 1458 // Emit a TB(N)Z. 
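// emitTestBit will also run getTestBitReg over TestReg to look through its
// def chain for further folds before emitting the branch.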
1459 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB); 1460 return true; 1461 } 1462 1463 MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, 1464 bool IsNegative, 1465 MachineBasicBlock *DestMBB, 1466 MachineIRBuilder &MIB) const { 1467 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!"); 1468 MachineRegisterInfo &MRI = *MIB.getMRI(); 1469 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == 1470 AArch64::GPRRegBankID && 1471 "Expected GPRs only?"); 1472 auto Ty = MRI.getType(CompareReg); 1473 unsigned Width = Ty.getSizeInBits(); 1474 assert(!Ty.isVector() && "Expected scalar only?"); 1475 assert(Width <= 64 && "Expected width to be at most 64?"); 1476 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, 1477 {AArch64::CBNZW, AArch64::CBNZX}}; 1478 unsigned Opc = OpcTable[IsNegative][Width == 64]; 1479 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB); 1480 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI); 1481 return &*BranchMI; 1482 } 1483 1484 bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( 1485 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { 1486 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP); 1487 assert(I.getOpcode() == TargetOpcode::G_BRCOND); 1488 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't 1489 // totally clean. Some of them require two branches to implement. 1490 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate(); 1491 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB, 1492 Pred); 1493 AArch64CC::CondCode CC1, CC2; 1494 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2); 1495 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1496 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); 1497 if (CC2 != AArch64CC::AL) 1498 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); 1499 I.eraseFromParent(); 1500 return true; 1501 } 1502 1503 bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( 1504 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { 1505 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); 1506 assert(I.getOpcode() == TargetOpcode::G_BRCOND); 1507 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z. 1508 // 1509 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z 1510 // instructions will not be produced, as they are conditional branch 1511 // instructions that do not set flags. 1512 if (!ProduceNonFlagSettingCondBr) 1513 return false; 1514 1515 MachineRegisterInfo &MRI = *MIB.getMRI(); 1516 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1517 auto Pred = 1518 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate()); 1519 Register LHS = ICmp.getOperand(2).getReg(); 1520 Register RHS = ICmp.getOperand(3).getReg(); 1521 1522 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that. 1523 auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); 1524 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); 1525 1526 // When we can emit a TB(N)Z, prefer that. 1527 // 1528 // Handle non-commutative condition codes first. 1529 // Note that we don't want to do this when we have a G_AND because it can 1530 // become a tst. The tst will make the test bit in the TB(N)Z redundant. 1531 if (VRegAndVal && !AndInst) { 1532 int64_t C = VRegAndVal->Value.getSExtValue(); 1533 1534 // When we have a greater-than comparison, we can just test if the msb is 1535 // zero. 
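// That is, "x s> -1" is equivalent to "x s>= 0", which holds exactly when
// the sign bit is clear, so it lowers to a TBZ on the msb.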
1536 if (C == -1 && Pred == CmpInst::ICMP_SGT) { 1537 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; 1538 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB); 1539 I.eraseFromParent(); 1540 return true; 1541 } 1542 1543 // When we have a less than comparison, we can just test if the msb is not 1544 // zero. 1545 if (C == 0 && Pred == CmpInst::ICMP_SLT) { 1546 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; 1547 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB); 1548 I.eraseFromParent(); 1549 return true; 1550 } 1551 } 1552 1553 // Attempt to handle commutative condition codes. Right now, that's only 1554 // eq/ne. 1555 if (ICmpInst::isEquality(Pred)) { 1556 if (!VRegAndVal) { 1557 std::swap(RHS, LHS); 1558 VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI); 1559 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); 1560 } 1561 1562 if (VRegAndVal && VRegAndVal->Value == 0) { 1563 // If there's a G_AND feeding into this branch, try to fold it away by 1564 // emitting a TB(N)Z instead. 1565 // 1566 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be 1567 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding 1568 // would be redundant. 1569 if (AndInst && 1570 tryOptAndIntoCompareBranch( 1571 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) { 1572 I.eraseFromParent(); 1573 return true; 1574 } 1575 1576 // Otherwise, try to emit a CB(N)Z instead. 1577 auto LHSTy = MRI.getType(LHS); 1578 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) { 1579 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB); 1580 I.eraseFromParent(); 1581 return true; 1582 } 1583 } 1584 } 1585 1586 return false; 1587 } 1588 1589 bool AArch64InstructionSelector::selectCompareBranchFedByICmp( 1590 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { 1591 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); 1592 assert(I.getOpcode() == TargetOpcode::G_BRCOND); 1593 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) 1594 return true; 1595 1596 // Couldn't optimize. Emit a compare + a Bcc. 1597 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1598 auto PredOp = ICmp.getOperand(1); 1599 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); 1600 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( 1601 static_cast<CmpInst::Predicate>(PredOp.getPredicate())); 1602 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); 1603 I.eraseFromParent(); 1604 return true; 1605 } 1606 1607 bool AArch64InstructionSelector::selectCompareBranch( 1608 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 1609 Register CondReg = I.getOperand(0).getReg(); 1610 MachineInstr *CCMI = MRI.getVRegDef(CondReg); 1611 if (CCMI->getOpcode() == TargetOpcode::G_TRUNC) { 1612 CondReg = CCMI->getOperand(1).getReg(); 1613 CCMI = MRI.getVRegDef(CondReg); 1614 } 1615 1616 // Try to select the G_BRCOND using whatever is feeding the condition if 1617 // possible. 1618 MachineIRBuilder MIB(I); 1619 unsigned CCMIOpc = CCMI->getOpcode(); 1620 if (CCMIOpc == TargetOpcode::G_FCMP) 1621 return selectCompareBranchFedByFCmp(I, *CCMI, MIB); 1622 if (CCMIOpc == TargetOpcode::G_ICMP) 1623 return selectCompareBranchFedByICmp(I, *CCMI, MIB); 1624 1625 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z 1626 // instructions will not be produced, as they are conditional branch 1627 // instructions that do not set flags. 
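// If SLH is enabled, we fall through to the flag-setting TST + Bcc sequence
// emitted below instead.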
1628 if (ProduceNonFlagSettingCondBr) { 1629 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true, 1630 I.getOperand(1).getMBB(), MIB); 1631 I.eraseFromParent(); 1632 return true; 1633 } 1634 1635 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. 1636 auto TstMI = 1637 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); 1638 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 1639 auto Bcc = MIB.buildInstr(AArch64::Bcc) 1640 .addImm(AArch64CC::EQ) 1641 .addMBB(I.getOperand(1).getMBB()); 1642 I.eraseFromParent(); 1643 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); 1644 } 1645 1646 /// Returns the element immediate value of a vector shift operand if found. 1647 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR. 1648 static Optional<int64_t> getVectorShiftImm(Register Reg, 1649 MachineRegisterInfo &MRI) { 1650 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand"); 1651 MachineInstr *OpMI = MRI.getVRegDef(Reg); 1652 assert(OpMI && "Expected to find a vreg def for vector shift operand"); 1653 if (OpMI->getOpcode() != TargetOpcode::G_BUILD_VECTOR) 1654 return None; 1655 1656 // Check all operands are identical immediates. 1657 int64_t ImmVal = 0; 1658 for (unsigned Idx = 1; Idx < OpMI->getNumOperands(); ++Idx) { 1659 auto VRegAndVal = getConstantVRegValWithLookThrough(OpMI->getOperand(Idx).getReg(), MRI); 1660 if (!VRegAndVal) 1661 return None; 1662 1663 if (Idx == 1) 1664 ImmVal = VRegAndVal->Value.getSExtValue(); 1665 if (ImmVal != VRegAndVal->Value.getSExtValue()) 1666 return None; 1667 } 1668 1669 return ImmVal; 1670 } 1671 1672 /// Matches and returns the shift immediate value for a SHL instruction given 1673 /// a shift operand. 1674 static Optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, MachineRegisterInfo &MRI) { 1675 Optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI); 1676 if (!ShiftImm) 1677 return None; 1678 // Check the immediate is in range for a SHL. 1679 int64_t Imm = *ShiftImm; 1680 if (Imm < 0) 1681 return None; 1682 switch (SrcTy.getElementType().getSizeInBits()) { 1683 default: 1684 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift"); 1685 return None; 1686 case 8: 1687 if (Imm > 7) 1688 return None; 1689 break; 1690 case 16: 1691 if (Imm > 15) 1692 return None; 1693 break; 1694 case 32: 1695 if (Imm > 31) 1696 return None; 1697 break; 1698 case 64: 1699 if (Imm > 63) 1700 return None; 1701 break; 1702 } 1703 return Imm; 1704 } 1705 1706 bool AArch64InstructionSelector::selectVectorSHL( 1707 MachineInstr &I, MachineRegisterInfo &MRI) const { 1708 assert(I.getOpcode() == TargetOpcode::G_SHL); 1709 Register DstReg = I.getOperand(0).getReg(); 1710 const LLT Ty = MRI.getType(DstReg); 1711 Register Src1Reg = I.getOperand(1).getReg(); 1712 Register Src2Reg = I.getOperand(2).getReg(); 1713 1714 if (!Ty.isVector()) 1715 return false; 1716 1717 // Check if we have a vector of constants on RHS that we can select as the 1718 // immediate form. 1719 Optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI); 1720 1721 unsigned Opc = 0; 1722 if (Ty == LLT::vector(2, 64)) { 1723 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64; 1724 } else if (Ty == LLT::vector(4, 32)) { 1725 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; 1726 } else if (Ty == LLT::vector(2, 32)) { 1727 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; 1728 } else if (Ty == LLT::vector(4, 16)) { 1729 Opc = ImmVal ? 
AArch64::SHLv4i16_shift : AArch64::USHLv4i16; 1730 } else if (Ty == LLT::vector(8, 16)) { 1731 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; 1732 } else if (Ty == LLT::vector(16, 8)) { 1733 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; 1734 } else if (Ty == LLT::vector(8, 8)) { 1735 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; 1736 } else { 1737 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type"); 1738 return false; 1739 } 1740 1741 MachineIRBuilder MIB(I); 1742 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg}); 1743 if (ImmVal) 1744 Shl.addImm(*ImmVal); 1745 else 1746 Shl.addUse(Src2Reg); 1747 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI); 1748 I.eraseFromParent(); 1749 return true; 1750 } 1751 1752 bool AArch64InstructionSelector::selectVectorAshrLshr( 1753 MachineInstr &I, MachineRegisterInfo &MRI) const { 1754 assert(I.getOpcode() == TargetOpcode::G_ASHR || 1755 I.getOpcode() == TargetOpcode::G_LSHR); 1756 Register DstReg = I.getOperand(0).getReg(); 1757 const LLT Ty = MRI.getType(DstReg); 1758 Register Src1Reg = I.getOperand(1).getReg(); 1759 Register Src2Reg = I.getOperand(2).getReg(); 1760 1761 if (!Ty.isVector()) 1762 return false; 1763 1764 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; 1765 1766 // We expect the immediate case to be lowered in the PostLegalCombiner to 1767 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. 1768 1769 // There is not a shift right register instruction, but the shift left 1770 // register instruction takes a signed value, where negative numbers specify a 1771 // right shift. 1772 1773 unsigned Opc = 0; 1774 unsigned NegOpc = 0; 1775 const TargetRegisterClass *RC = 1776 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID), RBI); 1777 if (Ty == LLT::vector(2, 64)) { 1778 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; 1779 NegOpc = AArch64::NEGv2i64; 1780 } else if (Ty == LLT::vector(4, 32)) { 1781 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; 1782 NegOpc = AArch64::NEGv4i32; 1783 } else if (Ty == LLT::vector(2, 32)) { 1784 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; 1785 NegOpc = AArch64::NEGv2i32; 1786 } else if (Ty == LLT::vector(4, 16)) { 1787 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16; 1788 NegOpc = AArch64::NEGv4i16; 1789 } else if (Ty == LLT::vector(8, 16)) { 1790 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; 1791 NegOpc = AArch64::NEGv8i16; 1792 } else if (Ty == LLT::vector(16, 8)) { 1793 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; 1794 NegOpc = AArch64::NEGv16i8; 1795 } else if (Ty == LLT::vector(8, 8)) { 1796 Opc = IsASHR ? 
AArch64::SSHLv8i8 : AArch64::USHLv8i8; 1797 NegOpc = AArch64::NEGv8i8; 1798 } else { 1799 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type"); 1800 return false; 1801 } 1802 1803 MachineIRBuilder MIB(I); 1804 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg}); 1805 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI); 1806 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg}); 1807 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI); 1808 I.eraseFromParent(); 1809 return true; 1810 } 1811 1812 bool AArch64InstructionSelector::selectVaStartAAPCS( 1813 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 1814 return false; 1815 } 1816 1817 bool AArch64InstructionSelector::selectVaStartDarwin( 1818 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 1819 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); 1820 Register ListReg = I.getOperand(0).getReg(); 1821 1822 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 1823 1824 auto MIB = 1825 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri)) 1826 .addDef(ArgsAddrReg) 1827 .addFrameIndex(FuncInfo->getVarArgsStackIndex()) 1828 .addImm(0) 1829 .addImm(0); 1830 1831 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 1832 1833 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui)) 1834 .addUse(ArgsAddrReg) 1835 .addUse(ListReg) 1836 .addImm(0) 1837 .addMemOperand(*I.memoperands_begin()); 1838 1839 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 1840 I.eraseFromParent(); 1841 return true; 1842 } 1843 1844 void AArch64InstructionSelector::materializeLargeCMVal( 1845 MachineInstr &I, const Value *V, unsigned OpFlags) const { 1846 MachineBasicBlock &MBB = *I.getParent(); 1847 MachineFunction &MF = *MBB.getParent(); 1848 MachineRegisterInfo &MRI = MF.getRegInfo(); 1849 MachineIRBuilder MIB(I); 1850 1851 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {}); 1852 MovZ->addOperand(MF, I.getOperand(1)); 1853 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | 1854 AArch64II::MO_NC); 1855 MovZ->addOperand(MF, MachineOperand::CreateImm(0)); 1856 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); 1857 1858 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset, 1859 Register ForceDstReg) { 1860 Register DstReg = ForceDstReg 1861 ? 
ForceDstReg 1862 : MRI.createVirtualRegister(&AArch64::GPR64RegClass); 1863 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg); 1864 if (auto *GV = dyn_cast<GlobalValue>(V)) { 1865 MovI->addOperand(MF, MachineOperand::CreateGA( 1866 GV, MovZ->getOperand(1).getOffset(), Flags)); 1867 } else { 1868 MovI->addOperand( 1869 MF, MachineOperand::CreateBA(cast<BlockAddress>(V), 1870 MovZ->getOperand(1).getOffset(), Flags)); 1871 } 1872 MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); 1873 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); 1874 return DstReg; 1875 }; 1876 Register DstReg = BuildMovK(MovZ.getReg(0), 1877 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); 1878 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); 1879 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); 1880 } 1881 1882 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { 1883 MachineBasicBlock &MBB = *I.getParent(); 1884 MachineFunction &MF = *MBB.getParent(); 1885 MachineRegisterInfo &MRI = MF.getRegInfo(); 1886 1887 switch (I.getOpcode()) { 1888 case TargetOpcode::G_SHL: 1889 case TargetOpcode::G_ASHR: 1890 case TargetOpcode::G_LSHR: { 1891 // These shifts are legalized to have 64 bit shift amounts because we want 1892 // to take advantage of the existing imported selection patterns that assume 1893 // the immediates are s64s. However, if the shifted type is 32 bits and for 1894 // some reason we receive input GMIR that has an s64 shift amount that's not 1895 // a G_CONSTANT, insert a truncate so that we can still select the s32 1896 // register-register variant. 1897 Register SrcReg = I.getOperand(1).getReg(); 1898 Register ShiftReg = I.getOperand(2).getReg(); 1899 const LLT ShiftTy = MRI.getType(ShiftReg); 1900 const LLT SrcTy = MRI.getType(SrcReg); 1901 if (SrcTy.isVector()) 1902 return false; 1903 assert(!ShiftTy.isVector() && "unexpected vector shift ty"); 1904 if (SrcTy.getSizeInBits() != 32 || ShiftTy.getSizeInBits() != 64) 1905 return false; 1906 auto *AmtMI = MRI.getVRegDef(ShiftReg); 1907 assert(AmtMI && "could not find a vreg definition for shift amount"); 1908 if (AmtMI->getOpcode() != TargetOpcode::G_CONSTANT) { 1909 // Insert a subregister copy to implement a 64->32 trunc 1910 MachineIRBuilder MIB(I); 1911 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {}) 1912 .addReg(ShiftReg, 0, AArch64::sub_32); 1913 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); 1914 I.getOperand(2).setReg(Trunc.getReg(0)); 1915 } 1916 return true; 1917 } 1918 case TargetOpcode::G_STORE: 1919 return contractCrossBankCopyIntoStore(I, MRI); 1920 case TargetOpcode::G_PTR_ADD: 1921 return convertPtrAddToAdd(I, MRI); 1922 case TargetOpcode::G_LOAD: { 1923 // For scalar loads of pointers, we try to convert the dest type from p0 1924 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD 1925 // conversion, this should be ok because all users should have been 1926 // selected already, so the type doesn't matter for them. 1927 Register DstReg = I.getOperand(0).getReg(); 1928 const LLT DstTy = MRI.getType(DstReg); 1929 if (!DstTy.isPointer()) 1930 return false; 1931 MRI.setType(DstReg, LLT::scalar(64)); 1932 return true; 1933 } 1934 case AArch64::G_DUP: { 1935 // Convert the type from p0 to s64 to help selection. 
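// Rough illustration of the rewrite performed below (register names are
// placeholders):
//   %dup:fpr(<2 x p0>) = G_DUP %ptr:gpr(p0)
// becomes
//   %int:gpr(s64)       = COPY %ptr:gpr(p0)
//   %dup:fpr(<2 x s64>) = G_DUP %int:gpr(s64)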
1936 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 1937 if (!DstTy.getElementType().isPointer()) 1938 return false; 1939 MachineIRBuilder MIB(I); 1940 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg()); 1941 MRI.setType(I.getOperand(0).getReg(), 1942 DstTy.changeElementType(LLT::scalar(64))); 1943 MRI.setRegBank(NewSrc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); 1944 I.getOperand(1).setReg(NewSrc.getReg(0)); 1945 return true; 1946 } 1947 case TargetOpcode::G_UITOFP: 1948 case TargetOpcode::G_SITOFP: { 1949 // If both source and destination regbanks are FPR, then convert the opcode 1950 // to G_SITOF so that the importer can select it to an fpr variant. 1951 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank 1952 // copy. 1953 Register SrcReg = I.getOperand(1).getReg(); 1954 LLT SrcTy = MRI.getType(SrcReg); 1955 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 1956 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits()) 1957 return false; 1958 1959 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { 1960 if (I.getOpcode() == TargetOpcode::G_SITOFP) 1961 I.setDesc(TII.get(AArch64::G_SITOF)); 1962 else 1963 I.setDesc(TII.get(AArch64::G_UITOF)); 1964 return true; 1965 } 1966 return false; 1967 } 1968 default: 1969 return false; 1970 } 1971 } 1972 1973 /// This lowering tries to look for G_PTR_ADD instructions and then converts 1974 /// them to a standard G_ADD with a COPY on the source. 1975 /// 1976 /// The motivation behind this is to expose the add semantics to the imported 1977 /// tablegen patterns. We shouldn't need to check for uses being loads/stores, 1978 /// because the selector works bottom up, uses before defs. By the time we 1979 /// end up trying to select a G_PTR_ADD, we should have already attempted to 1980 /// fold this into addressing modes and were therefore unsuccessful. 1981 bool AArch64InstructionSelector::convertPtrAddToAdd( 1982 MachineInstr &I, MachineRegisterInfo &MRI) { 1983 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); 1984 Register DstReg = I.getOperand(0).getReg(); 1985 Register AddOp1Reg = I.getOperand(1).getReg(); 1986 const LLT PtrTy = MRI.getType(DstReg); 1987 if (PtrTy.getAddressSpace() != 0) 1988 return false; 1989 1990 MachineIRBuilder MIB(I); 1991 const LLT CastPtrTy = PtrTy.isVector() ? LLT::vector(2, 64) : LLT::scalar(64); 1992 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg); 1993 // Set regbanks on the registers. 1994 if (PtrTy.isVector()) 1995 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID)); 1996 else 1997 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); 1998 1999 // Now turn the %dst(p0) = G_PTR_ADD %base, off into: 2000 // %dst(intty) = G_ADD %intbase, off 2001 I.setDesc(TII.get(TargetOpcode::G_ADD)); 2002 MRI.setType(DstReg, CastPtrTy); 2003 I.getOperand(1).setReg(PtrToInt.getReg(0)); 2004 if (!select(*PtrToInt)) { 2005 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"); 2006 return false; 2007 } 2008 2009 // Also take the opportunity here to try to do some optimization. 2010 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. 
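// Sketch of the pattern matched below (vreg names invented): if the offset
// operand is a negate, e.g. %neg:gpr(s64) = G_SUB %zero, %x with %zero == 0,
// then the G_ADD formed above is rewritten as %dst = G_SUB %intbase, %x.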
2011 Register NegatedReg; 2012 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg)))) 2013 return true; 2014 I.getOperand(2).setReg(NegatedReg); 2015 I.setDesc(TII.get(TargetOpcode::G_SUB)); 2016 return true; 2017 } 2018 2019 bool AArch64InstructionSelector::earlySelectSHL( 2020 MachineInstr &I, MachineRegisterInfo &MRI) const { 2021 // We try to match the immediate variant of LSL, which is actually an alias 2022 // for a special case of UBFM. Otherwise, we fall back to the imported 2023 // selector which will match the register variant. 2024 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op"); 2025 const auto &MO = I.getOperand(2); 2026 auto VRegAndVal = getConstantVRegVal(MO.getReg(), MRI); 2027 if (!VRegAndVal) 2028 return false; 2029 2030 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2031 if (DstTy.isVector()) 2032 return false; 2033 bool Is64Bit = DstTy.getSizeInBits() == 64; 2034 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO); 2035 auto Imm2Fn = Is64Bit ? selectShiftB_64(MO) : selectShiftB_32(MO); 2036 MachineIRBuilder MIB(I); 2037 2038 if (!Imm1Fn || !Imm2Fn) 2039 return false; 2040 2041 auto NewI = 2042 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri, 2043 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()}); 2044 2045 for (auto &RenderFn : *Imm1Fn) 2046 RenderFn(NewI); 2047 for (auto &RenderFn : *Imm2Fn) 2048 RenderFn(NewI); 2049 2050 I.eraseFromParent(); 2051 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); 2052 } 2053 2054 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( 2055 MachineInstr &I, MachineRegisterInfo &MRI) { 2056 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE"); 2057 // If we're storing a scalar, it doesn't matter what register bank that 2058 // scalar is on. All that matters is the size. 2059 // 2060 // So, if we see something like this (with a 32-bit scalar as an example): 2061 // 2062 // %x:gpr(s32) = ... something ... 2063 // %y:fpr(s32) = COPY %x:gpr(s32) 2064 // G_STORE %y:fpr(s32) 2065 // 2066 // We can fix this up into something like this: 2067 // 2068 // G_STORE %x:gpr(s32) 2069 // 2070 // And then continue the selection process normally. 2071 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI); 2072 if (!DefDstReg.isValid()) 2073 return false; 2074 LLT DefDstTy = MRI.getType(DefDstReg); 2075 Register StoreSrcReg = I.getOperand(0).getReg(); 2076 LLT StoreSrcTy = MRI.getType(StoreSrcReg); 2077 2078 // If we get something strange like a physical register, then we shouldn't 2079 // go any further. 2080 if (!DefDstTy.isValid()) 2081 return false; 2082 2083 // Are the source and dst types the same size? 2084 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) 2085 return false; 2086 2087 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == 2088 RBI.getRegBank(DefDstReg, MRI, TRI)) 2089 return false; 2090 2091 // We have a cross-bank copy, which is entering a store. Let's fold it. 
2092 I.getOperand(0).setReg(DefDstReg); 2093 return true; 2094 } 2095 2096 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const { 2097 assert(I.getParent() && "Instruction should be in a basic block!"); 2098 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 2099 2100 MachineBasicBlock &MBB = *I.getParent(); 2101 MachineFunction &MF = *MBB.getParent(); 2102 MachineRegisterInfo &MRI = MF.getRegInfo(); 2103 2104 switch (I.getOpcode()) { 2105 case TargetOpcode::G_BR: { 2106 // If the branch jumps to the fallthrough block, don't bother emitting it. 2107 // Only do this for -O0 for a good code size improvement, because when 2108 // optimizations are enabled we want to leave this choice to 2109 // MachineBlockPlacement. 2110 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None; 2111 if (EnableOpt || !MBB.isLayoutSuccessor(I.getOperand(0).getMBB())) 2112 return false; 2113 I.eraseFromParent(); 2114 return true; 2115 } 2116 case TargetOpcode::G_SHL: 2117 return earlySelectSHL(I, MRI); 2118 case TargetOpcode::G_CONSTANT: { 2119 bool IsZero = false; 2120 if (I.getOperand(1).isCImm()) 2121 IsZero = I.getOperand(1).getCImm()->getZExtValue() == 0; 2122 else if (I.getOperand(1).isImm()) 2123 IsZero = I.getOperand(1).getImm() == 0; 2124 2125 if (!IsZero) 2126 return false; 2127 2128 Register DefReg = I.getOperand(0).getReg(); 2129 LLT Ty = MRI.getType(DefReg); 2130 if (Ty.getSizeInBits() == 64) { 2131 I.getOperand(1).ChangeToRegister(AArch64::XZR, false); 2132 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); 2133 } else if (Ty.getSizeInBits() == 32) { 2134 I.getOperand(1).ChangeToRegister(AArch64::WZR, false); 2135 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI); 2136 } else 2137 return false; 2138 2139 I.setDesc(TII.get(TargetOpcode::COPY)); 2140 return true; 2141 } 2142 default: 2143 return false; 2144 } 2145 } 2146 2147 bool AArch64InstructionSelector::select(MachineInstr &I) { 2148 assert(I.getParent() && "Instruction should be in a basic block!"); 2149 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 2150 2151 MachineBasicBlock &MBB = *I.getParent(); 2152 MachineFunction &MF = *MBB.getParent(); 2153 MachineRegisterInfo &MRI = MF.getRegInfo(); 2154 2155 const AArch64Subtarget *Subtarget = 2156 &static_cast<const AArch64Subtarget &>(MF.getSubtarget()); 2157 if (Subtarget->requiresStrictAlign()) { 2158 // We don't support this feature yet. 2159 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n"); 2160 return false; 2161 } 2162 2163 unsigned Opcode = I.getOpcode(); 2164 // G_PHI requires same handling as PHI 2165 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { 2166 // Certain non-generic instructions also need some special handling. 
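// For instance (sketch of what follows): LOAD_STACK_GUARD only needs its
// operands constrained, while PHI/G_PHI are given the plain PHI descriptor
// and have their def constrained to a class derived from its type and bank.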
2167 2168 if (Opcode == TargetOpcode::LOAD_STACK_GUARD) 2169 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2170 2171 if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) { 2172 const Register DefReg = I.getOperand(0).getReg(); 2173 const LLT DefTy = MRI.getType(DefReg); 2174 2175 const RegClassOrRegBank &RegClassOrBank = 2176 MRI.getRegClassOrRegBank(DefReg); 2177 2178 const TargetRegisterClass *DefRC 2179 = RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); 2180 if (!DefRC) { 2181 if (!DefTy.isValid()) { 2182 LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n"); 2183 return false; 2184 } 2185 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); 2186 DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI); 2187 if (!DefRC) { 2188 LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n"); 2189 return false; 2190 } 2191 } 2192 2193 I.setDesc(TII.get(TargetOpcode::PHI)); 2194 2195 return RBI.constrainGenericRegister(DefReg, *DefRC, MRI); 2196 } 2197 2198 if (I.isCopy()) 2199 return selectCopy(I, TII, MRI, TRI, RBI); 2200 2201 return true; 2202 } 2203 2204 2205 if (I.getNumOperands() != I.getNumExplicitOperands()) { 2206 LLVM_DEBUG( 2207 dbgs() << "Generic instruction has unexpected implicit operands\n"); 2208 return false; 2209 } 2210 2211 // Try to do some lowering before we start instruction selecting. These 2212 // lowerings are purely transformations on the input G_MIR and so selection 2213 // must continue after any modification of the instruction. 2214 if (preISelLower(I)) { 2215 Opcode = I.getOpcode(); // The opcode may have been modified, refresh it. 2216 } 2217 2218 // There may be patterns where the importer can't deal with them optimally, 2219 // but does select it to a suboptimal sequence so our custom C++ selection 2220 // code later never has a chance to work on it. Therefore, we have an early 2221 // selection attempt here to give priority to certain selection routines 2222 // over the imported ones. 2223 if (earlySelect(I)) 2224 return true; 2225 2226 if (selectImpl(I, *CoverageInfo)) 2227 return true; 2228 2229 LLT Ty = 2230 I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{}; 2231 2232 MachineIRBuilder MIB(I); 2233 2234 switch (Opcode) { 2235 case TargetOpcode::G_BRCOND: 2236 return selectCompareBranch(I, MF, MRI); 2237 2238 case TargetOpcode::G_BRINDIRECT: { 2239 I.setDesc(TII.get(AArch64::BR)); 2240 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2241 } 2242 2243 case TargetOpcode::G_BRJT: 2244 return selectBrJT(I, MRI); 2245 2246 case AArch64::G_ADD_LOW: { 2247 // This op may have been separated from it's ADRP companion by the localizer 2248 // or some other code motion pass. Given that many CPUs will try to 2249 // macro fuse these operations anyway, select this into a MOVaddr pseudo 2250 // which will later be expanded into an ADRP+ADD pair after scheduling. 
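// Rough shape of the two outcomes below (operands abbreviated): if the base
// is still the companion ADRP, e.g.
//   %page:gpr64 = ADRP @sym
//   %addr       = G_ADD_LOW %page, @sym
// the pair is folded into a single MOVaddr pseudo carrying both the page and
// page-offset references; otherwise the G_ADD_LOW is selected as an ADDXri.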
2251 MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg()); 2252 if (BaseMI->getOpcode() != AArch64::ADRP) { 2253 I.setDesc(TII.get(AArch64::ADDXri)); 2254 I.addOperand(MachineOperand::CreateImm(0)); 2255 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2256 } 2257 assert(TM.getCodeModel() == CodeModel::Small && 2258 "Expected small code model"); 2259 MachineIRBuilder MIB(I); 2260 auto Op1 = BaseMI->getOperand(1); 2261 auto Op2 = I.getOperand(2); 2262 auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {}) 2263 .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(), 2264 Op1.getTargetFlags()) 2265 .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(), 2266 Op2.getTargetFlags()); 2267 I.eraseFromParent(); 2268 return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI); 2269 } 2270 2271 case TargetOpcode::G_BSWAP: { 2272 // Handle vector types for G_BSWAP directly. 2273 Register DstReg = I.getOperand(0).getReg(); 2274 LLT DstTy = MRI.getType(DstReg); 2275 2276 // We should only get vector types here; everything else is handled by the 2277 // importer right now. 2278 if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) { 2279 LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n"); 2280 return false; 2281 } 2282 2283 // Only handle 4 and 2 element vectors for now. 2284 // TODO: 16-bit elements. 2285 unsigned NumElts = DstTy.getNumElements(); 2286 if (NumElts != 4 && NumElts != 2) { 2287 LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n"); 2288 return false; 2289 } 2290 2291 // Choose the correct opcode for the supported types. Right now, that's 2292 // v2s32, v4s32, and v2s64. 2293 unsigned Opc = 0; 2294 unsigned EltSize = DstTy.getElementType().getSizeInBits(); 2295 if (EltSize == 32) 2296 Opc = (DstTy.getNumElements() == 2) ? AArch64::REV32v8i8 2297 : AArch64::REV32v16i8; 2298 else if (EltSize == 64) 2299 Opc = AArch64::REV64v16i8; 2300 2301 // We should always get something by the time we get here... 2302 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?"); 2303 2304 I.setDesc(TII.get(Opc)); 2305 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2306 } 2307 2308 case TargetOpcode::G_FCONSTANT: 2309 case TargetOpcode::G_CONSTANT: { 2310 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; 2311 2312 const LLT s8 = LLT::scalar(8); 2313 const LLT s16 = LLT::scalar(16); 2314 const LLT s32 = LLT::scalar(32); 2315 const LLT s64 = LLT::scalar(64); 2316 const LLT s128 = LLT::scalar(128); 2317 const LLT p0 = LLT::pointer(0, 64); 2318 2319 const Register DefReg = I.getOperand(0).getReg(); 2320 const LLT DefTy = MRI.getType(DefReg); 2321 const unsigned DefSize = DefTy.getSizeInBits(); 2322 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2323 2324 // FIXME: Redundant check, but even less readable when factored out. 2325 if (isFP) { 2326 if (Ty != s32 && Ty != s64 && Ty != s128) { 2327 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty 2328 << " constant, expected: " << s32 << " or " << s64 2329 << " or " << s128 << '\n'); 2330 return false; 2331 } 2332 2333 if (RB.getID() != AArch64::FPRRegBankID) { 2334 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty 2335 << " constant on bank: " << RB 2336 << ", expected: FPR\n"); 2337 return false; 2338 } 2339 2340 // The case when we have 0.0 is covered by tablegen. Reject it here so we 2341 // can be sure tablegen works correctly and isn't rescued by this code. 2342 // 0.0 is not covered by tablegen for FP128. 
So we will handle this 2343 // scenario in the code here. 2344 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0)) 2345 return false; 2346 } else { 2347 // s32 and s64 are covered by tablegen. 2348 if (Ty != p0 && Ty != s8 && Ty != s16) { 2349 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty 2350 << " constant, expected: " << s32 << ", " << s64 2351 << ", or " << p0 << '\n'); 2352 return false; 2353 } 2354 2355 if (RB.getID() != AArch64::GPRRegBankID) { 2356 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty 2357 << " constant on bank: " << RB 2358 << ", expected: GPR\n"); 2359 return false; 2360 } 2361 } 2362 2363 // We allow G_CONSTANT of types < 32b. 2364 const unsigned MovOpc = 2365 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm; 2366 2367 if (isFP) { 2368 // Either emit a FMOV, or emit a copy to emit a normal mov. 2369 const TargetRegisterClass &GPRRC = 2370 DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass; 2371 const TargetRegisterClass &FPRRC = 2372 DefSize == 32 ? AArch64::FPR32RegClass 2373 : (DefSize == 64 ? AArch64::FPR64RegClass 2374 : AArch64::FPR128RegClass); 2375 2376 // Can we use a FMOV instruction to represent the immediate? 2377 if (emitFMovForFConstant(I, MRI)) 2378 return true; 2379 2380 // For 64b values, emit a constant pool load instead. 2381 if (DefSize == 64 || DefSize == 128) { 2382 auto *FPImm = I.getOperand(1).getFPImm(); 2383 MachineIRBuilder MIB(I); 2384 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB); 2385 if (!LoadMI) { 2386 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n"); 2387 return false; 2388 } 2389 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()}); 2390 I.eraseFromParent(); 2391 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI); 2392 } 2393 2394 // Nope. Emit a copy and use a normal mov instead. 2395 const Register DefGPRReg = MRI.createVirtualRegister(&GPRRC); 2396 MachineOperand &RegOp = I.getOperand(0); 2397 RegOp.setReg(DefGPRReg); 2398 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 2399 MIB.buildCopy({DefReg}, {DefGPRReg}); 2400 2401 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) { 2402 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n"); 2403 return false; 2404 } 2405 2406 MachineOperand &ImmOp = I.getOperand(1); 2407 // FIXME: Is going through int64_t always correct? 2408 ImmOp.ChangeToImmediate( 2409 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); 2410 } else if (I.getOperand(1).isCImm()) { 2411 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue(); 2412 I.getOperand(1).ChangeToImmediate(Val); 2413 } else if (I.getOperand(1).isImm()) { 2414 uint64_t Val = I.getOperand(1).getImm(); 2415 I.getOperand(1).ChangeToImmediate(Val); 2416 } 2417 2418 I.setDesc(TII.get(MovOpc)); 2419 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2420 return true; 2421 } 2422 case TargetOpcode::G_EXTRACT: { 2423 Register DstReg = I.getOperand(0).getReg(); 2424 Register SrcReg = I.getOperand(1).getReg(); 2425 LLT SrcTy = MRI.getType(SrcReg); 2426 LLT DstTy = MRI.getType(DstReg); 2427 (void)DstTy; 2428 unsigned SrcSize = SrcTy.getSizeInBits(); 2429 2430 if (SrcTy.getSizeInBits() > 64) { 2431 // This should be an extract of an s128, which is like a vector extract. 2432 if (SrcTy.getSizeInBits() != 128) 2433 return false; 2434 // Only support extracting 64 bits from an s128 at the moment. 
2435 if (DstTy.getSizeInBits() != 64) 2436 return false; 2437 2438 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 2439 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2440 // Check we have the right regbank always. 2441 assert(SrcRB.getID() == AArch64::FPRRegBankID && 2442 DstRB.getID() == AArch64::FPRRegBankID && 2443 "Wrong extract regbank!"); 2444 (void)SrcRB; 2445 2446 // Emit the same code as a vector extract. 2447 // Offset must be a multiple of 64. 2448 unsigned Offset = I.getOperand(2).getImm(); 2449 if (Offset % 64 != 0) 2450 return false; 2451 unsigned LaneIdx = Offset / 64; 2452 MachineIRBuilder MIB(I); 2453 MachineInstr *Extract = emitExtractVectorElt( 2454 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); 2455 if (!Extract) 2456 return false; 2457 I.eraseFromParent(); 2458 return true; 2459 } 2460 2461 I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri)); 2462 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + 2463 Ty.getSizeInBits() - 1); 2464 2465 if (SrcSize < 64) { 2466 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && 2467 "unexpected G_EXTRACT types"); 2468 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2469 } 2470 2471 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 2472 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 2473 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 2474 .addReg(DstReg, 0, AArch64::sub_32); 2475 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 2476 AArch64::GPR32RegClass, MRI); 2477 I.getOperand(0).setReg(DstReg); 2478 2479 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2480 } 2481 2482 case TargetOpcode::G_INSERT: { 2483 LLT SrcTy = MRI.getType(I.getOperand(2).getReg()); 2484 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2485 unsigned DstSize = DstTy.getSizeInBits(); 2486 // Larger inserts are vectors, same-size ones should be something else by 2487 // now (split up or turned into COPYs). 2488 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) 2489 return false; 2490 2491 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri)); 2492 unsigned LSB = I.getOperand(3).getImm(); 2493 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); 2494 I.getOperand(3).setImm((DstSize - LSB) % DstSize); 2495 MachineInstrBuilder(MF, I).addImm(Width - 1); 2496 2497 if (DstSize < 64) { 2498 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && 2499 "unexpected G_INSERT types"); 2500 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2501 } 2502 2503 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 2504 BuildMI(MBB, I.getIterator(), I.getDebugLoc(), 2505 TII.get(AArch64::SUBREG_TO_REG)) 2506 .addDef(SrcReg) 2507 .addImm(0) 2508 .addUse(I.getOperand(2).getReg()) 2509 .addImm(AArch64::sub_32); 2510 RBI.constrainGenericRegister(I.getOperand(2).getReg(), 2511 AArch64::GPR32RegClass, MRI); 2512 I.getOperand(2).setReg(SrcReg); 2513 2514 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2515 } 2516 case TargetOpcode::G_FRAME_INDEX: { 2517 // allocas and G_FRAME_INDEX are only supported in addrspace(0). 2518 if (Ty != LLT::pointer(0, 64)) { 2519 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty 2520 << ", expected: " << LLT::pointer(0, 64) << '\n'); 2521 return false; 2522 } 2523 I.setDesc(TII.get(AArch64::ADDXri)); 2524 2525 // MOs for a #0 shifted immediate. 
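// For example (illustrative): %p:gpr(p0) = G_FRAME_INDEX %stack.0 becomes
// %p = ADDXri %stack.0, 0, 0, where the two zero immediates added below are
// the unshifted #0 offset.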
2526 I.addOperand(MachineOperand::CreateImm(0)); 2527 I.addOperand(MachineOperand::CreateImm(0)); 2528 2529 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2530 } 2531 2532 case TargetOpcode::G_GLOBAL_VALUE: { 2533 auto GV = I.getOperand(1).getGlobal(); 2534 if (GV->isThreadLocal()) 2535 return selectTLSGlobalValue(I, MRI); 2536 2537 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); 2538 if (OpFlags & AArch64II::MO_GOT) { 2539 I.setDesc(TII.get(AArch64::LOADgot)); 2540 I.getOperand(1).setTargetFlags(OpFlags); 2541 } else if (TM.getCodeModel() == CodeModel::Large) { 2542 // Materialize the global using movz/movk instructions. 2543 materializeLargeCMVal(I, GV, OpFlags); 2544 I.eraseFromParent(); 2545 return true; 2546 } else if (TM.getCodeModel() == CodeModel::Tiny) { 2547 I.setDesc(TII.get(AArch64::ADR)); 2548 I.getOperand(1).setTargetFlags(OpFlags); 2549 } else { 2550 I.setDesc(TII.get(AArch64::MOVaddr)); 2551 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); 2552 MachineInstrBuilder MIB(MF, I); 2553 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(), 2554 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 2555 } 2556 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2557 } 2558 2559 case TargetOpcode::G_ZEXTLOAD: 2560 case TargetOpcode::G_LOAD: 2561 case TargetOpcode::G_STORE: { 2562 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; 2563 MachineIRBuilder MIB(I); 2564 2565 LLT PtrTy = MRI.getType(I.getOperand(1).getReg()); 2566 2567 if (PtrTy != LLT::pointer(0, 64)) { 2568 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy 2569 << ", expected: " << LLT::pointer(0, 64) << '\n'); 2570 return false; 2571 } 2572 2573 auto &MemOp = **I.memoperands_begin(); 2574 uint64_t MemSizeInBytes = MemOp.getSize(); 2575 if (MemOp.isAtomic()) { 2576 // For now we just support s8 acquire loads to be able to compile stack 2577 // protector code. 2578 if (MemOp.getOrdering() == AtomicOrdering::Acquire && 2579 MemSizeInBytes == 1) { 2580 I.setDesc(TII.get(AArch64::LDARB)); 2581 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2582 } 2583 LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n"); 2584 return false; 2585 } 2586 unsigned MemSizeInBits = MemSizeInBytes * 8; 2587 2588 #ifndef NDEBUG 2589 const Register PtrReg = I.getOperand(1).getReg(); 2590 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); 2591 // Sanity-check the pointer register. 2592 assert(PtrRB.getID() == AArch64::GPRRegBankID && 2593 "Load/Store pointer operand isn't a GPR"); 2594 assert(MRI.getType(PtrReg).isPointer() && 2595 "Load/Store pointer operand isn't a pointer"); 2596 #endif 2597 2598 const Register ValReg = I.getOperand(0).getReg(); 2599 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); 2600 2601 // Helper lambda for partially selecting I. Either returns the original 2602 // instruction with an updated opcode, or a new instruction. 2603 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { 2604 bool IsStore = I.getOpcode() == TargetOpcode::G_STORE; 2605 const unsigned NewOpc = 2606 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); 2607 if (NewOpc == I.getOpcode()) 2608 return nullptr; 2609 // Check if we can fold anything into the addressing mode. 2610 auto AddrModeFns = 2611 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); 2612 if (!AddrModeFns) { 2613 // Can't fold anything. Use the original instruction. 
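// E.g. (sketch, assuming a simple 32-bit GPR load): a G_LOAD of an s32 whose
// address cannot be folded is rewritten in place to LDRWui %ptr, 0, keeping
// the original memory operands.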
2614 I.setDesc(TII.get(NewOpc)); 2615 I.addOperand(MachineOperand::CreateImm(0)); 2616 return &I; 2617 } 2618 2619 // Folded something. Create a new instruction and return it. 2620 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); 2621 IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg); 2622 NewInst.cloneMemRefs(I); 2623 for (auto &Fn : *AddrModeFns) 2624 Fn(NewInst); 2625 I.eraseFromParent(); 2626 return &*NewInst; 2627 }; 2628 2629 MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); 2630 if (!LoadStore) 2631 return false; 2632 2633 // If we're storing a 0, use WZR/XZR. 2634 if (Opcode == TargetOpcode::G_STORE) { 2635 auto CVal = getConstantVRegValWithLookThrough( 2636 LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true, 2637 /*HandleFConstants = */ false); 2638 if (CVal && CVal->Value == 0) { 2639 switch (LoadStore->getOpcode()) { 2640 case AArch64::STRWui: 2641 case AArch64::STRHHui: 2642 case AArch64::STRBBui: 2643 LoadStore->getOperand(0).setReg(AArch64::WZR); 2644 break; 2645 case AArch64::STRXui: 2646 LoadStore->getOperand(0).setReg(AArch64::XZR); 2647 break; 2648 } 2649 } 2650 } 2651 2652 if (IsZExtLoad) { 2653 // The zextload from a smaller type to i32 should be handled by the 2654 // importer. 2655 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) 2656 return false; 2657 // If we have a ZEXTLOAD then change the load's type to be a narrower reg 2658 // and zero_extend with SUBREG_TO_REG. 2659 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2660 Register DstReg = LoadStore->getOperand(0).getReg(); 2661 LoadStore->getOperand(0).setReg(LdReg); 2662 2663 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator())); 2664 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) 2665 .addImm(0) 2666 .addUse(LdReg) 2667 .addImm(AArch64::sub_32); 2668 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); 2669 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, 2670 MRI); 2671 } 2672 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); 2673 } 2674 2675 case TargetOpcode::G_SMULH: 2676 case TargetOpcode::G_UMULH: { 2677 // Reject the various things we don't support yet. 2678 if (unsupportedBinOp(I, RBI, MRI, TRI)) 2679 return false; 2680 2681 const Register DefReg = I.getOperand(0).getReg(); 2682 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2683 2684 if (RB.getID() != AArch64::GPRRegBankID) { 2685 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n"); 2686 return false; 2687 } 2688 2689 if (Ty != LLT::scalar(64)) { 2690 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty 2691 << ", expected: " << LLT::scalar(64) << '\n'); 2692 return false; 2693 } 2694 2695 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr 2696 : AArch64::UMULHrr; 2697 I.setDesc(TII.get(NewOpc)); 2698 2699 // Now that we selected an opcode, we need to constrain the register 2700 // operands to use appropriate classes. 
2701 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2702 } 2703 case TargetOpcode::G_LSHR: 2704 case TargetOpcode::G_ASHR: 2705 if (MRI.getType(I.getOperand(0).getReg()).isVector()) 2706 return selectVectorAshrLshr(I, MRI); 2707 LLVM_FALLTHROUGH; 2708 case TargetOpcode::G_SHL: 2709 if (Opcode == TargetOpcode::G_SHL && 2710 MRI.getType(I.getOperand(0).getReg()).isVector()) 2711 return selectVectorSHL(I, MRI); 2712 LLVM_FALLTHROUGH; 2713 case TargetOpcode::G_FADD: 2714 case TargetOpcode::G_FSUB: 2715 case TargetOpcode::G_FMUL: 2716 case TargetOpcode::G_FDIV: 2717 case TargetOpcode::G_OR: { 2718 // Reject the various things we don't support yet. 2719 if (unsupportedBinOp(I, RBI, MRI, TRI)) 2720 return false; 2721 2722 const unsigned OpSize = Ty.getSizeInBits(); 2723 2724 const Register DefReg = I.getOperand(0).getReg(); 2725 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2726 2727 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize); 2728 if (NewOpc == I.getOpcode()) 2729 return false; 2730 2731 I.setDesc(TII.get(NewOpc)); 2732 // FIXME: Should the type be always reset in setDesc? 2733 2734 // Now that we selected an opcode, we need to constrain the register 2735 // operands to use appropriate classes. 2736 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2737 } 2738 2739 case TargetOpcode::G_PTR_ADD: { 2740 MachineIRBuilder MIRBuilder(I); 2741 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), 2742 MIRBuilder); 2743 I.eraseFromParent(); 2744 return true; 2745 } 2746 case TargetOpcode::G_SADDO: 2747 case TargetOpcode::G_UADDO: 2748 case TargetOpcode::G_SSUBO: 2749 case TargetOpcode::G_USUBO: { 2750 // Emit the operation and get the correct condition code. 2751 MachineIRBuilder MIRBuilder(I); 2752 auto OpAndCC = emitOverflowOp(Opcode, I.getOperand(0).getReg(), 2753 I.getOperand(2), I.getOperand(3), MIRBuilder); 2754 2755 // Now, put the overflow result in the register given by the first operand 2756 // to the overflow op. CSINC increments the result when the predicate is 2757 // false, so to get the increment when it's true, we need to use the 2758 // inverse. In this case, we want to increment when carry is set. 
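// Illustrative sketch (vreg names are placeholders): for
//   %res, %ov = G_UADDO %a, %b
// the emitted sequence is roughly
//   %res:gpr32 = ADDSWrr %a, %b           (overflow condition: HS)
//   %ov:gpr32  = CSINCWr $wzr, $wzr, LO   (inverted HS; yields 1 iff HS holds)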
2759 Register ZReg = AArch64::WZR; 2760 auto CsetMI = MIRBuilder 2761 .buildInstr(AArch64::CSINCWr, {I.getOperand(1).getReg()}, 2762 {ZReg, ZReg}) 2763 .addImm(getInvertedCondCode(OpAndCC.second)); 2764 constrainSelectedInstRegOperands(*CsetMI, TII, TRI, RBI); 2765 I.eraseFromParent(); 2766 return true; 2767 } 2768 2769 case TargetOpcode::G_PTRMASK: { 2770 Register MaskReg = I.getOperand(2).getReg(); 2771 Optional<int64_t> MaskVal = getConstantVRegSExtVal(MaskReg, MRI); 2772 // TODO: Implement arbitrary cases 2773 if (!MaskVal || !isShiftedMask_64(*MaskVal)) 2774 return false; 2775 2776 uint64_t Mask = *MaskVal; 2777 I.setDesc(TII.get(AArch64::ANDXri)); 2778 I.getOperand(2).ChangeToImmediate( 2779 AArch64_AM::encodeLogicalImmediate(Mask, 64)); 2780 2781 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2782 } 2783 case TargetOpcode::G_PTRTOINT: 2784 case TargetOpcode::G_TRUNC: { 2785 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2786 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 2787 2788 const Register DstReg = I.getOperand(0).getReg(); 2789 const Register SrcReg = I.getOperand(1).getReg(); 2790 2791 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2792 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 2793 2794 if (DstRB.getID() != SrcRB.getID()) { 2795 LLVM_DEBUG( 2796 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"); 2797 return false; 2798 } 2799 2800 if (DstRB.getID() == AArch64::GPRRegBankID) { 2801 const TargetRegisterClass *DstRC = 2802 getRegClassForTypeOnBank(DstTy, DstRB, RBI); 2803 if (!DstRC) 2804 return false; 2805 2806 const TargetRegisterClass *SrcRC = 2807 getRegClassForTypeOnBank(SrcTy, SrcRB, RBI); 2808 if (!SrcRC) 2809 return false; 2810 2811 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 2812 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 2813 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"); 2814 return false; 2815 } 2816 2817 if (DstRC == SrcRC) { 2818 // Nothing to be done 2819 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) && 2820 SrcTy == LLT::scalar(64)) { 2821 llvm_unreachable("TableGen can import this case"); 2822 return false; 2823 } else if (DstRC == &AArch64::GPR32RegClass && 2824 SrcRC == &AArch64::GPR64RegClass) { 2825 I.getOperand(1).setSubReg(AArch64::sub_32); 2826 } else { 2827 LLVM_DEBUG( 2828 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"); 2829 return false; 2830 } 2831 2832 I.setDesc(TII.get(TargetOpcode::COPY)); 2833 return true; 2834 } else if (DstRB.getID() == AArch64::FPRRegBankID) { 2835 if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) { 2836 I.setDesc(TII.get(AArch64::XTNv4i16)); 2837 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2838 return true; 2839 } 2840 2841 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { 2842 MachineIRBuilder MIB(I); 2843 MachineInstr *Extract = emitExtractVectorElt( 2844 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB); 2845 if (!Extract) 2846 return false; 2847 I.eraseFromParent(); 2848 return true; 2849 } 2850 2851 // We might have a vector G_PTRTOINT, in which case just emit a COPY. 
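// E.g. (illustrative): %v:fpr(<2 x s64>) = G_PTRTOINT %p:fpr(<2 x p0>) is a
// bit-preserving rename on FPR, so it is selected below as a plain COPY.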
2852 if (Opcode == TargetOpcode::G_PTRTOINT) { 2853 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector"); 2854 I.setDesc(TII.get(TargetOpcode::COPY)); 2855 return true; 2856 } 2857 } 2858 2859 return false; 2860 } 2861 2862 case TargetOpcode::G_ANYEXT: { 2863 const Register DstReg = I.getOperand(0).getReg(); 2864 const Register SrcReg = I.getOperand(1).getReg(); 2865 2866 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI); 2867 if (RBDst.getID() != AArch64::GPRRegBankID) { 2868 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst 2869 << ", expected: GPR\n"); 2870 return false; 2871 } 2872 2873 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI); 2874 if (RBSrc.getID() != AArch64::GPRRegBankID) { 2875 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc 2876 << ", expected: GPR\n"); 2877 return false; 2878 } 2879 2880 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); 2881 2882 if (DstSize == 0) { 2883 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n"); 2884 return false; 2885 } 2886 2887 if (DstSize != 64 && DstSize > 32) { 2888 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize 2889 << ", expected: 32 or 64\n"); 2890 return false; 2891 } 2892 // At this point G_ANYEXT is just like a plain COPY, but we need 2893 // to explicitly form the 64-bit value if any. 2894 if (DstSize > 32) { 2895 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass); 2896 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) 2897 .addDef(ExtSrc) 2898 .addImm(0) 2899 .addUse(SrcReg) 2900 .addImm(AArch64::sub_32); 2901 I.getOperand(1).setReg(ExtSrc); 2902 } 2903 return selectCopy(I, TII, MRI, TRI, RBI); 2904 } 2905 2906 case TargetOpcode::G_ZEXT: 2907 case TargetOpcode::G_SEXT_INREG: 2908 case TargetOpcode::G_SEXT: { 2909 unsigned Opcode = I.getOpcode(); 2910 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT; 2911 const Register DefReg = I.getOperand(0).getReg(); 2912 Register SrcReg = I.getOperand(1).getReg(); 2913 const LLT DstTy = MRI.getType(DefReg); 2914 const LLT SrcTy = MRI.getType(SrcReg); 2915 unsigned DstSize = DstTy.getSizeInBits(); 2916 unsigned SrcSize = SrcTy.getSizeInBits(); 2917 2918 // SEXT_INREG has the same src reg size as dst, the size of the value to be 2919 // extended is encoded in the imm. 2920 if (Opcode == TargetOpcode::G_SEXT_INREG) 2921 SrcSize = I.getOperand(2).getImm(); 2922 2923 if (DstTy.isVector()) 2924 return false; // Should be handled by imported patterns. 2925 2926 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == 2927 AArch64::GPRRegBankID && 2928 "Unexpected ext regbank"); 2929 2930 MachineIRBuilder MIB(I); 2931 MachineInstr *ExtI; 2932 2933 // First check if we're extending the result of a load which has a dest type 2934 // smaller than 32 bits, then this zext is redundant. GPR32 is the smallest 2935 // GPR register on AArch64 and all loads which are smaller automatically 2936 // zero-extend the upper bits. E.g. 
2937 // %v(s8) = G_LOAD %p, :: (load 1) 2938 // %v2(s32) = G_ZEXT %v(s8) 2939 if (!IsSigned) { 2940 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); 2941 bool IsGPR = 2942 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID; 2943 if (LoadMI && IsGPR) { 2944 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); 2945 unsigned BytesLoaded = MemOp->getSize(); 2946 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) 2947 return selectCopy(I, TII, MRI, TRI, RBI); 2948 } 2949 2950 // If we are zero extending from 32 bits to 64 bits, it's possible that 2951 // the instruction implicitly does the zero extend for us. In that case, 2952 // we can just emit a SUBREG_TO_REG. 2953 if (IsGPR && SrcSize == 32 && DstSize == 64) { 2954 // Unlike with the G_LOAD case, we don't want to look through copies 2955 // here. 2956 MachineInstr *Def = MRI.getVRegDef(SrcReg); 2957 if (Def && isDef32(*Def)) { 2958 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) 2959 .addImm(0) 2960 .addUse(SrcReg) 2961 .addImm(AArch64::sub_32); 2962 2963 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, 2964 MRI)) { 2965 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n"); 2966 return false; 2967 } 2968 2969 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, 2970 MRI)) { 2971 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n"); 2972 return false; 2973 } 2974 2975 I.eraseFromParent(); 2976 return true; 2977 } 2978 } 2979 } 2980 2981 if (DstSize == 64) { 2982 if (Opcode != TargetOpcode::G_SEXT_INREG) { 2983 // FIXME: Can we avoid manually doing this? 2984 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, 2985 MRI)) { 2986 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) 2987 << " operand\n"); 2988 return false; 2989 } 2990 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, 2991 {&AArch64::GPR64RegClass}, {}) 2992 .addImm(0) 2993 .addUse(SrcReg) 2994 .addImm(AArch64::sub_32) 2995 .getReg(0); 2996 } 2997 2998 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, 2999 {DefReg}, {SrcReg}) 3000 .addImm(0) 3001 .addImm(SrcSize - 1); 3002 } else if (DstSize <= 32) { 3003 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri, 3004 {DefReg}, {SrcReg}) 3005 .addImm(0) 3006 .addImm(SrcSize - 1); 3007 } else { 3008 return false; 3009 } 3010 3011 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 3012 I.eraseFromParent(); 3013 return true; 3014 } 3015 3016 case TargetOpcode::G_SITOFP: 3017 case TargetOpcode::G_UITOFP: 3018 case TargetOpcode::G_FPTOSI: 3019 case TargetOpcode::G_FPTOUI: { 3020 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), 3021 SrcTy = MRI.getType(I.getOperand(1).getReg()); 3022 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy); 3023 if (NewOpc == Opcode) 3024 return false; 3025 3026 I.setDesc(TII.get(NewOpc)); 3027 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3028 3029 return true; 3030 } 3031 3032 case TargetOpcode::G_FREEZE: 3033 return selectCopy(I, TII, MRI, TRI, RBI); 3034 3035 case TargetOpcode::G_INTTOPTR: 3036 // The importer is currently unable to import pointer types since they 3037 // didn't exist in SelectionDAG. 3038 return selectCopy(I, TII, MRI, TRI, RBI); 3039 3040 case TargetOpcode::G_BITCAST: 3041 // Imported SelectionDAG rules can handle every bitcast except those that 3042 // bitcast from a type to the same type. Ideally, these shouldn't occur 3043 // but we might not run an optimizer that deletes them. 
The other exception 3044 // is bitcasts involving pointer types, as SelectionDAG has no knowledge 3045 // of them. 3046 return selectCopy(I, TII, MRI, TRI, RBI); 3047 3048 case TargetOpcode::G_SELECT: { 3049 if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) { 3050 LLVM_DEBUG(dbgs() << "G_SELECT cond has type: " << Ty 3051 << ", expected: " << LLT::scalar(1) << '\n'); 3052 return false; 3053 } 3054 3055 const Register CondReg = I.getOperand(1).getReg(); 3056 const Register TReg = I.getOperand(2).getReg(); 3057 const Register FReg = I.getOperand(3).getReg(); 3058 3059 if (tryOptSelect(I)) 3060 return true; 3061 3062 // Make sure to use an unused vreg instead of wzr, so that the peephole 3063 // optimizations will be able to optimize these. 3064 MachineIRBuilder MIB(I); 3065 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 3066 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) 3067 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 3068 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 3069 if (!emitSelect(I.getOperand(0).getReg(), TReg, FReg, AArch64CC::NE, MIB)) 3070 return false; 3071 I.eraseFromParent(); 3072 return true; 3073 } 3074 case TargetOpcode::G_ICMP: { 3075 if (Ty.isVector()) 3076 return selectVectorICmp(I, MRI); 3077 3078 if (Ty != LLT::scalar(32)) { 3079 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty 3080 << ", expected: " << LLT::scalar(32) << '\n'); 3081 return false; 3082 } 3083 3084 MachineIRBuilder MIRBuilder(I); 3085 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); 3086 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), 3087 MIRBuilder); 3088 emitCSetForICMP(I.getOperand(0).getReg(), Pred, MIRBuilder); 3089 I.eraseFromParent(); 3090 return true; 3091 } 3092 3093 case TargetOpcode::G_FCMP: { 3094 MachineIRBuilder MIRBuilder(I); 3095 CmpInst::Predicate Pred = 3096 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); 3097 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), 3098 MIRBuilder, Pred) || 3099 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIRBuilder)) 3100 return false; 3101 I.eraseFromParent(); 3102 return true; 3103 } 3104 case TargetOpcode::G_VASTART: 3105 return STI.isTargetDarwin() ? 
selectVaStartDarwin(I, MF, MRI) 3106 : selectVaStartAAPCS(I, MF, MRI); 3107 case TargetOpcode::G_INTRINSIC: 3108 return selectIntrinsic(I, MRI); 3109 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 3110 return selectIntrinsicWithSideEffects(I, MRI); 3111 case TargetOpcode::G_IMPLICIT_DEF: { 3112 I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF)); 3113 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 3114 const Register DstReg = I.getOperand(0).getReg(); 3115 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 3116 const TargetRegisterClass *DstRC = 3117 getRegClassForTypeOnBank(DstTy, DstRB, RBI); 3118 RBI.constrainGenericRegister(DstReg, *DstRC, MRI); 3119 return true; 3120 } 3121 case TargetOpcode::G_BLOCK_ADDR: { 3122 if (TM.getCodeModel() == CodeModel::Large) { 3123 materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0); 3124 I.eraseFromParent(); 3125 return true; 3126 } else { 3127 I.setDesc(TII.get(AArch64::MOVaddrBA)); 3128 auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA), 3129 I.getOperand(0).getReg()) 3130 .addBlockAddress(I.getOperand(1).getBlockAddress(), 3131 /* Offset */ 0, AArch64II::MO_PAGE) 3132 .addBlockAddress( 3133 I.getOperand(1).getBlockAddress(), /* Offset */ 0, 3134 AArch64II::MO_NC | AArch64II::MO_PAGEOFF); 3135 I.eraseFromParent(); 3136 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); 3137 } 3138 } 3139 case AArch64::G_DUP: { 3140 // When the scalar of G_DUP is an s8/s16 gpr, they can't be selected by 3141 // imported patterns. Do it manually here. Avoiding generating s16 gpr is 3142 // difficult because at RBS we may end up pessimizing the fpr case if we 3143 // decided to add an anyextend to fix this. Manual selection is the most 3144 // robust solution for now. 3145 Register SrcReg = I.getOperand(1).getReg(); 3146 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::GPRRegBankID) 3147 return false; // We expect the fpr regbank case to be imported. 
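// For instance (sketch): a G_DUP of an s16 GPR scalar is selected below as
// DUPv8i16gpr, and an s8 GPR scalar as DUPv16i8gpr.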
3148 LLT SrcTy = MRI.getType(SrcReg); 3149 if (SrcTy.getSizeInBits() == 16) 3150 I.setDesc(TII.get(AArch64::DUPv8i16gpr)); 3151 else if (SrcTy.getSizeInBits() == 8) 3152 I.setDesc(TII.get(AArch64::DUPv16i8gpr)); 3153 else 3154 return false; 3155 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3156 } 3157 case TargetOpcode::G_INTRINSIC_TRUNC: 3158 return selectIntrinsicTrunc(I, MRI); 3159 case TargetOpcode::G_INTRINSIC_ROUND: 3160 return selectIntrinsicRound(I, MRI); 3161 case TargetOpcode::G_BUILD_VECTOR: 3162 return selectBuildVector(I, MRI); 3163 case TargetOpcode::G_MERGE_VALUES: 3164 return selectMergeValues(I, MRI); 3165 case TargetOpcode::G_UNMERGE_VALUES: 3166 return selectUnmergeValues(I, MRI); 3167 case TargetOpcode::G_SHUFFLE_VECTOR: 3168 return selectShuffleVector(I, MRI); 3169 case TargetOpcode::G_EXTRACT_VECTOR_ELT: 3170 return selectExtractElt(I, MRI); 3171 case TargetOpcode::G_INSERT_VECTOR_ELT: 3172 return selectInsertElt(I, MRI); 3173 case TargetOpcode::G_CONCAT_VECTORS: 3174 return selectConcatVectors(I, MRI); 3175 case TargetOpcode::G_JUMP_TABLE: 3176 return selectJumpTable(I, MRI); 3177 case TargetOpcode::G_VECREDUCE_FADD: 3178 case TargetOpcode::G_VECREDUCE_ADD: 3179 return selectReduction(I, MRI); 3180 } 3181 3182 return false; 3183 } 3184 3185 bool AArch64InstructionSelector::selectReduction( 3186 MachineInstr &I, MachineRegisterInfo &MRI) const { 3187 Register VecReg = I.getOperand(1).getReg(); 3188 LLT VecTy = MRI.getType(VecReg); 3189 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) { 3190 unsigned Opc = 0; 3191 if (VecTy == LLT::vector(16, 8)) 3192 Opc = AArch64::ADDVv16i8v; 3193 else if (VecTy == LLT::vector(8, 16)) 3194 Opc = AArch64::ADDVv8i16v; 3195 else if (VecTy == LLT::vector(4, 32)) 3196 Opc = AArch64::ADDVv4i32v; 3197 else if (VecTy == LLT::vector(2, 64)) 3198 Opc = AArch64::ADDPv2i64p; 3199 else { 3200 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction"); 3201 return false; 3202 } 3203 I.setDesc(TII.get(Opc)); 3204 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3205 } 3206 3207 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_FADD) { 3208 unsigned Opc = 0; 3209 if (VecTy == LLT::vector(2, 32)) 3210 Opc = AArch64::FADDPv2i32p; 3211 else if (VecTy == LLT::vector(2, 64)) 3212 Opc = AArch64::FADDPv2i64p; 3213 else { 3214 LLVM_DEBUG(dbgs() << "Unhandled type for fadd reduction"); 3215 return false; 3216 } 3217 I.setDesc(TII.get(Opc)); 3218 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3219 } 3220 return false; 3221 } 3222 3223 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, 3224 MachineRegisterInfo &MRI) const { 3225 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT"); 3226 Register JTAddr = I.getOperand(0).getReg(); 3227 unsigned JTI = I.getOperand(1).getIndex(); 3228 Register Index = I.getOperand(2).getReg(); 3229 MachineIRBuilder MIB(I); 3230 3231 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3232 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); 3233 3234 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr); 3235 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32, 3236 {TargetReg, ScratchReg}, {JTAddr, Index}) 3237 .addJumpTableIndex(JTI); 3238 // Build the indirect branch. 
3239 MIB.buildInstr(AArch64::BR, {}, {TargetReg}); 3240 I.eraseFromParent(); 3241 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI); 3242 } 3243 3244 bool AArch64InstructionSelector::selectJumpTable( 3245 MachineInstr &I, MachineRegisterInfo &MRI) const { 3246 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table"); 3247 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!"); 3248 3249 Register DstReg = I.getOperand(0).getReg(); 3250 unsigned JTI = I.getOperand(1).getIndex(); 3251 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later. 3252 MachineIRBuilder MIB(I); 3253 auto MovMI = 3254 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {}) 3255 .addJumpTableIndex(JTI, AArch64II::MO_PAGE) 3256 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF); 3257 I.eraseFromParent(); 3258 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); 3259 } 3260 3261 bool AArch64InstructionSelector::selectTLSGlobalValue( 3262 MachineInstr &I, MachineRegisterInfo &MRI) const { 3263 if (!STI.isTargetMachO()) 3264 return false; 3265 MachineFunction &MF = *I.getParent()->getParent(); 3266 MF.getFrameInfo().setAdjustsStack(true); 3267 3268 const GlobalValue &GV = *I.getOperand(1).getGlobal(); 3269 MachineIRBuilder MIB(I); 3270 3271 auto LoadGOT = 3272 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {}) 3273 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); 3274 3275 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, 3276 {LoadGOT.getReg(0)}) 3277 .addImm(0); 3278 3279 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0)); 3280 // TLS calls preserve all registers except those that absolutely must be 3281 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be 3282 // silly). 3283 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load}) 3284 .addUse(AArch64::X0, RegState::Implicit) 3285 .addDef(AArch64::X0, RegState::Implicit) 3286 .addRegMask(TRI.getTLSCallPreservedMask()); 3287 3288 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0)); 3289 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass, 3290 MRI); 3291 I.eraseFromParent(); 3292 return true; 3293 } 3294 3295 bool AArch64InstructionSelector::selectIntrinsicTrunc( 3296 MachineInstr &I, MachineRegisterInfo &MRI) const { 3297 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); 3298 3299 // Select the correct opcode. 3300 unsigned Opc = 0; 3301 if (!SrcTy.isVector()) { 3302 switch (SrcTy.getSizeInBits()) { 3303 default: 3304 case 16: 3305 Opc = AArch64::FRINTZHr; 3306 break; 3307 case 32: 3308 Opc = AArch64::FRINTZSr; 3309 break; 3310 case 64: 3311 Opc = AArch64::FRINTZDr; 3312 break; 3313 } 3314 } else { 3315 unsigned NumElts = SrcTy.getNumElements(); 3316 switch (SrcTy.getElementType().getSizeInBits()) { 3317 default: 3318 break; 3319 case 16: 3320 if (NumElts == 4) 3321 Opc = AArch64::FRINTZv4f16; 3322 else if (NumElts == 8) 3323 Opc = AArch64::FRINTZv8f16; 3324 break; 3325 case 32: 3326 if (NumElts == 2) 3327 Opc = AArch64::FRINTZv2f32; 3328 else if (NumElts == 4) 3329 Opc = AArch64::FRINTZv4f32; 3330 break; 3331 case 64: 3332 if (NumElts == 2) 3333 Opc = AArch64::FRINTZv2f64; 3334 break; 3335 } 3336 } 3337 3338 if (!Opc) { 3339 // Didn't get an opcode above, bail. 
3340 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n"); 3341 return false; 3342 } 3343 3344 // Legalization would have set us up perfectly for this; we just need to 3345 // set the opcode and move on. 3346 I.setDesc(TII.get(Opc)); 3347 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3348 } 3349 3350 bool AArch64InstructionSelector::selectIntrinsicRound( 3351 MachineInstr &I, MachineRegisterInfo &MRI) const { 3352 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); 3353 3354 // Select the correct opcode. 3355 unsigned Opc = 0; 3356 if (!SrcTy.isVector()) { 3357 switch (SrcTy.getSizeInBits()) { 3358 default: 3359 case 16: 3360 Opc = AArch64::FRINTAHr; 3361 break; 3362 case 32: 3363 Opc = AArch64::FRINTASr; 3364 break; 3365 case 64: 3366 Opc = AArch64::FRINTADr; 3367 break; 3368 } 3369 } else { 3370 unsigned NumElts = SrcTy.getNumElements(); 3371 switch (SrcTy.getElementType().getSizeInBits()) { 3372 default: 3373 break; 3374 case 16: 3375 if (NumElts == 4) 3376 Opc = AArch64::FRINTAv4f16; 3377 else if (NumElts == 8) 3378 Opc = AArch64::FRINTAv8f16; 3379 break; 3380 case 32: 3381 if (NumElts == 2) 3382 Opc = AArch64::FRINTAv2f32; 3383 else if (NumElts == 4) 3384 Opc = AArch64::FRINTAv4f32; 3385 break; 3386 case 64: 3387 if (NumElts == 2) 3388 Opc = AArch64::FRINTAv2f64; 3389 break; 3390 } 3391 } 3392 3393 if (!Opc) { 3394 // Didn't get an opcode above, bail. 3395 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n"); 3396 return false; 3397 } 3398 3399 // Legalization would have set us up perfectly for this; we just need to 3400 // set the opcode and move on. 3401 I.setDesc(TII.get(Opc)); 3402 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3403 } 3404 3405 bool AArch64InstructionSelector::selectVectorICmp( 3406 MachineInstr &I, MachineRegisterInfo &MRI) const { 3407 Register DstReg = I.getOperand(0).getReg(); 3408 LLT DstTy = MRI.getType(DstReg); 3409 Register SrcReg = I.getOperand(2).getReg(); 3410 Register Src2Reg = I.getOperand(3).getReg(); 3411 LLT SrcTy = MRI.getType(SrcReg); 3412 3413 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); 3414 unsigned NumElts = DstTy.getNumElements(); 3415 3416 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b 3417 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16 3418 // Third index is cc opcode: 3419 // 0 == eq 3420 // 1 == ugt 3421 // 2 == uge 3422 // 3 == ult 3423 // 4 == ule 3424 // 5 == sgt 3425 // 6 == sge 3426 // 7 == slt 3427 // 8 == sle 3428 // ne is done by negating 'eq' result. 3429 3430 // This table below assumes that for some comparisons the operands will be 3431 // commuted. 
3432 // ult op == commute + ugt op 3433 // ule op == commute + uge op 3434 // slt op == commute + sgt op 3435 // sle op == commute + sge op 3436 unsigned PredIdx = 0; 3437 bool SwapOperands = false; 3438 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); 3439 switch (Pred) { 3440 case CmpInst::ICMP_NE: 3441 case CmpInst::ICMP_EQ: 3442 PredIdx = 0; 3443 break; 3444 case CmpInst::ICMP_UGT: 3445 PredIdx = 1; 3446 break; 3447 case CmpInst::ICMP_UGE: 3448 PredIdx = 2; 3449 break; 3450 case CmpInst::ICMP_ULT: 3451 PredIdx = 3; 3452 SwapOperands = true; 3453 break; 3454 case CmpInst::ICMP_ULE: 3455 PredIdx = 4; 3456 SwapOperands = true; 3457 break; 3458 case CmpInst::ICMP_SGT: 3459 PredIdx = 5; 3460 break; 3461 case CmpInst::ICMP_SGE: 3462 PredIdx = 6; 3463 break; 3464 case CmpInst::ICMP_SLT: 3465 PredIdx = 7; 3466 SwapOperands = true; 3467 break; 3468 case CmpInst::ICMP_SLE: 3469 PredIdx = 8; 3470 SwapOperands = true; 3471 break; 3472 default: 3473 llvm_unreachable("Unhandled icmp predicate"); 3474 return false; 3475 } 3476 3477 // This table obviously should be tablegen'd when we have our GISel native 3478 // tablegen selector. 3479 3480 static const unsigned OpcTable[4][4][9] = { 3481 { 3482 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3483 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3484 0 /* invalid */}, 3485 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3486 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3487 0 /* invalid */}, 3488 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, 3489 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, 3490 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, 3491 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, 3492 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, 3493 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8} 3494 }, 3495 { 3496 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3497 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3498 0 /* invalid */}, 3499 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, 3500 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, 3501 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, 3502 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, 3503 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, 3504 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, 3505 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3506 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3507 0 /* invalid */} 3508 }, 3509 { 3510 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, 3511 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, 3512 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, 3513 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, 3514 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, 3515 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, 3516 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3517 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3518 0 /* invalid */}, 3519 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3520 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3521 0 /* invalid */} 3522 }, 3523 { 3524 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, 3525 AArch64::CMHIv2i64, 
AArch64::CMHSv2i64, AArch64::CMGTv2i64, 3526 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, 3527 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3528 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3529 0 /* invalid */}, 3530 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3531 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3532 0 /* invalid */}, 3533 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3534 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3535 0 /* invalid */} 3536 }, 3537 }; 3538 unsigned EltIdx = Log2_32(SrcEltSize / 8); 3539 unsigned NumEltsIdx = Log2_32(NumElts / 2); 3540 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx]; 3541 if (!Opc) { 3542 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode"); 3543 return false; 3544 } 3545 3546 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI); 3547 const TargetRegisterClass *SrcRC = 3548 getRegClassForTypeOnBank(SrcTy, VecRB, RBI, true); 3549 if (!SrcRC) { 3550 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); 3551 return false; 3552 } 3553 3554 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0; 3555 if (SrcTy.getSizeInBits() == 128) 3556 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0; 3557 3558 if (SwapOperands) 3559 std::swap(SrcReg, Src2Reg); 3560 3561 MachineIRBuilder MIB(I); 3562 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg}); 3563 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 3564 3565 // Invert if we had a 'ne' cc. 3566 if (NotOpc) { 3567 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp}); 3568 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 3569 } else { 3570 MIB.buildCopy(DstReg, Cmp.getReg(0)); 3571 } 3572 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI); 3573 I.eraseFromParent(); 3574 return true; 3575 } 3576 3577 MachineInstr *AArch64InstructionSelector::emitScalarToVector( 3578 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar, 3579 MachineIRBuilder &MIRBuilder) const { 3580 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {}); 3581 3582 auto BuildFn = [&](unsigned SubregIndex) { 3583 auto Ins = 3584 MIRBuilder 3585 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar}) 3586 .addImm(SubregIndex); 3587 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI); 3588 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI); 3589 return &*Ins; 3590 }; 3591 3592 switch (EltSize) { 3593 case 16: 3594 return BuildFn(AArch64::hsub); 3595 case 32: 3596 return BuildFn(AArch64::ssub); 3597 case 64: 3598 return BuildFn(AArch64::dsub); 3599 default: 3600 return nullptr; 3601 } 3602 } 3603 3604 bool AArch64InstructionSelector::selectMergeValues( 3605 MachineInstr &I, MachineRegisterInfo &MRI) const { 3606 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode"); 3607 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 3608 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 3609 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation"); 3610 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); 3611 3612 if (I.getNumOperands() != 3) 3613 return false; 3614 3615 // Merging 2 s64s into an s128. 
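  // Rough shape of what this path produces (virtual register names invented):
  //   %undef:fpr(s128) = IMPLICIT_DEF
  //   %lo              = INSvi64gpr/INSvi64lane %undef, lane 0, <operand 1>
  //   %dst             = INSvi64gpr/INSvi64lane %lo,    lane 1, <operand 2>
  // with the exact INS flavour picked by emitLaneInsert from the source bank.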
3616 if (DstTy == LLT::scalar(128)) { 3617 if (SrcTy.getSizeInBits() != 64) 3618 return false; 3619 MachineIRBuilder MIB(I); 3620 Register DstReg = I.getOperand(0).getReg(); 3621 Register Src1Reg = I.getOperand(1).getReg(); 3622 Register Src2Reg = I.getOperand(2).getReg(); 3623 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {}); 3624 MachineInstr *InsMI = 3625 emitLaneInsert(None, Tmp.getReg(0), Src1Reg, /* LaneIdx */ 0, RB, MIB); 3626 if (!InsMI) 3627 return false; 3628 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(), 3629 Src2Reg, /* LaneIdx */ 1, RB, MIB); 3630 if (!Ins2MI) 3631 return false; 3632 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); 3633 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI); 3634 I.eraseFromParent(); 3635 return true; 3636 } 3637 3638 if (RB.getID() != AArch64::GPRRegBankID) 3639 return false; 3640 3641 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) 3642 return false; 3643 3644 auto *DstRC = &AArch64::GPR64RegClass; 3645 Register SubToRegDef = MRI.createVirtualRegister(DstRC); 3646 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 3647 TII.get(TargetOpcode::SUBREG_TO_REG)) 3648 .addDef(SubToRegDef) 3649 .addImm(0) 3650 .addUse(I.getOperand(1).getReg()) 3651 .addImm(AArch64::sub_32); 3652 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC); 3653 // Need to anyext the second scalar before we can use bfm 3654 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 3655 TII.get(TargetOpcode::SUBREG_TO_REG)) 3656 .addDef(SubToRegDef2) 3657 .addImm(0) 3658 .addUse(I.getOperand(2).getReg()) 3659 .addImm(AArch64::sub_32); 3660 MachineInstr &BFM = 3661 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri)) 3662 .addDef(I.getOperand(0).getReg()) 3663 .addUse(SubToRegDef) 3664 .addUse(SubToRegDef2) 3665 .addImm(32) 3666 .addImm(31); 3667 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI); 3668 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI); 3669 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI); 3670 I.eraseFromParent(); 3671 return true; 3672 } 3673 3674 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, 3675 const unsigned EltSize) { 3676 // Choose a lane copy opcode and subregister based off of the size of the 3677 // vector's elements. 3678 switch (EltSize) { 3679 case 16: 3680 CopyOpc = AArch64::CPYi16; 3681 ExtractSubReg = AArch64::hsub; 3682 break; 3683 case 32: 3684 CopyOpc = AArch64::CPYi32; 3685 ExtractSubReg = AArch64::ssub; 3686 break; 3687 case 64: 3688 CopyOpc = AArch64::CPYi64; 3689 ExtractSubReg = AArch64::dsub; 3690 break; 3691 default: 3692 // Unknown size, bail out. 
3693 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n"); 3694 return false; 3695 } 3696 return true; 3697 } 3698 3699 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt( 3700 Optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, 3701 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const { 3702 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 3703 unsigned CopyOpc = 0; 3704 unsigned ExtractSubReg = 0; 3705 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) { 3706 LLVM_DEBUG( 3707 dbgs() << "Couldn't determine lane copy opcode for instruction.\n"); 3708 return nullptr; 3709 } 3710 3711 const TargetRegisterClass *DstRC = 3712 getRegClassForTypeOnBank(ScalarTy, DstRB, RBI, true); 3713 if (!DstRC) { 3714 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n"); 3715 return nullptr; 3716 } 3717 3718 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI); 3719 const LLT &VecTy = MRI.getType(VecReg); 3720 const TargetRegisterClass *VecRC = 3721 getRegClassForTypeOnBank(VecTy, VecRB, RBI, true); 3722 if (!VecRC) { 3723 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); 3724 return nullptr; 3725 } 3726 3727 // The register that we're going to copy into. 3728 Register InsertReg = VecReg; 3729 if (!DstReg) 3730 DstReg = MRI.createVirtualRegister(DstRC); 3731 // If the lane index is 0, we just use a subregister COPY. 3732 if (LaneIdx == 0) { 3733 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {}) 3734 .addReg(VecReg, 0, ExtractSubReg); 3735 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 3736 return &*Copy; 3737 } 3738 3739 // Lane copies require 128-bit wide registers. If we're dealing with an 3740 // unpacked vector, then we need to move up to that width. Insert an implicit 3741 // def and a subregister insert to get us there. 3742 if (VecTy.getSizeInBits() != 128) { 3743 MachineInstr *ScalarToVector = emitScalarToVector( 3744 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder); 3745 if (!ScalarToVector) 3746 return nullptr; 3747 InsertReg = ScalarToVector->getOperand(0).getReg(); 3748 } 3749 3750 MachineInstr *LaneCopyMI = 3751 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx); 3752 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); 3753 3754 // Make sure that we actually constrain the initial copy. 3755 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 3756 return LaneCopyMI; 3757 } 3758 3759 bool AArch64InstructionSelector::selectExtractElt( 3760 MachineInstr &I, MachineRegisterInfo &MRI) const { 3761 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && 3762 "unexpected opcode!"); 3763 Register DstReg = I.getOperand(0).getReg(); 3764 const LLT NarrowTy = MRI.getType(DstReg); 3765 const Register SrcReg = I.getOperand(1).getReg(); 3766 const LLT WideTy = MRI.getType(SrcReg); 3767 (void)WideTy; 3768 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && 3769 "source register size too small!"); 3770 assert(!NarrowTy.isVector() && "cannot extract vector into vector!"); 3771 3772 // Need the lane index to determine the correct copy opcode. 3773 MachineOperand &LaneIdxOp = I.getOperand(2); 3774 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?"); 3775 3776 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { 3777 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n"); 3778 return false; 3779 } 3780 3781 // Find the index to extract from. 
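  // The index has to resolve to a compile-time constant (possibly through
  // copies), e.g. an index defined by "%idx:gpr(s64) = G_CONSTANT i64 1"
  // selects lane 1; a truly dynamic index is rejected below.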
3782 auto VRegAndVal = getConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI); 3783 if (!VRegAndVal) 3784 return false; 3785 unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); 3786 3787 MachineIRBuilder MIRBuilder(I); 3788 3789 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 3790 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg, 3791 LaneIdx, MIRBuilder); 3792 if (!Extract) 3793 return false; 3794 3795 I.eraseFromParent(); 3796 return true; 3797 } 3798 3799 bool AArch64InstructionSelector::selectSplitVectorUnmerge( 3800 MachineInstr &I, MachineRegisterInfo &MRI) const { 3801 unsigned NumElts = I.getNumOperands() - 1; 3802 Register SrcReg = I.getOperand(NumElts).getReg(); 3803 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 3804 const LLT SrcTy = MRI.getType(SrcReg); 3805 3806 assert(NarrowTy.isVector() && "Expected an unmerge into vectors"); 3807 if (SrcTy.getSizeInBits() > 128) { 3808 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge"); 3809 return false; 3810 } 3811 3812 MachineIRBuilder MIB(I); 3813 3814 // We implement a split vector operation by treating the sub-vectors as 3815 // scalars and extracting them. 3816 const RegisterBank &DstRB = 3817 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI); 3818 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) { 3819 Register Dst = I.getOperand(OpIdx).getReg(); 3820 MachineInstr *Extract = 3821 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB); 3822 if (!Extract) 3823 return false; 3824 } 3825 I.eraseFromParent(); 3826 return true; 3827 } 3828 3829 bool AArch64InstructionSelector::selectUnmergeValues( 3830 MachineInstr &I, MachineRegisterInfo &MRI) const { 3831 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 3832 "unexpected opcode"); 3833 3834 // TODO: Handle unmerging into GPRs and from scalars to scalars. 3835 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() != 3836 AArch64::FPRRegBankID || 3837 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != 3838 AArch64::FPRRegBankID) { 3839 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " 3840 "currently unsupported.\n"); 3841 return false; 3842 } 3843 3844 // The last operand is the vector source register, and every other operand is 3845 // a register to unpack into. 3846 unsigned NumElts = I.getNumOperands() - 1; 3847 Register SrcReg = I.getOperand(NumElts).getReg(); 3848 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 3849 const LLT WideTy = MRI.getType(SrcReg); 3850 (void)WideTy; 3851 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) && 3852 "can only unmerge from vector or s128 types!"); 3853 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && 3854 "source register size too small!"); 3855 3856 if (!NarrowTy.isScalar()) 3857 return selectSplitVectorUnmerge(I, MRI); 3858 3859 MachineIRBuilder MIB(I); 3860 3861 // Choose a lane copy opcode and subregister based off of the size of the 3862 // vector's elements. 3863 unsigned CopyOpc = 0; 3864 unsigned ExtractSubReg = 0; 3865 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) 3866 return false; 3867 3868 // Set up for the lane copies. 3869 MachineBasicBlock &MBB = *I.getParent(); 3870 3871 // Stores the registers we'll be copying from. 3872 SmallVector<Register, 4> InsertRegs; 3873 3874 // We'll use the first register twice, so we only need NumElts-1 registers. 
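  // E.g. unmerging a <2 x s64> needs a single insert register: result 0 is a
  // dsub subregister COPY from it, and result 1 is a CPYi64 lane copy from
  // that same register (rough sketch of the code below).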
3875 unsigned NumInsertRegs = NumElts - 1; 3876 3877 // If our elements fit into exactly 128 bits, then we can copy from the source 3878 // directly. Otherwise, we need to do a bit of setup with some subregister 3879 // inserts. 3880 if (NarrowTy.getSizeInBits() * NumElts == 128) { 3881 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg); 3882 } else { 3883 // No. We have to perform subregister inserts. For each insert, create an 3884 // implicit def and a subregister insert, and save the register we create. 3885 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) { 3886 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); 3887 MachineInstr &ImpDefMI = 3888 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF), 3889 ImpDefReg); 3890 3891 // Now, create the subregister insert from SrcReg. 3892 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); 3893 MachineInstr &InsMI = 3894 *BuildMI(MBB, I, I.getDebugLoc(), 3895 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg) 3896 .addUse(ImpDefReg) 3897 .addUse(SrcReg) 3898 .addImm(AArch64::dsub); 3899 3900 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI); 3901 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI); 3902 3903 // Save the register so that we can copy from it after. 3904 InsertRegs.push_back(InsertReg); 3905 } 3906 } 3907 3908 // Now that we've created any necessary subregister inserts, we can 3909 // create the copies. 3910 // 3911 // Perform the first copy separately as a subregister copy. 3912 Register CopyTo = I.getOperand(0).getReg(); 3913 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {}) 3914 .addReg(InsertRegs[0], 0, ExtractSubReg); 3915 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI); 3916 3917 // Now, perform the remaining copies as vector lane copies. 3918 unsigned LaneIdx = 1; 3919 for (Register InsReg : InsertRegs) { 3920 Register CopyTo = I.getOperand(LaneIdx).getReg(); 3921 MachineInstr &CopyInst = 3922 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo) 3923 .addUse(InsReg) 3924 .addImm(LaneIdx); 3925 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI); 3926 ++LaneIdx; 3927 } 3928 3929 // Separately constrain the first copy's destination. Because of the 3930 // limitation in constrainOperandRegClass, we can't guarantee that this will 3931 // actually be constrained. So, do it ourselves using the second operand. 
3932 const TargetRegisterClass *RC = 3933 MRI.getRegClassOrNull(I.getOperand(1).getReg()); 3934 if (!RC) { 3935 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n"); 3936 return false; 3937 } 3938 3939 RBI.constrainGenericRegister(CopyTo, *RC, MRI); 3940 I.eraseFromParent(); 3941 return true; 3942 } 3943 3944 bool AArch64InstructionSelector::selectConcatVectors( 3945 MachineInstr &I, MachineRegisterInfo &MRI) const { 3946 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && 3947 "Unexpected opcode"); 3948 Register Dst = I.getOperand(0).getReg(); 3949 Register Op1 = I.getOperand(1).getReg(); 3950 Register Op2 = I.getOperand(2).getReg(); 3951 MachineIRBuilder MIRBuilder(I); 3952 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIRBuilder); 3953 if (!ConcatMI) 3954 return false; 3955 I.eraseFromParent(); 3956 return true; 3957 } 3958 3959 unsigned 3960 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal, 3961 MachineFunction &MF) const { 3962 Type *CPTy = CPVal->getType(); 3963 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy); 3964 3965 MachineConstantPool *MCP = MF.getConstantPool(); 3966 return MCP->getConstantPoolIndex(CPVal, Alignment); 3967 } 3968 3969 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool( 3970 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const { 3971 unsigned CPIdx = emitConstantPoolEntry(CPVal, MIRBuilder.getMF()); 3972 3973 auto Adrp = 3974 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {}) 3975 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE); 3976 3977 MachineInstr *LoadMI = nullptr; 3978 switch (MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType())) { 3979 case 16: 3980 LoadMI = 3981 &*MIRBuilder 3982 .buildInstr(AArch64::LDRQui, {&AArch64::FPR128RegClass}, {Adrp}) 3983 .addConstantPoolIndex(CPIdx, 0, 3984 AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3985 break; 3986 case 8: 3987 LoadMI = &*MIRBuilder 3988 .buildInstr(AArch64::LDRDui, {&AArch64::FPR64RegClass}, {Adrp}) 3989 .addConstantPoolIndex( 3990 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 3991 break; 3992 default: 3993 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type " 3994 << *CPVal->getType()); 3995 return nullptr; 3996 } 3997 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI); 3998 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI); 3999 return LoadMI; 4000 } 4001 4002 /// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given 4003 /// size and RB. 
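/// For example, (GPR bank, 32-bit elements) yields {INSvi32gpr, ssub}, while
/// (FPR bank, 64-bit elements) yields {INSvi64lane, dsub}; see the table below.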
4004 static std::pair<unsigned, unsigned> 4005 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { 4006 unsigned Opc, SubregIdx; 4007 if (RB.getID() == AArch64::GPRRegBankID) { 4008 if (EltSize == 16) { 4009 Opc = AArch64::INSvi16gpr; 4010 SubregIdx = AArch64::ssub; 4011 } else if (EltSize == 32) { 4012 Opc = AArch64::INSvi32gpr; 4013 SubregIdx = AArch64::ssub; 4014 } else if (EltSize == 64) { 4015 Opc = AArch64::INSvi64gpr; 4016 SubregIdx = AArch64::dsub; 4017 } else { 4018 llvm_unreachable("invalid elt size!"); 4019 } 4020 } else { 4021 if (EltSize == 8) { 4022 Opc = AArch64::INSvi8lane; 4023 SubregIdx = AArch64::bsub; 4024 } else if (EltSize == 16) { 4025 Opc = AArch64::INSvi16lane; 4026 SubregIdx = AArch64::hsub; 4027 } else if (EltSize == 32) { 4028 Opc = AArch64::INSvi32lane; 4029 SubregIdx = AArch64::ssub; 4030 } else if (EltSize == 64) { 4031 Opc = AArch64::INSvi64lane; 4032 SubregIdx = AArch64::dsub; 4033 } else { 4034 llvm_unreachable("invalid elt size!"); 4035 } 4036 } 4037 return std::make_pair(Opc, SubregIdx); 4038 } 4039 4040 MachineInstr *AArch64InstructionSelector::emitInstr( 4041 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, 4042 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, 4043 const ComplexRendererFns &RenderFns) const { 4044 assert(Opcode && "Expected an opcode?"); 4045 assert(!isPreISelGenericOpcode(Opcode) && 4046 "Function should only be used to produce selected instructions!"); 4047 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps); 4048 if (RenderFns) 4049 for (auto &Fn : *RenderFns) 4050 Fn(MI); 4051 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); 4052 return &*MI; 4053 } 4054 4055 MachineInstr *AArch64InstructionSelector::emitAddSub( 4056 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, 4057 Register Dst, MachineOperand &LHS, MachineOperand &RHS, 4058 MachineIRBuilder &MIRBuilder) const { 4059 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4060 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); 4061 auto Ty = MRI.getType(LHS.getReg()); 4062 assert(!Ty.isVector() && "Expected a scalar or pointer?"); 4063 unsigned Size = Ty.getSizeInBits(); 4064 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only"); 4065 bool Is32Bit = Size == 32; 4066 4067 // INSTRri form with positive arithmetic immediate. 4068 if (auto Fns = selectArithImmed(RHS)) 4069 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS}, 4070 MIRBuilder, Fns); 4071 4072 // INSTRri form with negative arithmetic immediate. 4073 if (auto Fns = selectNegArithImmed(RHS)) 4074 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS}, 4075 MIRBuilder, Fns); 4076 4077 // INSTRrx form. 4078 if (auto Fns = selectArithExtendedRegister(RHS)) 4079 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS}, 4080 MIRBuilder, Fns); 4081 4082 // INSTRrs form. 
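  // (Shifted-register form, e.g. "add x0, x1, x2, lsl #2". The row indices
  // used here correspond to the ri/rs/rr/negated-ri/rx rows of the tables
  // set up in emitADD/emitADDS/emitSUBS below.)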
4083 if (auto Fns = selectShiftedRegister(RHS)) 4084 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS}, 4085 MIRBuilder, Fns); 4086 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS}, 4087 MIRBuilder); 4088 } 4089 4090 MachineInstr * 4091 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, 4092 MachineOperand &RHS, 4093 MachineIRBuilder &MIRBuilder) const { 4094 const std::array<std::array<unsigned, 2>, 5> OpcTable{ 4095 {{AArch64::ADDXri, AArch64::ADDWri}, 4096 {AArch64::ADDXrs, AArch64::ADDWrs}, 4097 {AArch64::ADDXrr, AArch64::ADDWrr}, 4098 {AArch64::SUBXri, AArch64::SUBWri}, 4099 {AArch64::ADDXrx, AArch64::ADDWrx}}}; 4100 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder); 4101 } 4102 4103 MachineInstr * 4104 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, 4105 MachineOperand &RHS, 4106 MachineIRBuilder &MIRBuilder) const { 4107 const std::array<std::array<unsigned, 2>, 5> OpcTable{ 4108 {{AArch64::ADDSXri, AArch64::ADDSWri}, 4109 {AArch64::ADDSXrs, AArch64::ADDSWrs}, 4110 {AArch64::ADDSXrr, AArch64::ADDSWrr}, 4111 {AArch64::SUBSXri, AArch64::SUBSWri}, 4112 {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; 4113 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); 4114 } 4115 4116 MachineInstr * 4117 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, 4118 MachineOperand &RHS, 4119 MachineIRBuilder &MIRBuilder) const { 4120 const std::array<std::array<unsigned, 2>, 5> OpcTable{ 4121 {{AArch64::SUBSXri, AArch64::SUBSWri}, 4122 {AArch64::SUBSXrs, AArch64::SUBSWrs}, 4123 {AArch64::SUBSXrr, AArch64::SUBSWrr}, 4124 {AArch64::ADDSXri, AArch64::ADDSWri}, 4125 {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; 4126 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); 4127 } 4128 4129 MachineInstr * 4130 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, 4131 MachineIRBuilder &MIRBuilder) const { 4132 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4133 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); 4134 auto RC = Is32Bit ? &AArch64::GPR32RegClass : &AArch64::GPR64RegClass; 4135 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); 4136 } 4137 4138 MachineInstr * 4139 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS, 4140 MachineIRBuilder &MIRBuilder) const { 4141 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); 4142 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4143 LLT Ty = MRI.getType(LHS.getReg()); 4144 unsigned RegSize = Ty.getSizeInBits(); 4145 bool Is32Bit = (RegSize == 32); 4146 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri}, 4147 {AArch64::ANDSXrs, AArch64::ANDSWrs}, 4148 {AArch64::ANDSXrr, AArch64::ANDSWrr}}; 4149 // ANDS needs a logical immediate for its immediate form. Check if we can 4150 // fold one in. 
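  // (A logical immediate is an AArch64 bitmask immediate: e.g. 0xff or
  // 0x00ff00ff00ff00ff are encodable, while an arbitrary value such as 0x1234
  // generally is not, in which case we fall back to the register forms below.)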
4151 if (auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI)) { 4152 int64_t Imm = ValAndVReg->Value.getSExtValue(); 4153 4154 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) { 4155 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS}); 4156 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); 4157 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 4158 return &*TstMI; 4159 } 4160 } 4161 4162 if (auto Fns = selectLogicalShiftedRegister(RHS)) 4163 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns); 4164 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder); 4165 } 4166 4167 MachineInstr *AArch64InstructionSelector::emitIntegerCompare( 4168 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, 4169 MachineIRBuilder &MIRBuilder) const { 4170 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 4171 assert(Predicate.isPredicate() && "Expected predicate?"); 4172 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4173 LLT CmpTy = MRI.getType(LHS.getReg()); 4174 assert(!CmpTy.isVector() && "Expected scalar or pointer"); 4175 unsigned Size = CmpTy.getSizeInBits(); 4176 (void)Size; 4177 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?"); 4178 // Fold the compare into a cmn or tst if possible. 4179 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder)) 4180 return FoldCmp; 4181 auto Dst = MRI.cloneVirtualRegister(LHS.getReg()); 4182 return emitSUBS(Dst, LHS, RHS, MIRBuilder); 4183 } 4184 4185 MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( 4186 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const { 4187 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4188 #ifndef NDEBUG 4189 LLT Ty = MRI.getType(Dst); 4190 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && 4191 "Expected a 32-bit scalar register?"); 4192 #endif 4193 const Register ZeroReg = AArch64::WZR; 4194 auto EmitCSet = [&](Register CsetDst, AArch64CC::CondCode CC) { 4195 auto CSet = 4196 MIRBuilder.buildInstr(AArch64::CSINCWr, {CsetDst}, {ZeroReg, ZeroReg}) 4197 .addImm(getInvertedCondCode(CC)); 4198 constrainSelectedInstRegOperands(*CSet, TII, TRI, RBI); 4199 return &*CSet; 4200 }; 4201 4202 AArch64CC::CondCode CC1, CC2; 4203 changeFCMPPredToAArch64CC(Pred, CC1, CC2); 4204 if (CC2 == AArch64CC::AL) 4205 return EmitCSet(Dst, CC1); 4206 4207 const TargetRegisterClass *RC = &AArch64::GPR32RegClass; 4208 Register Def1Reg = MRI.createVirtualRegister(RC); 4209 Register Def2Reg = MRI.createVirtualRegister(RC); 4210 EmitCSet(Def1Reg, CC1); 4211 EmitCSet(Def2Reg, CC2); 4212 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg}); 4213 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI); 4214 return &*OrMI; 4215 } 4216 4217 MachineInstr * 4218 AArch64InstructionSelector::emitFPCompare(Register LHS, Register RHS, 4219 MachineIRBuilder &MIRBuilder, 4220 Optional<CmpInst::Predicate> Pred) const { 4221 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4222 LLT Ty = MRI.getType(LHS); 4223 if (Ty.isVector()) 4224 return nullptr; 4225 unsigned OpSize = Ty.getSizeInBits(); 4226 if (OpSize != 32 && OpSize != 64) 4227 return nullptr; 4228 4229 // If this is a compare against +0.0, then we don't have 4230 // to explicitly materialize a constant. 
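  // (That lets us use the FCMPSri/FCMPDri forms, i.e. "fcmp s0, #0.0",
  // instead of materializing the zero into a register first.)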
const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI);
4232   bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative());
4233
4234   auto IsEqualityPred = [](CmpInst::Predicate P) {
4235     return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE ||
4236            P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE;
4237   };
4238   if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) {
4239     // Try commuting the operands.
4240     const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI);
4241     if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) {
4242       ShouldUseImm = true;
4243       std::swap(LHS, RHS);
4244     }
4245   }
4246   unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr},
4247                               {AArch64::FCMPSri, AArch64::FCMPDri}};
4248   unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64];
4249
4250   // Partially build the compare. Decide if we need to add a use for the
4251   // third operand based off whether or not we're comparing against 0.0.
4252   auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS);
4253   if (!ShouldUseImm)
4254     CmpMI.addUse(RHS);
4255   constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI);
4256   return &*CmpMI;
4257 }
4258
4259 MachineInstr *AArch64InstructionSelector::emitVectorConcat(
4260     Optional<Register> Dst, Register Op1, Register Op2,
4261     MachineIRBuilder &MIRBuilder) const {
4262   // We implement a vector concat by:
4263   // 1. Use scalar_to_vector to insert the lower vector into the larger dest
4264   // 2. Insert the upper vector into the destination's upper element
4265   // TODO: some of this code is common with G_BUILD_VECTOR handling.
4266   MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4267
4268   const LLT Op1Ty = MRI.getType(Op1);
4269   const LLT Op2Ty = MRI.getType(Op2);
4270
4271   if (Op1Ty != Op2Ty) {
4272     LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4273     return nullptr;
4274   }
4275   assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4276
4277   if (Op1Ty.getSizeInBits() >= 128) {
4278     LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4279     return nullptr;
4280   }
4281
4282   // At the moment we just support 64 bit vector concats.
4283   if (Op1Ty.getSizeInBits() != 64) {
4284     LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4285     return nullptr;
4286   }
4287
4288   const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4289   const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4290   const TargetRegisterClass *DstRC =
4291       getMinClassForRegBank(FPRBank, Op1Ty.getSizeInBits() * 2);
4292
4293   MachineInstr *WidenedOp1 =
4294       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4295   MachineInstr *WidenedOp2 =
4296       emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4297   if (!WidenedOp1 || !WidenedOp2) {
4298     LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4299     return nullptr;
4300   }
4301
4302   // Now do the insert of the upper element.
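  // E.g. for two <2 x s32> operands this ends up roughly as (names invented):
  //   %w1:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op1, dsub
  //   %w2:fpr128 = INSERT_SUBREG (IMPLICIT_DEF), %op2, dsub
  //   %dst       = INSvi64lane %w1, lane 1, %w2, lane 0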
4303 unsigned InsertOpc, InsSubRegIdx; 4304 std::tie(InsertOpc, InsSubRegIdx) = 4305 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits()); 4306 4307 if (!Dst) 4308 Dst = MRI.createVirtualRegister(DstRC); 4309 auto InsElt = 4310 MIRBuilder 4311 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()}) 4312 .addImm(1) /* Lane index */ 4313 .addUse(WidenedOp2->getOperand(0).getReg()) 4314 .addImm(0); 4315 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); 4316 return &*InsElt; 4317 } 4318 4319 MachineInstr *AArch64InstructionSelector::emitFMovForFConstant( 4320 MachineInstr &I, MachineRegisterInfo &MRI) const { 4321 assert(I.getOpcode() == TargetOpcode::G_FCONSTANT && 4322 "Expected a G_FCONSTANT!"); 4323 MachineOperand &ImmOp = I.getOperand(1); 4324 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits(); 4325 4326 // Only handle 32 and 64 bit defs for now. 4327 if (DefSize != 32 && DefSize != 64) 4328 return nullptr; 4329 4330 // Don't handle null values using FMOV. 4331 if (ImmOp.getFPImm()->isNullValue()) 4332 return nullptr; 4333 4334 // Get the immediate representation for the FMOV. 4335 const APFloat &ImmValAPF = ImmOp.getFPImm()->getValueAPF(); 4336 int Imm = DefSize == 32 ? AArch64_AM::getFP32Imm(ImmValAPF) 4337 : AArch64_AM::getFP64Imm(ImmValAPF); 4338 4339 // If this is -1, it means the immediate can't be represented as the requested 4340 // floating point value. Bail. 4341 if (Imm == -1) 4342 return nullptr; 4343 4344 // Update MI to represent the new FMOV instruction, constrain it, and return. 4345 ImmOp.ChangeToImmediate(Imm); 4346 unsigned MovOpc = DefSize == 32 ? AArch64::FMOVSi : AArch64::FMOVDi; 4347 I.setDesc(TII.get(MovOpc)); 4348 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 4349 return &I; 4350 } 4351 4352 MachineInstr * 4353 AArch64InstructionSelector::emitCSetForICMP(Register DefReg, unsigned Pred, 4354 MachineIRBuilder &MIRBuilder) const { 4355 // CSINC increments the result when the predicate is false. Invert it. 4356 const AArch64CC::CondCode InvCC = changeICMPPredToAArch64CC( 4357 CmpInst::getInversePredicate((CmpInst::Predicate)Pred)); 4358 auto I = 4359 MIRBuilder 4360 .buildInstr(AArch64::CSINCWr, {DefReg}, {Register(AArch64::WZR), Register(AArch64::WZR)}) 4361 .addImm(InvCC); 4362 constrainSelectedInstRegOperands(*I, TII, TRI, RBI); 4363 return &*I; 4364 } 4365 4366 std::pair<MachineInstr *, AArch64CC::CondCode> 4367 AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst, 4368 MachineOperand &LHS, 4369 MachineOperand &RHS, 4370 MachineIRBuilder &MIRBuilder) const { 4371 switch (Opcode) { 4372 default: 4373 llvm_unreachable("Unexpected opcode!"); 4374 case TargetOpcode::G_SADDO: 4375 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); 4376 case TargetOpcode::G_UADDO: 4377 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS); 4378 case TargetOpcode::G_SSUBO: 4379 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS); 4380 case TargetOpcode::G_USUBO: 4381 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO); 4382 } 4383 } 4384 4385 bool AArch64InstructionSelector::tryOptSelect(MachineInstr &I) const { 4386 MachineIRBuilder MIB(I); 4387 MachineRegisterInfo &MRI = *MIB.getMRI(); 4388 // We want to recognize this pattern: 4389 // 4390 // $z = G_FCMP pred, $x, $y 4391 // ... 
4392 // $w = G_SELECT $z, $a, $b 4393 // 4394 // Where the value of $z is *only* ever used by the G_SELECT (possibly with 4395 // some copies/truncs in between.) 4396 // 4397 // If we see this, then we can emit something like this: 4398 // 4399 // fcmp $x, $y 4400 // fcsel $w, $a, $b, pred 4401 // 4402 // Rather than emitting both of the rather long sequences in the standard 4403 // G_FCMP/G_SELECT select methods. 4404 4405 // First, check if the condition is defined by a compare. 4406 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg()); 4407 while (CondDef) { 4408 // We can only fold if all of the defs have one use. 4409 Register CondDefReg = CondDef->getOperand(0).getReg(); 4410 if (!MRI.hasOneNonDBGUse(CondDefReg)) { 4411 // Unless it's another select. 4412 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) { 4413 if (CondDef == &UI) 4414 continue; 4415 if (UI.getOpcode() != TargetOpcode::G_SELECT) 4416 return false; 4417 } 4418 } 4419 4420 // We can skip over G_TRUNC since the condition is 1-bit. 4421 // Truncating/extending can have no impact on the value. 4422 unsigned Opc = CondDef->getOpcode(); 4423 if (Opc != TargetOpcode::COPY && Opc != TargetOpcode::G_TRUNC) 4424 break; 4425 4426 // Can't see past copies from physregs. 4427 if (Opc == TargetOpcode::COPY && 4428 Register::isPhysicalRegister(CondDef->getOperand(1).getReg())) 4429 return false; 4430 4431 CondDef = MRI.getVRegDef(CondDef->getOperand(1).getReg()); 4432 } 4433 4434 // Is the condition defined by a compare? 4435 if (!CondDef) 4436 return false; 4437 4438 unsigned CondOpc = CondDef->getOpcode(); 4439 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) 4440 return false; 4441 4442 AArch64CC::CondCode CondCode; 4443 if (CondOpc == TargetOpcode::G_ICMP) { 4444 auto Pred = 4445 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); 4446 CondCode = changeICMPPredToAArch64CC(Pred); 4447 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), 4448 CondDef->getOperand(1), MIB); 4449 } else { 4450 // Get the condition code for the select. 4451 auto Pred = 4452 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); 4453 AArch64CC::CondCode CondCode2; 4454 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2); 4455 4456 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two 4457 // instructions to emit the comparison. 4458 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be 4459 // unnecessary. 4460 if (CondCode2 != AArch64CC::AL) 4461 return false; 4462 4463 if (!emitFPCompare(CondDef->getOperand(2).getReg(), 4464 CondDef->getOperand(3).getReg(), MIB)) { 4465 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); 4466 return false; 4467 } 4468 } 4469 4470 // Emit the select. 4471 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(), 4472 I.getOperand(3).getReg(), CondCode, MIB); 4473 I.eraseFromParent(); 4474 return true; 4475 } 4476 4477 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( 4478 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, 4479 MachineIRBuilder &MIRBuilder) const { 4480 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && 4481 "Unexpected MachineOperand"); 4482 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4483 // We want to find this sort of thing: 4484 // x = G_SUB 0, y 4485 // G_ICMP z, x 4486 // 4487 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead. 
4488 // e.g: 4489 // 4490 // cmn z, y 4491 4492 // Helper lambda to detect the subtract followed by the compare. 4493 // Takes in the def of the LHS or RHS, and checks if it's a subtract from 0. 4494 auto IsCMN = [&](MachineInstr *DefMI, const AArch64CC::CondCode &CC) { 4495 if (!DefMI || DefMI->getOpcode() != TargetOpcode::G_SUB) 4496 return false; 4497 4498 // Need to make sure NZCV is the same at the end of the transformation. 4499 if (CC != AArch64CC::EQ && CC != AArch64CC::NE) 4500 return false; 4501 4502 // We want to match against SUBs. 4503 if (DefMI->getOpcode() != TargetOpcode::G_SUB) 4504 return false; 4505 4506 // Make sure that we're getting 4507 // x = G_SUB 0, y 4508 auto ValAndVReg = 4509 getConstantVRegValWithLookThrough(DefMI->getOperand(1).getReg(), MRI); 4510 if (!ValAndVReg || ValAndVReg->Value != 0) 4511 return false; 4512 4513 // This can safely be represented as a CMN. 4514 return true; 4515 }; 4516 4517 // Check if the RHS or LHS of the G_ICMP is defined by a SUB 4518 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI); 4519 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI); 4520 CmpInst::Predicate P = (CmpInst::Predicate)Predicate.getPredicate(); 4521 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(P); 4522 4523 // Given this: 4524 // 4525 // x = G_SUB 0, y 4526 // G_ICMP x, z 4527 // 4528 // Produce this: 4529 // 4530 // cmn y, z 4531 if (IsCMN(LHSDef, CC)) 4532 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder); 4533 4534 // Same idea here, but with the RHS of the compare instead: 4535 // 4536 // Given this: 4537 // 4538 // x = G_SUB 0, y 4539 // G_ICMP z, x 4540 // 4541 // Produce this: 4542 // 4543 // cmn z, y 4544 if (IsCMN(RHSDef, CC)) 4545 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder); 4546 4547 // Given this: 4548 // 4549 // z = G_AND x, y 4550 // G_ICMP z, 0 4551 // 4552 // Produce this if the compare is signed: 4553 // 4554 // tst x, y 4555 if (!CmpInst::isUnsigned(P) && LHSDef && 4556 LHSDef->getOpcode() == TargetOpcode::G_AND) { 4557 // Make sure that the RHS is 0. 4558 auto ValAndVReg = getConstantVRegValWithLookThrough(RHS.getReg(), MRI); 4559 if (!ValAndVReg || ValAndVReg->Value != 0) 4560 return nullptr; 4561 4562 return emitTST(LHSDef->getOperand(1), 4563 LHSDef->getOperand(2), MIRBuilder); 4564 } 4565 4566 return nullptr; 4567 } 4568 4569 bool AArch64InstructionSelector::selectShuffleVector( 4570 MachineInstr &I, MachineRegisterInfo &MRI) const { 4571 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 4572 Register Src1Reg = I.getOperand(1).getReg(); 4573 const LLT Src1Ty = MRI.getType(Src1Reg); 4574 Register Src2Reg = I.getOperand(2).getReg(); 4575 const LLT Src2Ty = MRI.getType(Src2Reg); 4576 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask(); 4577 4578 MachineBasicBlock &MBB = *I.getParent(); 4579 MachineFunction &MF = *MBB.getParent(); 4580 LLVMContext &Ctx = MF.getFunction().getContext(); 4581 4582 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if 4583 // it's originated from a <1 x T> type. Those should have been lowered into 4584 // G_BUILD_VECTOR earlier. 4585 if (!Src1Ty.isVector() || !Src2Ty.isVector()) { 4586 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n"); 4587 return false; 4588 } 4589 4590 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; 4591 4592 SmallVector<Constant *, 64> CstIdxs; 4593 for (int Val : Mask) { 4594 // For now, any undef indexes we'll just assume to be 0. 
This should be 4595 // optimized in future, e.g. to select DUP etc. 4596 Val = Val < 0 ? 0 : Val; 4597 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { 4598 unsigned Offset = Byte + Val * BytesPerElt; 4599 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset)); 4600 } 4601 } 4602 4603 MachineIRBuilder MIRBuilder(I); 4604 4605 // Use a constant pool to load the index vector for TBL. 4606 Constant *CPVal = ConstantVector::get(CstIdxs); 4607 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIRBuilder); 4608 if (!IndexLoad) { 4609 LLVM_DEBUG(dbgs() << "Could not load from a constant pool"); 4610 return false; 4611 } 4612 4613 if (DstTy.getSizeInBits() != 128) { 4614 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty"); 4615 // This case can be done with TBL1. 4616 MachineInstr *Concat = emitVectorConcat(None, Src1Reg, Src2Reg, MIRBuilder); 4617 if (!Concat) { 4618 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1"); 4619 return false; 4620 } 4621 4622 // The constant pool load will be 64 bits, so need to convert to FPR128 reg. 4623 IndexLoad = 4624 emitScalarToVector(64, &AArch64::FPR128RegClass, 4625 IndexLoad->getOperand(0).getReg(), MIRBuilder); 4626 4627 auto TBL1 = MIRBuilder.buildInstr( 4628 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass}, 4629 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()}); 4630 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI); 4631 4632 auto Copy = 4633 MIRBuilder 4634 .buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 4635 .addReg(TBL1.getReg(0), 0, AArch64::dsub); 4636 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI); 4637 I.eraseFromParent(); 4638 return true; 4639 } 4640 4641 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive 4642 // Q registers for regalloc. 4643 auto RegSeq = MIRBuilder 4644 .buildInstr(TargetOpcode::REG_SEQUENCE, 4645 {&AArch64::QQRegClass}, {Src1Reg}) 4646 .addImm(AArch64::qsub0) 4647 .addUse(Src2Reg) 4648 .addImm(AArch64::qsub1); 4649 4650 auto TBL2 = MIRBuilder.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)}, 4651 {RegSeq, IndexLoad->getOperand(0)}); 4652 constrainSelectedInstRegOperands(*RegSeq, TII, TRI, RBI); 4653 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI); 4654 I.eraseFromParent(); 4655 return true; 4656 } 4657 4658 MachineInstr *AArch64InstructionSelector::emitLaneInsert( 4659 Optional<Register> DstReg, Register SrcReg, Register EltReg, 4660 unsigned LaneIdx, const RegisterBank &RB, 4661 MachineIRBuilder &MIRBuilder) const { 4662 MachineInstr *InsElt = nullptr; 4663 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; 4664 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4665 4666 // Create a register to define with the insert if one wasn't passed in. 
4667 if (!DstReg) 4668 DstReg = MRI.createVirtualRegister(DstRC); 4669 4670 unsigned EltSize = MRI.getType(EltReg).getSizeInBits(); 4671 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first; 4672 4673 if (RB.getID() == AArch64::FPRRegBankID) { 4674 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder); 4675 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) 4676 .addImm(LaneIdx) 4677 .addUse(InsSub->getOperand(0).getReg()) 4678 .addImm(0); 4679 } else { 4680 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) 4681 .addImm(LaneIdx) 4682 .addUse(EltReg); 4683 } 4684 4685 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); 4686 return InsElt; 4687 } 4688 4689 bool AArch64InstructionSelector::selectInsertElt( 4690 MachineInstr &I, MachineRegisterInfo &MRI) const { 4691 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT); 4692 4693 // Get information on the destination. 4694 Register DstReg = I.getOperand(0).getReg(); 4695 const LLT DstTy = MRI.getType(DstReg); 4696 unsigned VecSize = DstTy.getSizeInBits(); 4697 4698 // Get information on the element we want to insert into the destination. 4699 Register EltReg = I.getOperand(2).getReg(); 4700 const LLT EltTy = MRI.getType(EltReg); 4701 unsigned EltSize = EltTy.getSizeInBits(); 4702 if (EltSize < 16 || EltSize > 64) 4703 return false; // Don't support all element types yet. 4704 4705 // Find the definition of the index. Bail out if it's not defined by a 4706 // G_CONSTANT. 4707 Register IdxReg = I.getOperand(3).getReg(); 4708 auto VRegAndVal = getConstantVRegValWithLookThrough(IdxReg, MRI); 4709 if (!VRegAndVal) 4710 return false; 4711 unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); 4712 4713 // Perform the lane insert. 4714 Register SrcReg = I.getOperand(1).getReg(); 4715 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); 4716 MachineIRBuilder MIRBuilder(I); 4717 4718 if (VecSize < 128) { 4719 // If the vector we're inserting into is smaller than 128 bits, widen it 4720 // to 128 to do the insert. 4721 MachineInstr *ScalarToVec = emitScalarToVector( 4722 VecSize, &AArch64::FPR128RegClass, SrcReg, MIRBuilder); 4723 if (!ScalarToVec) 4724 return false; 4725 SrcReg = ScalarToVec->getOperand(0).getReg(); 4726 } 4727 4728 // Create an insert into a new FPR128 register. 4729 // Note that if our vector is already 128 bits, we end up emitting an extra 4730 // register. 4731 MachineInstr *InsMI = 4732 emitLaneInsert(None, SrcReg, EltReg, LaneIdx, EltRB, MIRBuilder); 4733 4734 if (VecSize < 128) { 4735 // If we had to widen to perform the insert, then we have to demote back to 4736 // the original size to get the result we want. 4737 Register DemoteVec = InsMI->getOperand(0).getReg(); 4738 const TargetRegisterClass *RC = 4739 getMinClassForRegBank(*RBI.getRegBank(DemoteVec, MRI, TRI), VecSize); 4740 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { 4741 LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); 4742 return false; 4743 } 4744 unsigned SubReg = 0; 4745 if (!getSubRegForClass(RC, TRI, SubReg)) 4746 return false; 4747 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { 4748 LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << VecSize 4749 << "\n"); 4750 return false; 4751 } 4752 MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 4753 .addReg(DemoteVec, 0, SubReg); 4754 RBI.constrainGenericRegister(DstReg, *RC, MRI); 4755 } else { 4756 // No widening needed. 
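    // (The destination is already 128 bits wide, so the lane insert emitted
    // above can simply be retargeted to define DstReg directly.)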
4757 InsMI->getOperand(0).setReg(DstReg); 4758 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); 4759 } 4760 4761 I.eraseFromParent(); 4762 return true; 4763 } 4764 4765 bool AArch64InstructionSelector::tryOptConstantBuildVec( 4766 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) const { 4767 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 4768 unsigned DstSize = DstTy.getSizeInBits(); 4769 assert(DstSize <= 128 && "Unexpected build_vec type!"); 4770 if (DstSize < 32) 4771 return false; 4772 // Check if we're building a constant vector, in which case we want to 4773 // generate a constant pool load instead of a vector insert sequence. 4774 SmallVector<Constant *, 16> Csts; 4775 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) { 4776 // Try to find G_CONSTANT or G_FCONSTANT 4777 auto *OpMI = 4778 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI); 4779 if (OpMI) 4780 Csts.emplace_back( 4781 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm())); 4782 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT, 4783 I.getOperand(Idx).getReg(), MRI))) 4784 Csts.emplace_back( 4785 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm())); 4786 else 4787 return false; 4788 } 4789 Constant *CV = ConstantVector::get(Csts); 4790 MachineIRBuilder MIB(I); 4791 if (CV->isNullValue()) { 4792 // Until the importer can support immAllZerosV in pattern leaf nodes, 4793 // select a zero move manually here. 4794 Register DstReg = I.getOperand(0).getReg(); 4795 if (DstSize == 128) { 4796 auto Mov = MIB.buildInstr(AArch64::MOVIv2d_ns, {DstReg}, {}).addImm(0); 4797 I.eraseFromParent(); 4798 return constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); 4799 } else if (DstSize == 64) { 4800 auto Mov = 4801 MIB.buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) 4802 .addImm(0); 4803 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 4804 .addReg(Mov.getReg(0), 0, AArch64::dsub); 4805 I.eraseFromParent(); 4806 return RBI.constrainGenericRegister(DstReg, AArch64::FPR64RegClass, MRI); 4807 } 4808 } 4809 auto *CPLoad = emitLoadFromConstantPool(CV, MIB); 4810 if (!CPLoad) { 4811 LLVM_DEBUG(dbgs() << "Could not generate cp load for build_vector"); 4812 return false; 4813 } 4814 MIB.buildCopy(I.getOperand(0), CPLoad->getOperand(0)); 4815 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 4816 *MRI.getRegClass(CPLoad->getOperand(0).getReg()), 4817 MRI); 4818 I.eraseFromParent(); 4819 return true; 4820 } 4821 4822 bool AArch64InstructionSelector::selectBuildVector( 4823 MachineInstr &I, MachineRegisterInfo &MRI) const { 4824 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 4825 // Until we port more of the optimized selections, for now just use a vector 4826 // insert sequence. 4827 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 4828 const LLT EltTy = MRI.getType(I.getOperand(1).getReg()); 4829 unsigned EltSize = EltTy.getSizeInBits(); 4830 4831 if (tryOptConstantBuildVec(I, DstTy, MRI)) 4832 return true; 4833 if (EltSize < 16 || EltSize > 64) 4834 return false; // Don't support all element types yet. 
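  // General approach (sketch): widen the first source element into a 128-bit
  // vector with emitScalarToVector, then emit one lane insert per remaining
  // operand via emitLaneInsert. If the destination is narrower than 128 bits,
  // copy out the ssub/dsub subregister at the end; otherwise the final insert
  // defines the destination directly.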
4835   const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI);
4836   MachineIRBuilder MIRBuilder(I);
4837
4838   const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass;
4839   MachineInstr *ScalarToVec =
4840       emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC,
4841                          I.getOperand(1).getReg(), MIRBuilder);
4842   if (!ScalarToVec)
4843     return false;
4844
4845   Register DstVec = ScalarToVec->getOperand(0).getReg();
4846   unsigned DstSize = DstTy.getSizeInBits();
4847
4848   // Keep track of the last MI we inserted. Later on, we might be able to save
4849   // a copy using it.
4850   MachineInstr *PrevMI = nullptr;
4851   for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) {
4852     // Note that if we don't do a subregister copy, we can end up making an
4853     // extra register.
4854     PrevMI = &*emitLaneInsert(None, DstVec, I.getOperand(i).getReg(), i - 1, RB,
4855                               MIRBuilder);
4856     DstVec = PrevMI->getOperand(0).getReg();
4857   }
4858
4859   // If DstTy's size in bits is less than 128, then emit a subregister copy
4860   // from DstVec to the last register we've defined.
4861   if (DstSize < 128) {
4862     // Force this to be FPR using the destination vector.
4863     const TargetRegisterClass *RC =
4864         getMinClassForRegBank(*RBI.getRegBank(DstVec, MRI, TRI), DstSize);
4865     if (!RC)
4866       return false;
4867     if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) {
4868       LLVM_DEBUG(dbgs() << "Unsupported register class!\n");
4869       return false;
4870     }
4871
4872     unsigned SubReg = 0;
4873     if (!getSubRegForClass(RC, TRI, SubReg))
4874       return false;
4875     if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) {
4876       LLVM_DEBUG(dbgs() << "Unsupported destination size! (" << DstSize
4877                         << "\n");
4878       return false;
4879     }
4880
4881     Register Reg = MRI.createVirtualRegister(RC);
4882     Register DstReg = I.getOperand(0).getReg();
4883
4884     MIRBuilder.buildInstr(TargetOpcode::COPY, {DstReg}, {})
4885         .addReg(DstVec, 0, SubReg);
4886     MachineOperand &RegOp = I.getOperand(1);
4887     RegOp.setReg(Reg);
4888     RBI.constrainGenericRegister(DstReg, *RC, MRI);
4889   } else {
4890     // We don't need a subregister copy. Save a copy by re-using the
4891     // destination register on the final insert.
4892     assert(PrevMI && "PrevMI was null?");
4893     PrevMI->getOperand(0).setReg(I.getOperand(0).getReg());
4894     constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI);
4895   }
4896
4897   I.eraseFromParent();
4898   return true;
4899 }
4900
4901 /// Helper function to find an intrinsic ID on a MachineInstr. Returns the
4902 /// ID if it exists, and 0 otherwise.
4903 static unsigned findIntrinsicID(MachineInstr &I) {
4904   auto IntrinOp = find_if(I.operands(), [&](const MachineOperand &Op) {
4905     return Op.isIntrinsicID();
4906   });
4907   if (IntrinOp == I.operands_end())
4908     return 0;
4909   return IntrinOp->getIntrinsicID();
4910 }
4911
4912 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects(
4913     MachineInstr &I, MachineRegisterInfo &MRI) const {
4914   // Find the intrinsic ID.
4915   unsigned IntrinID = findIntrinsicID(I);
4916   if (!IntrinID)
4917     return false;
4918   MachineIRBuilder MIRBuilder(I);
4919
4920   // Select the instruction.
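  // Illustrative mapping:
  //   llvm.trap      -> brk #0x1
  //   llvm.debugtrap -> brk #0xf000
  //   llvm.ubsantrap -> brk #0x55XX, where XX is the check-kind operand
  //                     ('U' << 8 == 0x5500).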
4921 switch (IntrinID) { 4922 default: 4923 return false; 4924 case Intrinsic::trap: 4925 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(1); 4926 break; 4927 case Intrinsic::debugtrap: 4928 MIRBuilder.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); 4929 break; 4930 case Intrinsic::ubsantrap: 4931 MIRBuilder.buildInstr(AArch64::BRK, {}, {}) 4932 .addImm(I.getOperand(1).getImm() | ('U' << 8)); 4933 break; 4934 } 4935 4936 I.eraseFromParent(); 4937 return true; 4938 } 4939 4940 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I, 4941 MachineRegisterInfo &MRI) { 4942 unsigned IntrinID = findIntrinsicID(I); 4943 if (!IntrinID) 4944 return false; 4945 MachineIRBuilder MIRBuilder(I); 4946 4947 switch (IntrinID) { 4948 default: 4949 break; 4950 case Intrinsic::aarch64_crypto_sha1h: { 4951 Register DstReg = I.getOperand(0).getReg(); 4952 Register SrcReg = I.getOperand(2).getReg(); 4953 4954 // FIXME: Should this be an assert? 4955 if (MRI.getType(DstReg).getSizeInBits() != 32 || 4956 MRI.getType(SrcReg).getSizeInBits() != 32) 4957 return false; 4958 4959 // The operation has to happen on FPRs. Set up some new FPR registers for 4960 // the source and destination if they are on GPRs. 4961 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { 4962 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); 4963 MIRBuilder.buildCopy({SrcReg}, {I.getOperand(2)}); 4964 4965 // Make sure the copy ends up getting constrained properly. 4966 RBI.constrainGenericRegister(I.getOperand(2).getReg(), 4967 AArch64::GPR32RegClass, MRI); 4968 } 4969 4970 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) 4971 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass); 4972 4973 // Actually insert the instruction. 4974 auto SHA1Inst = MIRBuilder.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg}); 4975 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI); 4976 4977 // Did we create a new register for the destination? 4978 if (DstReg != I.getOperand(0).getReg()) { 4979 // Yep. Copy the result of the instruction back into the original 4980 // destination. 4981 MIRBuilder.buildCopy({I.getOperand(0)}, {DstReg}); 4982 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 4983 AArch64::GPR32RegClass, MRI); 4984 } 4985 4986 I.eraseFromParent(); 4987 return true; 4988 } 4989 case Intrinsic::frameaddress: 4990 case Intrinsic::returnaddress: { 4991 MachineFunction &MF = *I.getParent()->getParent(); 4992 MachineFrameInfo &MFI = MF.getFrameInfo(); 4993 4994 unsigned Depth = I.getOperand(2).getImm(); 4995 Register DstReg = I.getOperand(0).getReg(); 4996 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); 4997 4998 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) { 4999 if (!MFReturnAddr) { 5000 // Insert the copy from LR/X30 into the entry block, before it can be 5001 // clobbered by anything. 
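        // The live-in copy is cached in MFReturnAddr so that any further
        // llvm.returnaddress(0) calls in this function reuse the same vreg
        // instead of creating another copy of LR.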
5002 MFI.setReturnAddressIsTaken(true); 5003 MFReturnAddr = getFunctionLiveInPhysReg(MF, TII, AArch64::LR, 5004 AArch64::GPR64RegClass); 5005 } 5006 5007 if (STI.hasPAuth()) { 5008 MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr}); 5009 } else { 5010 MIRBuilder.buildCopy({Register(AArch64::LR)}, {MFReturnAddr}); 5011 MIRBuilder.buildInstr(AArch64::XPACLRI); 5012 MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); 5013 } 5014 5015 I.eraseFromParent(); 5016 return true; 5017 } 5018 5019 MFI.setFrameAddressIsTaken(true); 5020 Register FrameAddr(AArch64::FP); 5021 while (Depth--) { 5022 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); 5023 auto Ldr = 5024 MIRBuilder.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}) 5025 .addImm(0); 5026 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI); 5027 FrameAddr = NextFrame; 5028 } 5029 5030 if (IntrinID == Intrinsic::frameaddress) 5031 MIRBuilder.buildCopy({DstReg}, {FrameAddr}); 5032 else { 5033 MFI.setReturnAddressIsTaken(true); 5034 5035 if (STI.hasPAuth()) { 5036 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 5037 MIRBuilder.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1); 5038 MIRBuilder.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg}); 5039 } else { 5040 MIRBuilder.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}).addImm(1); 5041 MIRBuilder.buildInstr(AArch64::XPACLRI); 5042 MIRBuilder.buildCopy({DstReg}, {Register(AArch64::LR)}); 5043 } 5044 } 5045 5046 I.eraseFromParent(); 5047 return true; 5048 } 5049 } 5050 return false; 5051 } 5052 5053 InstructionSelector::ComplexRendererFns 5054 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { 5055 auto MaybeImmed = getImmedFromMO(Root); 5056 if (MaybeImmed == None || *MaybeImmed > 31) 5057 return None; 5058 uint64_t Enc = (32 - *MaybeImmed) & 0x1f; 5059 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 5060 } 5061 5062 InstructionSelector::ComplexRendererFns 5063 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { 5064 auto MaybeImmed = getImmedFromMO(Root); 5065 if (MaybeImmed == None || *MaybeImmed > 31) 5066 return None; 5067 uint64_t Enc = 31 - *MaybeImmed; 5068 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 5069 } 5070 5071 InstructionSelector::ComplexRendererFns 5072 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { 5073 auto MaybeImmed = getImmedFromMO(Root); 5074 if (MaybeImmed == None || *MaybeImmed > 63) 5075 return None; 5076 uint64_t Enc = (64 - *MaybeImmed) & 0x3f; 5077 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 5078 } 5079 5080 InstructionSelector::ComplexRendererFns 5081 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { 5082 auto MaybeImmed = getImmedFromMO(Root); 5083 if (MaybeImmed == None || *MaybeImmed > 63) 5084 return None; 5085 uint64_t Enc = 63 - *MaybeImmed; 5086 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 5087 } 5088 5089 /// Helper to select an immediate value that can be represented as a 12-bit 5090 /// value shifted left by either 0 or 12. If it is possible to do so, return 5091 /// the immediate and shift value. If not, return None. 5092 /// 5093 /// Used by selectArithImmed and selectNegArithImmed. 
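///
/// For example (worked values):
///   0x000abc -> immediate 0xabc, LSL #0
///   0xabc000 -> immediate 0xabc, LSL #12
///   0x001001 -> not representable; returns None.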
5094 InstructionSelector::ComplexRendererFns
5095 AArch64InstructionSelector::select12BitValueWithLeftShift(
5096     uint64_t Immed) const {
5097   unsigned ShiftAmt;
5098   if (Immed >> 12 == 0) {
5099     ShiftAmt = 0;
5100   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
5101     ShiftAmt = 12;
5102     Immed = Immed >> 12;
5103   } else
5104     return None;
5105
5106   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
5107   return {{
5108       [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
5109       [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
5110   }};
5111 }
5112
5113 /// Select an immediate value that can be represented as a 12-bit value
5114 /// shifted left by either 0 or 12. If so, return the encoded immediate and
5115 /// the shift amount as renderer functions; otherwise, return None.
5116 InstructionSelector::ComplexRendererFns
5117 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
5118   // This function is called from the addsub_shifted_imm ComplexPattern,
5119   // which lists [imm] as the list of opcodes it's interested in. However,
5120   // we still need to check whether the operand is actually an immediate
5121   // here because the ComplexPattern opcode list is only used in
5122   // root-level opcode matching.
5123   auto MaybeImmed = getImmedFromMO(Root);
5124   if (MaybeImmed == None)
5125     return None;
5126   return select12BitValueWithLeftShift(*MaybeImmed);
5127 }
5128
5129 /// As above, but negates the value before trying to
5130 /// select it.
5131 InstructionSelector::ComplexRendererFns
5132 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
5133   // We need a register here, because we need to know if we have a 64 or 32
5134   // bit immediate.
5135   if (!Root.isReg())
5136     return None;
5137   auto MaybeImmed = getImmedFromMO(Root);
5138   if (MaybeImmed == None)
5139     return None;
5140   uint64_t Immed = *MaybeImmed;
5141
5142   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
5143   // have the opposite effect on the C flag, so this pattern mustn't match under
5144   // those circumstances.
5145   if (Immed == 0)
5146     return None;
5147
5148   // Check if we're dealing with a 32-bit type on the root or a 64-bit type on
5149   // the root.
5150   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5151   if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
5152     Immed = ~((uint32_t)Immed) + 1;
5153   else
5154     Immed = ~Immed + 1ULL;
5155
5156   if (Immed & 0xFFFFFFFFFF000000ULL)
5157     return None;
5158
5159   Immed &= 0xFFFFFFULL;
5160   return select12BitValueWithLeftShift(Immed);
5161 }
5162
5163 /// Return true if it is worth folding MI into an extended register. That is,
5164 /// if it's safe to pull it into the addressing mode of a load or store as a
5165 /// shift.
5166 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
5167     MachineInstr &MI, const MachineRegisterInfo &MRI) const {
5168   // Always fold if there is one use, or if we're optimizing for size.
5169   Register DefReg = MI.getOperand(0).getReg();
5170   if (MRI.hasOneNonDBGUse(DefReg) ||
5171       MI.getParent()->getParent()->getFunction().hasMinSize())
5172     return true;
5173
5174   // It's better to avoid folding and recomputing shifts when we don't have a
5175   // fastpath.
5176   if (!STI.hasLSLFast())
5177     return false;
5178
5179   // We have a fastpath, so folding a shift in and potentially computing it
5180   // many times may be beneficial. Check if this is only used in memory ops.
5181   // If it is, then we should fold.
5182 return all_of(MRI.use_nodbg_instructions(DefReg), 5183 [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); 5184 } 5185 5186 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { 5187 switch (Type) { 5188 case AArch64_AM::SXTB: 5189 case AArch64_AM::SXTH: 5190 case AArch64_AM::SXTW: 5191 return true; 5192 default: 5193 return false; 5194 } 5195 } 5196 5197 InstructionSelector::ComplexRendererFns 5198 AArch64InstructionSelector::selectExtendedSHL( 5199 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset, 5200 unsigned SizeInBytes, bool WantsExt) const { 5201 assert(Base.isReg() && "Expected base to be a register operand"); 5202 assert(Offset.isReg() && "Expected offset to be a register operand"); 5203 5204 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5205 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg()); 5206 if (!OffsetInst) 5207 return None; 5208 5209 unsigned OffsetOpc = OffsetInst->getOpcode(); 5210 bool LookedThroughZExt = false; 5211 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) { 5212 // Try to look through a ZEXT. 5213 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt) 5214 return None; 5215 5216 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg()); 5217 OffsetOpc = OffsetInst->getOpcode(); 5218 LookedThroughZExt = true; 5219 5220 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) 5221 return None; 5222 } 5223 // Make sure that the memory op is a valid size. 5224 int64_t LegalShiftVal = Log2_32(SizeInBytes); 5225 if (LegalShiftVal == 0) 5226 return None; 5227 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) 5228 return None; 5229 5230 // Now, try to find the specific G_CONSTANT. Start by assuming that the 5231 // register we will offset is the LHS, and the register containing the 5232 // constant is the RHS. 5233 Register OffsetReg = OffsetInst->getOperand(1).getReg(); 5234 Register ConstantReg = OffsetInst->getOperand(2).getReg(); 5235 auto ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI); 5236 if (!ValAndVReg) { 5237 // We didn't get a constant on the RHS. If the opcode is a shift, then 5238 // we're done. 5239 if (OffsetOpc == TargetOpcode::G_SHL) 5240 return None; 5241 5242 // If we have a G_MUL, we can use either register. Try looking at the RHS. 5243 std::swap(OffsetReg, ConstantReg); 5244 ValAndVReg = getConstantVRegValWithLookThrough(ConstantReg, MRI); 5245 if (!ValAndVReg) 5246 return None; 5247 } 5248 5249 // The value must fit into 3 bits, and must be positive. Make sure that is 5250 // true. 5251 int64_t ImmVal = ValAndVReg->Value.getSExtValue(); 5252 5253 // Since we're going to pull this into a shift, the constant value must be 5254 // a power of 2. If we got a multiply, then we need to check this. 5255 if (OffsetOpc == TargetOpcode::G_MUL) { 5256 if (!isPowerOf2_32(ImmVal)) 5257 return None; 5258 5259 // Got a power of 2. So, the amount we'll shift is the log base-2 of that. 5260 ImmVal = Log2_32(ImmVal); 5261 } 5262 5263 if ((ImmVal & 0x7) != ImmVal) 5264 return None; 5265 5266 // We are only allowed to shift by LegalShiftVal. This shift value is built 5267 // into the instruction, so we can't just use whatever we want. 5268 if (ImmVal != LegalShiftVal) 5269 return None; 5270 5271 unsigned SignExtend = 0; 5272 if (WantsExt) { 5273 // Check if the offset is defined by an extend, unless we looked through a 5274 // G_ZEXT earlier. 
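    // E.g. (illustrative) an offset defined by %off:gpr(s64) = G_SEXT %w:gpr(s32)
    // gives the 'sxtw' form, while a zero-extend from 32 bits (or an AND with
    // 0xffffffff) gives 'uxtw'; other sign-extend widths are rejected below.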
5275 if (!LookedThroughZExt) { 5276 MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI); 5277 auto Ext = getExtendTypeForInst(*ExtInst, MRI, true); 5278 if (Ext == AArch64_AM::InvalidShiftExtend) 5279 return None; 5280 5281 SignExtend = isSignExtendShiftType(Ext) ? 1 : 0; 5282 // We only support SXTW for signed extension here. 5283 if (SignExtend && Ext != AArch64_AM::SXTW) 5284 return None; 5285 OffsetReg = ExtInst->getOperand(1).getReg(); 5286 } 5287 5288 // Need a 32-bit wide register here. 5289 MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg())); 5290 OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB); 5291 } 5292 5293 // We can use the LHS of the GEP as the base, and the LHS of the shift as an 5294 // offset. Signify that we are shifting by setting the shift flag to 1. 5295 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); }, 5296 [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); }, 5297 [=](MachineInstrBuilder &MIB) { 5298 // Need to add both immediates here to make sure that they are both 5299 // added to the instruction. 5300 MIB.addImm(SignExtend); 5301 MIB.addImm(1); 5302 }}}; 5303 } 5304 5305 /// This is used for computing addresses like this: 5306 /// 5307 /// ldr x1, [x2, x3, lsl #3] 5308 /// 5309 /// Where x2 is the base register, and x3 is an offset register. The shift-left 5310 /// is a constant value specific to this load instruction. That is, we'll never 5311 /// see anything other than a 3 here (which corresponds to the size of the 5312 /// element being loaded.) 5313 InstructionSelector::ComplexRendererFns 5314 AArch64InstructionSelector::selectAddrModeShiftedExtendXReg( 5315 MachineOperand &Root, unsigned SizeInBytes) const { 5316 if (!Root.isReg()) 5317 return None; 5318 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5319 5320 // We want to find something like this: 5321 // 5322 // val = G_CONSTANT LegalShiftVal 5323 // shift = G_SHL off_reg val 5324 // ptr = G_PTR_ADD base_reg shift 5325 // x = G_LOAD ptr 5326 // 5327 // And fold it into this addressing mode: 5328 // 5329 // ldr x, [base_reg, off_reg, lsl #LegalShiftVal] 5330 5331 // Check if we can find the G_PTR_ADD. 5332 MachineInstr *PtrAdd = 5333 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); 5334 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) 5335 return None; 5336 5337 // Now, try to match an opcode which will match our specific offset. 5338 // We want a G_SHL or a G_MUL. 5339 MachineInstr *OffsetInst = 5340 getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI); 5341 return selectExtendedSHL(Root, PtrAdd->getOperand(1), 5342 OffsetInst->getOperand(0), SizeInBytes, 5343 /*WantsExt=*/false); 5344 } 5345 5346 /// This is used for computing addresses like this: 5347 /// 5348 /// ldr x1, [x2, x3] 5349 /// 5350 /// Where x2 is the base register, and x3 is an offset register. 5351 /// 5352 /// When possible (or profitable) to fold a G_PTR_ADD into the address calculation, 5353 /// this will do so. Otherwise, it will return None. 5354 InstructionSelector::ComplexRendererFns 5355 AArch64InstructionSelector::selectAddrModeRegisterOffset( 5356 MachineOperand &Root) const { 5357 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5358 5359 // We need a GEP. 5360 MachineInstr *Gep = MRI.getVRegDef(Root.getReg()); 5361 if (!Gep || Gep->getOpcode() != TargetOpcode::G_PTR_ADD) 5362 return None; 5363 5364 // If this is used more than once, let's not bother folding. 5365 // TODO: Check if they are memory ops. 
If they are, then we can still fold
5366   // without having to recompute anything.
5367   if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
5368     return None;
5369
5370   // Base is the GEP's LHS, offset is its RHS.
5371   return {{[=](MachineInstrBuilder &MIB) {
5372              MIB.addUse(Gep->getOperand(1).getReg());
5373            },
5374            [=](MachineInstrBuilder &MIB) {
5375              MIB.addUse(Gep->getOperand(2).getReg());
5376            },
5377            [=](MachineInstrBuilder &MIB) {
5378              // Need to add both immediates here to make sure that they are both
5379              // added to the instruction.
5380              MIB.addImm(0);
5381              MIB.addImm(0);
5382            }}};
5383 }
5384
5385 /// This is intended to be equivalent to selectAddrModeXRO in
5386 /// AArch64ISelDAGToDAG. It's used for selecting X register offset loads.
5387 InstructionSelector::ComplexRendererFns
5388 AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
5389                                               unsigned SizeInBytes) const {
5390   MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
5391   if (!Root.isReg())
5392     return None;
5393   MachineInstr *PtrAdd =
5394       getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
5395   if (!PtrAdd)
5396     return None;
5397
5398   // Check for immediates which cannot be encoded in the [base + imm]
5399   // addressing mode, and can't be encoded in an add/sub. If this happens, we'll
5400   // end up with code like:
5401   //
5402   // mov x0, wide
5403   // add x1, base, x0
5404   // ldr x2, [x1, x0]
5405   //
5406   // In this situation, we can use the [base, xreg] addressing mode to save an
5407   // add/sub:
5408   //
5409   // mov x0, wide
5410   // ldr x2, [base, x0]
5411   auto ValAndVReg =
5412       getConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
5413   if (ValAndVReg) {
5414     unsigned Scale = Log2_32(SizeInBytes);
5415     int64_t ImmOff = ValAndVReg->Value.getSExtValue();
5416
5417     // Skip immediates that can be selected in the load/store addressing
5418     // mode.
5419     if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
5420         ImmOff < (0x1000 << Scale))
5421       return None;
5422
5423     // Helper lambda to decide whether or not it is preferable to emit an add.
5424     auto isPreferredADD = [](int64_t ImmOff) {
5425       // Constants in [0x0, 0xfff] can be encoded in an add.
5426       if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
5427         return true;
5428
5429       // Can it be encoded in an add lsl #12?
5430       if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
5431         return false;
5432
5433       // It can be encoded in an add lsl #12, but we may not want to. If it is
5434       // possible to select this as a single movz, then prefer that. A single
5435       // movz is faster than an add with a shift.
5436       return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
5437              (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
5438     };
5439
5440     // If the immediate can be encoded in a single add/sub, then bail out.
5441     if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
5442       return None;
5443   }
5444
5445   // Try to fold shifts into the addressing mode.
5446   auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
5447   if (AddrModeFns)
5448     return AddrModeFns;
5449
5450   // If that doesn't work, see if it's possible to fold in registers from
5451   // a GEP.
5452   return selectAddrModeRegisterOffset(Root);
5453 }
5454
5455 /// This is used for computing addresses like this:
5456 ///
5457 /// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
5458 ///
5459 /// Where we have a 64-bit base register, a 32-bit offset register, and an
5460 /// extend (which may or may not be signed).
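///
/// For example (illustrative), with SizeInBytes == 8:
///
///   %ext:gpr(s64) = G_SEXT %wOffset:gpr(s32)
///   %cst:gpr(s64) = G_CONSTANT i64 3
///   %off:gpr(s64) = G_SHL %ext, %cst
///   %addr:gpr(p0) = G_PTR_ADD %xBase, %off
///   %val:gpr(s64) = G_LOAD %addr
///
/// can be selected as: ldr x0, [xBase, wOffset, sxtw #3]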
5461 InstructionSelector::ComplexRendererFns 5462 AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root, 5463 unsigned SizeInBytes) const { 5464 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 5465 5466 MachineInstr *PtrAdd = 5467 getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI); 5468 if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI)) 5469 return None; 5470 5471 MachineOperand &LHS = PtrAdd->getOperand(1); 5472 MachineOperand &RHS = PtrAdd->getOperand(2); 5473 MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI); 5474 5475 // The first case is the same as selectAddrModeXRO, except we need an extend. 5476 // In this case, we try to find a shift and extend, and fold them into the 5477 // addressing mode. 5478 // 5479 // E.g. 5480 // 5481 // off_reg = G_Z/S/ANYEXT ext_reg 5482 // val = G_CONSTANT LegalShiftVal 5483 // shift = G_SHL off_reg val 5484 // ptr = G_PTR_ADD base_reg shift 5485 // x = G_LOAD ptr 5486 // 5487 // In this case we can get a load like this: 5488 // 5489 // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal] 5490 auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0), 5491 SizeInBytes, /*WantsExt=*/true); 5492 if (ExtendedShl) 5493 return ExtendedShl; 5494 5495 // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though. 5496 // 5497 // e.g. 5498 // ldr something, [base_reg, ext_reg, sxtw] 5499 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) 5500 return None; 5501 5502 // Check if this is an extend. We'll get an extend type if it is. 5503 AArch64_AM::ShiftExtendType Ext = 5504 getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true); 5505 if (Ext == AArch64_AM::InvalidShiftExtend) 5506 return None; 5507 5508 // Need a 32-bit wide register. 5509 MachineIRBuilder MIB(*PtrAdd); 5510 Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(), 5511 AArch64::GPR32RegClass, MIB); 5512 unsigned SignExtend = Ext == AArch64_AM::SXTW; 5513 5514 // Base is LHS, offset is ExtReg. 5515 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); }, 5516 [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, 5517 [=](MachineInstrBuilder &MIB) { 5518 MIB.addImm(SignExtend); 5519 MIB.addImm(0); 5520 }}}; 5521 } 5522 5523 /// Select a "register plus unscaled signed 9-bit immediate" address. This 5524 /// should only match when there is an offset that is not valid for a scaled 5525 /// immediate addressing mode. The "Size" argument is the size in bytes of the 5526 /// memory reference, which is needed here to know what is valid for a scaled 5527 /// immediate. 
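///
/// For example (illustrative), with Size == 8 a byte offset of 4 is not a
/// multiple of 8, so it cannot use the scaled form, but it fits the signed
/// 9-bit range [-256, 255] and can be selected as: ldur x0, [x1, #4]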
5528 InstructionSelector::ComplexRendererFns 5529 AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root, 5530 unsigned Size) const { 5531 MachineRegisterInfo &MRI = 5532 Root.getParent()->getParent()->getParent()->getRegInfo(); 5533 5534 if (!Root.isReg()) 5535 return None; 5536 5537 if (!isBaseWithConstantOffset(Root, MRI)) 5538 return None; 5539 5540 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); 5541 if (!RootDef) 5542 return None; 5543 5544 MachineOperand &OffImm = RootDef->getOperand(2); 5545 if (!OffImm.isReg()) 5546 return None; 5547 MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg()); 5548 if (!RHS || RHS->getOpcode() != TargetOpcode::G_CONSTANT) 5549 return None; 5550 int64_t RHSC; 5551 MachineOperand &RHSOp1 = RHS->getOperand(1); 5552 if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64) 5553 return None; 5554 RHSC = RHSOp1.getCImm()->getSExtValue(); 5555 5556 // If the offset is valid as a scaled immediate, don't match here. 5557 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Log2_32(Size))) 5558 return None; 5559 if (RHSC >= -256 && RHSC < 256) { 5560 MachineOperand &Base = RootDef->getOperand(1); 5561 return {{ 5562 [=](MachineInstrBuilder &MIB) { MIB.add(Base); }, 5563 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); }, 5564 }}; 5565 } 5566 return None; 5567 } 5568 5569 InstructionSelector::ComplexRendererFns 5570 AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef, 5571 unsigned Size, 5572 MachineRegisterInfo &MRI) const { 5573 if (RootDef.getOpcode() != AArch64::G_ADD_LOW) 5574 return None; 5575 MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg()); 5576 if (Adrp.getOpcode() != AArch64::ADRP) 5577 return None; 5578 5579 // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG. 5580 // TODO: Need to check GV's offset % size if doing offset folding into globals. 5581 assert(Adrp.getOperand(1).getOffset() == 0 && "Unexpected offset in global"); 5582 auto GV = Adrp.getOperand(1).getGlobal(); 5583 if (GV->isThreadLocal()) 5584 return None; 5585 5586 auto &MF = *RootDef.getParent()->getParent(); 5587 if (GV->getPointerAlignment(MF.getDataLayout()) < Size) 5588 return None; 5589 5590 unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget()); 5591 MachineIRBuilder MIRBuilder(RootDef); 5592 Register AdrpReg = Adrp.getOperand(0).getReg(); 5593 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); }, 5594 [=](MachineInstrBuilder &MIB) { 5595 MIB.addGlobalAddress(GV, /* Offset */ 0, 5596 OpFlags | AArch64II::MO_PAGEOFF | 5597 AArch64II::MO_NC); 5598 }}}; 5599 } 5600 5601 /// Select a "register plus scaled unsigned 12-bit immediate" address. The 5602 /// "Size" argument is the size in bytes of the memory reference, which 5603 /// determines the scale. 
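///
/// For example (illustrative), with Size == 8 a byte offset of 32 is emitted
/// as the scaled immediate 4 (32 >> 3), e.g. ldr x0, [x1, #32]. Valid byte
/// offsets are non-negative multiples of Size below (0x1000 << log2(Size)).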
5604 InstructionSelector::ComplexRendererFns 5605 AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root, 5606 unsigned Size) const { 5607 MachineFunction &MF = *Root.getParent()->getParent()->getParent(); 5608 MachineRegisterInfo &MRI = MF.getRegInfo(); 5609 5610 if (!Root.isReg()) 5611 return None; 5612 5613 MachineInstr *RootDef = MRI.getVRegDef(Root.getReg()); 5614 if (!RootDef) 5615 return None; 5616 5617 if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { 5618 return {{ 5619 [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); }, 5620 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, 5621 }}; 5622 } 5623 5624 CodeModel::Model CM = MF.getTarget().getCodeModel(); 5625 // Check if we can fold in the ADD of small code model ADRP + ADD address. 5626 if (CM == CodeModel::Small) { 5627 auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI); 5628 if (OpFns) 5629 return OpFns; 5630 } 5631 5632 if (isBaseWithConstantOffset(Root, MRI)) { 5633 MachineOperand &LHS = RootDef->getOperand(1); 5634 MachineOperand &RHS = RootDef->getOperand(2); 5635 MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg()); 5636 MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg()); 5637 if (LHSDef && RHSDef) { 5638 int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue(); 5639 unsigned Scale = Log2_32(Size); 5640 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { 5641 if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) 5642 return {{ 5643 [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); }, 5644 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, 5645 }}; 5646 5647 return {{ 5648 [=](MachineInstrBuilder &MIB) { MIB.add(LHS); }, 5649 [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); }, 5650 }}; 5651 } 5652 } 5653 } 5654 5655 // Before falling back to our general case, check if the unscaled 5656 // instructions can handle this. If so, that's preferable. 5657 if (selectAddrModeUnscaled(Root, Size).hasValue()) 5658 return None; 5659 5660 return {{ 5661 [=](MachineInstrBuilder &MIB) { MIB.add(Root); }, 5662 [=](MachineInstrBuilder &MIB) { MIB.addImm(0); }, 5663 }}; 5664 } 5665 5666 /// Given a shift instruction, return the correct shift type for that 5667 /// instruction. 5668 static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) { 5669 // TODO: Handle AArch64_AM::ROR 5670 switch (MI.getOpcode()) { 5671 default: 5672 return AArch64_AM::InvalidShiftExtend; 5673 case TargetOpcode::G_SHL: 5674 return AArch64_AM::LSL; 5675 case TargetOpcode::G_LSHR: 5676 return AArch64_AM::LSR; 5677 case TargetOpcode::G_ASHR: 5678 return AArch64_AM::ASR; 5679 } 5680 } 5681 5682 /// Select a "shifted register" operand. If the value is not shifted, set the 5683 /// shift operand to a default value of "lsl 0". 5684 /// 5685 /// TODO: Allow shifted register to be rotated in logical instructions. 5686 InstructionSelector::ComplexRendererFns 5687 AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const { 5688 if (!Root.isReg()) 5689 return None; 5690 MachineRegisterInfo &MRI = 5691 Root.getParent()->getParent()->getParent()->getRegInfo(); 5692 5693 // Check if the operand is defined by an instruction which corresponds to 5694 // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc. 5695 // 5696 // TODO: Handle AArch64_AM::ROR for logical instructions. 
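  // For example (illustrative): if Root is defined by
  //   %r:gpr(s64) = G_SHL %x:gpr(s64), %c   (%c = G_CONSTANT i64 3)
  // and folding is worthwhile, we return %x plus the shifter immediate for
  // 'lsl #3', so the user can be selected as e.g. add x0, x1, x2, lsl #3.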
5697 MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg()); 5698 if (!ShiftInst) 5699 return None; 5700 AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst); 5701 if (ShType == AArch64_AM::InvalidShiftExtend) 5702 return None; 5703 if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI)) 5704 return None; 5705 5706 // Need an immediate on the RHS. 5707 MachineOperand &ShiftRHS = ShiftInst->getOperand(2); 5708 auto Immed = getImmedFromMO(ShiftRHS); 5709 if (!Immed) 5710 return None; 5711 5712 // We have something that we can fold. Fold in the shift's LHS and RHS into 5713 // the instruction. 5714 MachineOperand &ShiftLHS = ShiftInst->getOperand(1); 5715 Register ShiftReg = ShiftLHS.getReg(); 5716 5717 unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits(); 5718 unsigned Val = *Immed & (NumBits - 1); 5719 unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val); 5720 5721 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); }, 5722 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}}; 5723 } 5724 5725 AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst( 5726 MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const { 5727 unsigned Opc = MI.getOpcode(); 5728 5729 // Handle explicit extend instructions first. 5730 if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) { 5731 unsigned Size; 5732 if (Opc == TargetOpcode::G_SEXT) 5733 Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 5734 else 5735 Size = MI.getOperand(2).getImm(); 5736 assert(Size != 64 && "Extend from 64 bits?"); 5737 switch (Size) { 5738 case 8: 5739 return AArch64_AM::SXTB; 5740 case 16: 5741 return AArch64_AM::SXTH; 5742 case 32: 5743 return AArch64_AM::SXTW; 5744 default: 5745 return AArch64_AM::InvalidShiftExtend; 5746 } 5747 } 5748 5749 if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) { 5750 unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); 5751 assert(Size != 64 && "Extend from 64 bits?"); 5752 switch (Size) { 5753 case 8: 5754 return AArch64_AM::UXTB; 5755 case 16: 5756 return AArch64_AM::UXTH; 5757 case 32: 5758 return AArch64_AM::UXTW; 5759 default: 5760 return AArch64_AM::InvalidShiftExtend; 5761 } 5762 } 5763 5764 // Don't have an explicit extend. Try to handle a G_AND with a constant mask 5765 // on the RHS. 5766 if (Opc != TargetOpcode::G_AND) 5767 return AArch64_AM::InvalidShiftExtend; 5768 5769 Optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2)); 5770 if (!MaybeAndMask) 5771 return AArch64_AM::InvalidShiftExtend; 5772 uint64_t AndMask = *MaybeAndMask; 5773 switch (AndMask) { 5774 default: 5775 return AArch64_AM::InvalidShiftExtend; 5776 case 0xFF: 5777 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; 5778 case 0xFFFF: 5779 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; 5780 case 0xFFFFFFFF: 5781 return AArch64_AM::UXTW; 5782 } 5783 } 5784 5785 Register AArch64InstructionSelector::moveScalarRegClass( 5786 Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const { 5787 MachineRegisterInfo &MRI = *MIB.getMRI(); 5788 auto Ty = MRI.getType(Reg); 5789 assert(!Ty.isVector() && "Expected scalars only!"); 5790 if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC)) 5791 return Reg; 5792 5793 // Create a copy and immediately select it. 5794 // FIXME: We should have an emitCopy function? 
5795 auto Copy = MIB.buildCopy({&RC}, {Reg}); 5796 selectCopy(*Copy, TII, MRI, TRI, RBI); 5797 return Copy.getReg(0); 5798 } 5799 5800 /// Select an "extended register" operand. This operand folds in an extend 5801 /// followed by an optional left shift. 5802 InstructionSelector::ComplexRendererFns 5803 AArch64InstructionSelector::selectArithExtendedRegister( 5804 MachineOperand &Root) const { 5805 if (!Root.isReg()) 5806 return None; 5807 MachineRegisterInfo &MRI = 5808 Root.getParent()->getParent()->getParent()->getRegInfo(); 5809 5810 uint64_t ShiftVal = 0; 5811 Register ExtReg; 5812 AArch64_AM::ShiftExtendType Ext; 5813 MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI); 5814 if (!RootDef) 5815 return None; 5816 5817 if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI)) 5818 return None; 5819 5820 // Check if we can fold a shift and an extend. 5821 if (RootDef->getOpcode() == TargetOpcode::G_SHL) { 5822 // Look for a constant on the RHS of the shift. 5823 MachineOperand &RHS = RootDef->getOperand(2); 5824 Optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS); 5825 if (!MaybeShiftVal) 5826 return None; 5827 ShiftVal = *MaybeShiftVal; 5828 if (ShiftVal > 4) 5829 return None; 5830 // Look for a valid extend instruction on the LHS of the shift. 5831 MachineOperand &LHS = RootDef->getOperand(1); 5832 MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI); 5833 if (!ExtDef) 5834 return None; 5835 Ext = getExtendTypeForInst(*ExtDef, MRI); 5836 if (Ext == AArch64_AM::InvalidShiftExtend) 5837 return None; 5838 ExtReg = ExtDef->getOperand(1).getReg(); 5839 } else { 5840 // Didn't get a shift. Try just folding an extend. 5841 Ext = getExtendTypeForInst(*RootDef, MRI); 5842 if (Ext == AArch64_AM::InvalidShiftExtend) 5843 return None; 5844 ExtReg = RootDef->getOperand(1).getReg(); 5845 5846 // If we have a 32 bit instruction which zeroes out the high half of a 5847 // register, we get an implicit zero extend for free. Check if we have one. 5848 // FIXME: We actually emit the extend right now even though we don't have 5849 // to. 5850 if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) { 5851 MachineInstr *ExtInst = MRI.getVRegDef(ExtReg); 5852 if (ExtInst && isDef32(*ExtInst)) 5853 return None; 5854 } 5855 } 5856 5857 // We require a GPR32 here. Narrow the ExtReg if needed using a subregister 5858 // copy. 
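  // E.g. (illustrative): %r:gpr(s64) = G_SHL (G_SEXT %w:gpr(s32)), 2 folds
  // into its user as an 'sxtw #2' extended-register operand, giving something
  // like: add x0, x1, w2, sxtw #2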
5859 MachineIRBuilder MIB(*RootDef); 5860 ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB); 5861 5862 return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }, 5863 [=](MachineInstrBuilder &MIB) { 5864 MIB.addImm(getArithExtendImm(Ext, ShiftVal)); 5865 }}}; 5866 } 5867 5868 void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB, 5869 const MachineInstr &MI, 5870 int OpIdx) const { 5871 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 5872 assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 5873 "Expected G_CONSTANT"); 5874 Optional<int64_t> CstVal = 5875 getConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI); 5876 assert(CstVal && "Expected constant value"); 5877 MIB.addImm(CstVal.getValue()); 5878 } 5879 5880 void AArch64InstructionSelector::renderLogicalImm32( 5881 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { 5882 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 5883 "Expected G_CONSTANT"); 5884 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); 5885 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32); 5886 MIB.addImm(Enc); 5887 } 5888 5889 void AArch64InstructionSelector::renderLogicalImm64( 5890 MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const { 5891 assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 && 5892 "Expected G_CONSTANT"); 5893 uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue(); 5894 uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64); 5895 MIB.addImm(Enc); 5896 } 5897 5898 bool AArch64InstructionSelector::isLoadStoreOfNumBytes( 5899 const MachineInstr &MI, unsigned NumBytes) const { 5900 if (!MI.mayLoadOrStore()) 5901 return false; 5902 assert(MI.hasOneMemOperand() && 5903 "Expected load/store to have only one mem op!"); 5904 return (*MI.memoperands_begin())->getSize() == NumBytes; 5905 } 5906 5907 bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const { 5908 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 5909 if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32) 5910 return false; 5911 5912 // Only return true if we know the operation will zero-out the high half of 5913 // the 64-bit register. Truncates can be subregister copies, which don't 5914 // zero out the high bits. Copies and other copy-like instructions can be 5915 // fed by truncates, or could be lowered as subregister copies. 5916 switch (MI.getOpcode()) { 5917 default: 5918 return true; 5919 case TargetOpcode::COPY: 5920 case TargetOpcode::G_BITCAST: 5921 case TargetOpcode::G_TRUNC: 5922 case TargetOpcode::G_PHI: 5923 return false; 5924 } 5925 } 5926 5927 5928 // Perform fixups on the given PHI instruction's operands to force them all 5929 // to be the same as the destination regbank. 5930 static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI, 5931 const AArch64RegisterBankInfo &RBI) { 5932 assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI"); 5933 Register DstReg = MI.getOperand(0).getReg(); 5934 const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg); 5935 assert(DstRB && "Expected PHI dst to have regbank assigned"); 5936 MachineIRBuilder MIB(MI); 5937 5938 // Go through each operand and ensure it has the same regbank. 
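  // E.g. (illustrative) an fpr-bank %src feeding a gpr-bank G_PHI gets
  //   %copy:gpr(s16) = COPY %src:fpr(s16)
  // inserted right after %src's definition, and the PHI operand is rewritten
  // to use %copy.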
5939   for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
5940     MachineOperand &MO = MI.getOperand(OpIdx);
5941     if (!MO.isReg())
5942       continue;
5943     Register OpReg = MO.getReg();
5944     const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
5945     if (RB != DstRB) {
5946       // Insert a cross-bank copy.
5947       auto *OpDef = MRI.getVRegDef(OpReg);
5948       const LLT &Ty = MRI.getType(OpReg);
5949       MIB.setInsertPt(*OpDef->getParent(), std::next(OpDef->getIterator()));
5950       auto Copy = MIB.buildCopy(Ty, OpReg);
5951       MRI.setRegBank(Copy.getReg(0), *DstRB);
5952       MO.setReg(Copy.getReg(0));
5953     }
5954   }
5955 }
5956
5957 void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
5958   // We're looking for PHIs, build a list so we don't invalidate iterators.
5959   MachineRegisterInfo &MRI = MF.getRegInfo();
5960   SmallVector<MachineInstr *, 32> Phis;
5961   for (auto &BB : MF) {
5962     for (auto &MI : BB) {
5963       if (MI.getOpcode() == TargetOpcode::G_PHI)
5964         Phis.emplace_back(&MI);
5965     }
5966   }
5967
5968   for (auto *MI : Phis) {
5969     // We need to do some work here if the operand types are < 16 bit and they
5970     // are split across fpr/gpr banks. Since all types <32b on gpr
5971     // end up being assigned gpr32 regclasses, we can end up with PHIs here
5972     // which try to select between a gpr32 and an fpr16. Ideally RBS shouldn't
5973     // be selecting heterogeneous regbanks for operands if possible, but we
5974     // still need to be able to deal with it here.
5975     //
5976     // To fix this, if we have a gpr-bank operand < 32b in size and at least
5977     // one other operand is on the fpr bank, then we add cross-bank copies
5978     // to homogenize the operand banks. For simplicity the bank that we choose
5979     // to settle on is whatever bank the def operand has. For example:
5980     //
5981     // %endbb:
5982     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
5983     // =>
5984     // %bb2:
5985     //   ...
5986     //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
5987     //   ...
5988     // %endbb:
5989     //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
5990     bool HasGPROp = false, HasFPROp = false;
5991     for (unsigned OpIdx = 1; OpIdx < MI->getNumOperands(); ++OpIdx) {
5992       const auto &MO = MI->getOperand(OpIdx);
5993       if (!MO.isReg())
5994         continue;
5995       const LLT &Ty = MRI.getType(MO.getReg());
5996       if (!Ty.isValid() || !Ty.isScalar())
5997         break;
5998       if (Ty.getSizeInBits() >= 32)
5999         break;
6000       const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
6001       // If for some reason we don't have a regbank yet, don't try anything.
6002       if (!RB)
6003         break;
6004
6005       if (RB->getID() == AArch64::GPRRegBankID)
6006         HasGPROp = true;
6007       else
6008         HasFPROp = true;
6009     }
6010     // We have heterogeneous regbanks, so we need to fix them up.
6011     if (HasGPROp && HasFPROp)
6012       fixupPHIOpBanks(*MI, MRI, RBI);
6013   }
6014 }
6015
6016 namespace llvm {
6017 InstructionSelector *
6018 createAArch64InstructionSelector(const AArch64TargetMachine &TM,
6019                                  AArch64Subtarget &Subtarget,
6020                                  AArch64RegisterBankInfo &RBI) {
6021   return new AArch64InstructionSelector(TM, Subtarget, RBI);
6022 }
6023 } // namespace llvm
6024