1 //===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements the targeting of the InstructionSelector class for 10 /// AArch64. 11 /// \todo This should be generated by TableGen. 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64GlobalISelUtils.h" 15 #include "AArch64InstrInfo.h" 16 #include "AArch64MachineFunctionInfo.h" 17 #include "AArch64RegisterBankInfo.h" 18 #include "AArch64RegisterInfo.h" 19 #include "AArch64Subtarget.h" 20 #include "AArch64TargetMachine.h" 21 #include "MCTargetDesc/AArch64AddressingModes.h" 22 #include "MCTargetDesc/AArch64MCTargetDesc.h" 23 #include "llvm/BinaryFormat/Dwarf.h" 24 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" 25 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" 26 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 27 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 28 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" 29 #include "llvm/CodeGen/GlobalISel/Utils.h" 30 #include "llvm/CodeGen/MachineBasicBlock.h" 31 #include "llvm/CodeGen/MachineConstantPool.h" 32 #include "llvm/CodeGen/MachineFrameInfo.h" 33 #include "llvm/CodeGen/MachineFunction.h" 34 #include "llvm/CodeGen/MachineInstr.h" 35 #include "llvm/CodeGen/MachineInstrBuilder.h" 36 #include "llvm/CodeGen/MachineMemOperand.h" 37 #include "llvm/CodeGen/MachineOperand.h" 38 #include "llvm/CodeGen/MachineRegisterInfo.h" 39 #include "llvm/CodeGen/TargetOpcodes.h" 40 #include "llvm/IR/Constants.h" 41 #include "llvm/IR/DerivedTypes.h" 42 #include "llvm/IR/Instructions.h" 43 #include "llvm/IR/IntrinsicsAArch64.h" 44 #include "llvm/IR/PatternMatch.h" 45 #include "llvm/IR/Type.h" 46 #include "llvm/Pass.h" 47 #include "llvm/Support/Debug.h" 48 #include "llvm/Support/raw_ostream.h" 49 #include <optional> 50 51 #define DEBUG_TYPE "aarch64-isel" 52 53 using namespace llvm; 54 using namespace MIPatternMatch; 55 using namespace AArch64GISelUtils; 56 57 namespace llvm { 58 class BlockFrequencyInfo; 59 class ProfileSummaryInfo; 60 } 61 62 namespace { 63 64 #define GET_GLOBALISEL_PREDICATE_BITSET 65 #include "AArch64GenGlobalISel.inc" 66 #undef GET_GLOBALISEL_PREDICATE_BITSET 67 68 69 class AArch64InstructionSelector : public InstructionSelector { 70 public: 71 AArch64InstructionSelector(const AArch64TargetMachine &TM, 72 const AArch64Subtarget &STI, 73 const AArch64RegisterBankInfo &RBI); 74 75 bool select(MachineInstr &I) override; 76 static const char *getName() { return DEBUG_TYPE; } 77 78 void setupMF(MachineFunction &MF, GISelKnownBits *KB, 79 CodeGenCoverage *CoverageInfo, ProfileSummaryInfo *PSI, 80 BlockFrequencyInfo *BFI) override { 81 InstructionSelector::setupMF(MF, KB, CoverageInfo, PSI, BFI); 82 MIB.setMF(MF); 83 84 // hasFnAttribute() is expensive to call on every BRCOND selection, so 85 // cache it here for each run of the selector. 86 ProduceNonFlagSettingCondBr = 87 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening); 88 MFReturnAddr = Register(); 89 90 processPHIs(MF); 91 } 92 93 private: 94 /// tblgen-erated 'select' implementation, used as the initial selector for 95 /// the patterns that don't require complex C++. 
96 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; 97 98 // A lowering phase that runs before any selection attempts. 99 // Returns true if the instruction was modified. 100 bool preISelLower(MachineInstr &I); 101 102 // An early selection function that runs before the selectImpl() call. 103 bool earlySelect(MachineInstr &I); 104 105 // Do some preprocessing of G_PHIs before we begin selection. 106 void processPHIs(MachineFunction &MF); 107 108 bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI); 109 110 /// Eliminate same-sized cross-bank copies into stores before selectImpl(). 111 bool contractCrossBankCopyIntoStore(MachineInstr &I, 112 MachineRegisterInfo &MRI); 113 114 bool convertPtrAddToAdd(MachineInstr &I, MachineRegisterInfo &MRI); 115 116 bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF, 117 MachineRegisterInfo &MRI) const; 118 bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF, 119 MachineRegisterInfo &MRI) const; 120 121 ///@{ 122 /// Helper functions for selectCompareBranch. 123 bool selectCompareBranchFedByFCmp(MachineInstr &I, MachineInstr &FCmp, 124 MachineIRBuilder &MIB) const; 125 bool selectCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, 126 MachineIRBuilder &MIB) const; 127 bool tryOptCompareBranchFedByICmp(MachineInstr &I, MachineInstr &ICmp, 128 MachineIRBuilder &MIB) const; 129 bool tryOptAndIntoCompareBranch(MachineInstr &AndInst, bool Invert, 130 MachineBasicBlock *DstMBB, 131 MachineIRBuilder &MIB) const; 132 ///@} 133 134 bool selectCompareBranch(MachineInstr &I, MachineFunction &MF, 135 MachineRegisterInfo &MRI); 136 137 bool selectVectorAshrLshr(MachineInstr &I, MachineRegisterInfo &MRI); 138 bool selectVectorSHL(MachineInstr &I, MachineRegisterInfo &MRI); 139 140 // Helper to generate an equivalent of scalar_to_vector into a new register, 141 // returned via 'Dst'. 142 MachineInstr *emitScalarToVector(unsigned EltSize, 143 const TargetRegisterClass *DstRC, 144 Register Scalar, 145 MachineIRBuilder &MIRBuilder) const; 146 147 /// Emit a lane insert into \p DstReg, or a new vector register if 148 /// std::nullopt is provided. 149 /// 150 /// The lane inserted into is defined by \p LaneIdx. The vector source 151 /// register is given by \p SrcReg. The register containing the element is 152 /// given by \p EltReg. 153 MachineInstr *emitLaneInsert(std::optional<Register> DstReg, Register SrcReg, 154 Register EltReg, unsigned LaneIdx, 155 const RegisterBank &RB, 156 MachineIRBuilder &MIRBuilder) const; 157 158 /// Emit a sequence of instructions representing a constant \p CV for a 159 /// vector register \p Dst. (E.g. a MOV, or a load from a constant pool.) 160 /// 161 /// \returns the last instruction in the sequence on success, and nullptr 162 /// otherwise. 163 MachineInstr *emitConstantVector(Register Dst, Constant *CV, 164 MachineIRBuilder &MIRBuilder, 165 MachineRegisterInfo &MRI); 166 167 bool selectInsertElt(MachineInstr &I, MachineRegisterInfo &MRI); 168 bool tryOptConstantBuildVec(MachineInstr &MI, LLT DstTy, 169 MachineRegisterInfo &MRI); 170 /// \returns true if a G_BUILD_VECTOR instruction \p MI can be selected as a 171 /// SUBREG_TO_REG. 
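  ///
  /// For example (an illustrative sketch, not an exhaustive description of
  /// what is handled): a G_BUILD_VECTOR whose lanes other than the first are
  /// all undef only needs a subregister insertion, roughly:
  /// \code
  ///   %vec:fpr(<2 x s64>) = G_BUILD_VECTOR %elt(s64), %undef(s64)
  ///   ; can be selected as something like:
  ///   %vec:fpr128 = SUBREG_TO_REG 0, %elt:fpr64, %subreg.dsub
  /// \endcode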
172 bool tryOptBuildVecToSubregToReg(MachineInstr &MI, MachineRegisterInfo &MRI); 173 bool selectBuildVector(MachineInstr &I, MachineRegisterInfo &MRI); 174 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI); 175 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI); 176 177 bool selectShuffleVector(MachineInstr &I, MachineRegisterInfo &MRI); 178 bool selectExtractElt(MachineInstr &I, MachineRegisterInfo &MRI); 179 bool selectConcatVectors(MachineInstr &I, MachineRegisterInfo &MRI); 180 bool selectSplitVectorUnmerge(MachineInstr &I, MachineRegisterInfo &MRI); 181 182 /// Helper function to select vector load intrinsics like 183 /// @llvm.aarch64.neon.ld2.*, @llvm.aarch64.neon.ld4.*, etc. 184 /// \p Opc is the opcode that the selected instruction should use. 185 /// \p NumVecs is the number of vector destinations for the instruction. 186 /// \p I is the original G_INTRINSIC_W_SIDE_EFFECTS instruction. 187 bool selectVectorLoadIntrinsic(unsigned Opc, unsigned NumVecs, 188 MachineInstr &I); 189 bool selectIntrinsicWithSideEffects(MachineInstr &I, 190 MachineRegisterInfo &MRI); 191 bool selectIntrinsic(MachineInstr &I, MachineRegisterInfo &MRI); 192 bool selectVectorICmp(MachineInstr &I, MachineRegisterInfo &MRI); 193 bool selectIntrinsicTrunc(MachineInstr &I, MachineRegisterInfo &MRI) const; 194 bool selectIntrinsicRound(MachineInstr &I, MachineRegisterInfo &MRI) const; 195 bool selectJumpTable(MachineInstr &I, MachineRegisterInfo &MRI); 196 bool selectBrJT(MachineInstr &I, MachineRegisterInfo &MRI); 197 bool selectTLSGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI); 198 bool selectReduction(MachineInstr &I, MachineRegisterInfo &MRI); 199 bool selectMOPS(MachineInstr &I, MachineRegisterInfo &MRI); 200 bool selectUSMovFromExtend(MachineInstr &I, MachineRegisterInfo &MRI); 201 202 unsigned emitConstantPoolEntry(const Constant *CPVal, 203 MachineFunction &MF) const; 204 MachineInstr *emitLoadFromConstantPool(const Constant *CPVal, 205 MachineIRBuilder &MIRBuilder) const; 206 207 // Emit a vector concat operation. 208 MachineInstr *emitVectorConcat(std::optional<Register> Dst, Register Op1, 209 Register Op2, 210 MachineIRBuilder &MIRBuilder) const; 211 212 // Emit an integer compare between LHS and RHS, which checks for Predicate. 213 MachineInstr *emitIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, 214 MachineOperand &Predicate, 215 MachineIRBuilder &MIRBuilder) const; 216 217 /// Emit a floating point comparison between \p LHS and \p RHS. 218 /// \p Pred if given is the intended predicate to use. 219 MachineInstr * 220 emitFPCompare(Register LHS, Register RHS, MachineIRBuilder &MIRBuilder, 221 std::optional<CmpInst::Predicate> = std::nullopt) const; 222 223 MachineInstr * 224 emitInstr(unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, 225 std::initializer_list<llvm::SrcOp> SrcOps, 226 MachineIRBuilder &MIRBuilder, 227 const ComplexRendererFns &RenderFns = std::nullopt) const; 228 /// Helper function to emit an add or sub instruction. 229 /// 230 /// \p AddrModeAndSizeToOpcode must contain each of the opcode variants above 231 /// in a specific order. 232 /// 233 /// Below is an example of the expected input to \p AddrModeAndSizeToOpcode. 
  ///
  /// \code
  ///   const std::array<std::array<unsigned, 2>, 5> Table {
  ///    {{AArch64::ADDXri, AArch64::ADDWri},
  ///     {AArch64::ADDXrs, AArch64::ADDWrs},
  ///     {AArch64::ADDXrr, AArch64::ADDWrr},
  ///     {AArch64::SUBXri, AArch64::SUBWri},
  ///     {AArch64::ADDXrx, AArch64::ADDWrx}}};
  /// \endcode
  ///
  /// Each row in the table corresponds to a different addressing mode. Each
  /// column corresponds to a different register size.
  ///
  /// \attention Rows must be structured as follows:
  ///   - Row 0: The ri opcode variants
  ///   - Row 1: The rs opcode variants
  ///   - Row 2: The rr opcode variants
  ///   - Row 3: The ri opcode variants for negative immediates
  ///   - Row 4: The rx opcode variants
  ///
  /// \attention Columns must be structured as follows:
  ///   - Column 0: The 64-bit opcode variants
  ///   - Column 1: The 32-bit opcode variants
  ///
  /// \p Dst is the destination register of the binop to emit.
  /// \p LHS is the left-hand operand of the binop to emit.
  /// \p RHS is the right-hand operand of the binop to emit.
  MachineInstr *emitAddSub(
      const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode,
      Register Dst, MachineOperand &LHS, MachineOperand &RHS,
      MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADD(Register DefReg, MachineOperand &LHS,
                        MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADDS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSUBS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitADCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSBCS(Register Dst, MachineOperand &LHS, MachineOperand &RHS,
                         MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCMN(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitTST(MachineOperand &LHS, MachineOperand &RHS,
                        MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitSelect(Register Dst, Register LHS, Register RHS,
                           AArch64CC::CondCode CC,
                           MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitExtractVectorElt(std::optional<Register> DstReg,
                                     const RegisterBank &DstRB, LLT ScalarTy,
                                     Register VecReg, unsigned LaneIdx,
                                     MachineIRBuilder &MIRBuilder) const;
  MachineInstr *emitCSINC(Register Dst, Register Src1, Register Src2,
                          AArch64CC::CondCode Pred,
                          MachineIRBuilder &MIRBuilder) const;
  /// Emit a CSet for a FP compare.
  ///
  /// \p Dst is expected to be a 32-bit scalar register.
  MachineInstr *emitCSetForFCmp(Register Dst, CmpInst::Predicate Pred,
                                MachineIRBuilder &MIRBuilder) const;

  /// Emit an instruction that sets NZCV to the carry-in expected by \p I.
  /// Might elide the instruction if the previous instruction already sets NZCV
  /// correctly.
  MachineInstr *emitCarryIn(MachineInstr &I, Register CarryReg);

  /// Emit the overflow op for \p Opcode.
  ///
  /// \p Opcode is expected to be an overflow op's opcode, e.g. G_UADDO,
  /// G_USUBO, etc.
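  ///
  /// A minimal usage sketch (the exact instruction/condition-code pair that
  /// comes back depends on \p Opcode; G_UADDO is only an example here):
  /// \code
  ///   auto [OverflowMI, OverflowCC] =
  ///       emitOverflowOp(TargetOpcode::G_UADDO, Dst, LHS, RHS, MIRBuilder);
  ///   // OverflowCC can then be materialized into a boolean, e.g. via
  ///   // emitCSINC against the zero register.
  /// \endcode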
305 std::pair<MachineInstr *, AArch64CC::CondCode> 306 emitOverflowOp(unsigned Opcode, Register Dst, MachineOperand &LHS, 307 MachineOperand &RHS, MachineIRBuilder &MIRBuilder) const; 308 309 bool selectOverflowOp(MachineInstr &I, MachineRegisterInfo &MRI); 310 311 /// Emit expression as a conjunction (a series of CCMP/CFCMP ops). 312 /// In some cases this is even possible with OR operations in the expression. 313 MachineInstr *emitConjunction(Register Val, AArch64CC::CondCode &OutCC, 314 MachineIRBuilder &MIB) const; 315 MachineInstr *emitConditionalComparison(Register LHS, Register RHS, 316 CmpInst::Predicate CC, 317 AArch64CC::CondCode Predicate, 318 AArch64CC::CondCode OutCC, 319 MachineIRBuilder &MIB) const; 320 MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC, 321 bool Negate, Register CCOp, 322 AArch64CC::CondCode Predicate, 323 MachineIRBuilder &MIB) const; 324 325 /// Emit a TB(N)Z instruction which tests \p Bit in \p TestReg. 326 /// \p IsNegative is true if the test should be "not zero". 327 /// This will also optimize the test bit instruction when possible. 328 MachineInstr *emitTestBit(Register TestReg, uint64_t Bit, bool IsNegative, 329 MachineBasicBlock *DstMBB, 330 MachineIRBuilder &MIB) const; 331 332 /// Emit a CB(N)Z instruction which branches to \p DestMBB. 333 MachineInstr *emitCBZ(Register CompareReg, bool IsNegative, 334 MachineBasicBlock *DestMBB, 335 MachineIRBuilder &MIB) const; 336 337 // Equivalent to the i32shift_a and friends from AArch64InstrInfo.td. 338 // We use these manually instead of using the importer since it doesn't 339 // support SDNodeXForm. 340 ComplexRendererFns selectShiftA_32(const MachineOperand &Root) const; 341 ComplexRendererFns selectShiftB_32(const MachineOperand &Root) const; 342 ComplexRendererFns selectShiftA_64(const MachineOperand &Root) const; 343 ComplexRendererFns selectShiftB_64(const MachineOperand &Root) const; 344 345 ComplexRendererFns select12BitValueWithLeftShift(uint64_t Immed) const; 346 ComplexRendererFns selectArithImmed(MachineOperand &Root) const; 347 ComplexRendererFns selectNegArithImmed(MachineOperand &Root) const; 348 349 ComplexRendererFns selectAddrModeUnscaled(MachineOperand &Root, 350 unsigned Size) const; 351 352 ComplexRendererFns selectAddrModeUnscaled8(MachineOperand &Root) const { 353 return selectAddrModeUnscaled(Root, 1); 354 } 355 ComplexRendererFns selectAddrModeUnscaled16(MachineOperand &Root) const { 356 return selectAddrModeUnscaled(Root, 2); 357 } 358 ComplexRendererFns selectAddrModeUnscaled32(MachineOperand &Root) const { 359 return selectAddrModeUnscaled(Root, 4); 360 } 361 ComplexRendererFns selectAddrModeUnscaled64(MachineOperand &Root) const { 362 return selectAddrModeUnscaled(Root, 8); 363 } 364 ComplexRendererFns selectAddrModeUnscaled128(MachineOperand &Root) const { 365 return selectAddrModeUnscaled(Root, 16); 366 } 367 368 /// Helper to try to fold in a GISEL_ADD_LOW into an immediate, to be used 369 /// from complex pattern matchers like selectAddrModeIndexed(). 
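  ///
  /// Illustrative MIR for the pattern this is aimed at (assuming the usual
  /// ADRP + low-12-bits global addressing sequence; the names are only for
  /// the example):
  /// \code
  ///   %page = ADRP @g
  ///   %addr = G_ADD_LOW %page, @g
  ///   %val  = G_LOAD %addr
  ///   ; the low-12-bit offset can be folded into the load's immediate.
  /// \endcode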
370 ComplexRendererFns tryFoldAddLowIntoImm(MachineInstr &RootDef, unsigned Size, 371 MachineRegisterInfo &MRI) const; 372 373 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root, 374 unsigned Size) const; 375 template <int Width> 376 ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const { 377 return selectAddrModeIndexed(Root, Width / 8); 378 } 379 380 bool isWorthFoldingIntoExtendedReg(MachineInstr &MI, 381 const MachineRegisterInfo &MRI) const; 382 ComplexRendererFns 383 selectAddrModeShiftedExtendXReg(MachineOperand &Root, 384 unsigned SizeInBytes) const; 385 386 /// Returns a \p ComplexRendererFns which contains a base, offset, and whether 387 /// or not a shift + extend should be folded into an addressing mode. Returns 388 /// None when this is not profitable or possible. 389 ComplexRendererFns 390 selectExtendedSHL(MachineOperand &Root, MachineOperand &Base, 391 MachineOperand &Offset, unsigned SizeInBytes, 392 bool WantsExt) const; 393 ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const; 394 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root, 395 unsigned SizeInBytes) const; 396 template <int Width> 397 ComplexRendererFns selectAddrModeXRO(MachineOperand &Root) const { 398 return selectAddrModeXRO(Root, Width / 8); 399 } 400 401 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root, 402 unsigned SizeInBytes) const; 403 template <int Width> 404 ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const { 405 return selectAddrModeWRO(Root, Width / 8); 406 } 407 408 ComplexRendererFns selectShiftedRegister(MachineOperand &Root, 409 bool AllowROR = false) const; 410 411 ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const { 412 return selectShiftedRegister(Root); 413 } 414 415 ComplexRendererFns selectLogicalShiftedRegister(MachineOperand &Root) const { 416 return selectShiftedRegister(Root, true); 417 } 418 419 /// Given an extend instruction, determine the correct shift-extend type for 420 /// that instruction. 421 /// 422 /// If the instruction is going to be used in a load or store, pass 423 /// \p IsLoadStore = true. 424 AArch64_AM::ShiftExtendType 425 getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI, 426 bool IsLoadStore = false) const; 427 428 /// Move \p Reg to \p RC if \p Reg is not already on \p RC. 429 /// 430 /// \returns Either \p Reg if no change was necessary, or the new register 431 /// created by moving \p Reg. 432 /// 433 /// Note: This uses emitCopy right now. 
434 Register moveScalarRegClass(Register Reg, const TargetRegisterClass &RC, 435 MachineIRBuilder &MIB) const; 436 437 ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const; 438 439 ComplexRendererFns selectExtractHigh(MachineOperand &Root) const; 440 441 void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI, 442 int OpIdx = -1) const; 443 void renderLogicalImm32(MachineInstrBuilder &MIB, const MachineInstr &I, 444 int OpIdx = -1) const; 445 void renderLogicalImm64(MachineInstrBuilder &MIB, const MachineInstr &I, 446 int OpIdx = -1) const; 447 void renderFPImm16(MachineInstrBuilder &MIB, const MachineInstr &MI, 448 int OpIdx = -1) const; 449 void renderFPImm32(MachineInstrBuilder &MIB, const MachineInstr &MI, 450 int OpIdx = -1) const; 451 void renderFPImm64(MachineInstrBuilder &MIB, const MachineInstr &MI, 452 int OpIdx = -1) const; 453 void renderFPImm32SIMDModImmType4(MachineInstrBuilder &MIB, 454 const MachineInstr &MI, 455 int OpIdx = -1) const; 456 457 // Materialize a GlobalValue or BlockAddress using a movz+movk sequence. 458 void materializeLargeCMVal(MachineInstr &I, const Value *V, unsigned OpFlags); 459 460 // Optimization methods. 461 bool tryOptSelect(GSelect &Sel); 462 bool tryOptSelectConjunction(GSelect &Sel, MachineInstr &CondMI); 463 MachineInstr *tryFoldIntegerCompare(MachineOperand &LHS, MachineOperand &RHS, 464 MachineOperand &Predicate, 465 MachineIRBuilder &MIRBuilder) const; 466 467 /// Return true if \p MI is a load or store of \p NumBytes bytes. 468 bool isLoadStoreOfNumBytes(const MachineInstr &MI, unsigned NumBytes) const; 469 470 /// Returns true if \p MI is guaranteed to have the high-half of a 64-bit 471 /// register zeroed out. In other words, the result of MI has been explicitly 472 /// zero extended. 473 bool isDef32(const MachineInstr &MI) const; 474 475 const AArch64TargetMachine &TM; 476 const AArch64Subtarget &STI; 477 const AArch64InstrInfo &TII; 478 const AArch64RegisterInfo &TRI; 479 const AArch64RegisterBankInfo &RBI; 480 481 bool ProduceNonFlagSettingCondBr = false; 482 483 // Some cached values used during selection. 484 // We use LR as a live-in register, and we keep track of it here as it can be 485 // clobbered by calls. 486 Register MFReturnAddr; 487 488 MachineIRBuilder MIB; 489 490 #define GET_GLOBALISEL_PREDICATES_DECL 491 #include "AArch64GenGlobalISel.inc" 492 #undef GET_GLOBALISEL_PREDICATES_DECL 493 494 // We declare the temporaries used by selectImpl() in the class to minimize the 495 // cost of constructing placeholder values. 496 #define GET_GLOBALISEL_TEMPORARIES_DECL 497 #include "AArch64GenGlobalISel.inc" 498 #undef GET_GLOBALISEL_TEMPORARIES_DECL 499 }; 500 501 } // end anonymous namespace 502 503 #define GET_GLOBALISEL_IMPL 504 #include "AArch64GenGlobalISel.inc" 505 #undef GET_GLOBALISEL_IMPL 506 507 AArch64InstructionSelector::AArch64InstructionSelector( 508 const AArch64TargetMachine &TM, const AArch64Subtarget &STI, 509 const AArch64RegisterBankInfo &RBI) 510 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), 511 RBI(RBI), 512 #define GET_GLOBALISEL_PREDICATES_INIT 513 #include "AArch64GenGlobalISel.inc" 514 #undef GET_GLOBALISEL_PREDICATES_INIT 515 #define GET_GLOBALISEL_TEMPORARIES_INIT 516 #include "AArch64GenGlobalISel.inc" 517 #undef GET_GLOBALISEL_TEMPORARIES_INIT 518 { 519 } 520 521 // FIXME: This should be target-independent, inferred from the types declared 522 // for each class in the bank. 
523 // 524 /// Given a register bank, and a type, return the smallest register class that 525 /// can represent that combination. 526 static const TargetRegisterClass * 527 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB, 528 bool GetAllRegSet = false) { 529 if (RB.getID() == AArch64::GPRRegBankID) { 530 if (Ty.getSizeInBits() <= 32) 531 return GetAllRegSet ? &AArch64::GPR32allRegClass 532 : &AArch64::GPR32RegClass; 533 if (Ty.getSizeInBits() == 64) 534 return GetAllRegSet ? &AArch64::GPR64allRegClass 535 : &AArch64::GPR64RegClass; 536 if (Ty.getSizeInBits() == 128) 537 return &AArch64::XSeqPairsClassRegClass; 538 return nullptr; 539 } 540 541 if (RB.getID() == AArch64::FPRRegBankID) { 542 switch (Ty.getSizeInBits()) { 543 case 8: 544 return &AArch64::FPR8RegClass; 545 case 16: 546 return &AArch64::FPR16RegClass; 547 case 32: 548 return &AArch64::FPR32RegClass; 549 case 64: 550 return &AArch64::FPR64RegClass; 551 case 128: 552 return &AArch64::FPR128RegClass; 553 } 554 return nullptr; 555 } 556 557 return nullptr; 558 } 559 560 /// Given a register bank, and size in bits, return the smallest register class 561 /// that can represent that combination. 562 static const TargetRegisterClass * 563 getMinClassForRegBank(const RegisterBank &RB, unsigned SizeInBits, 564 bool GetAllRegSet = false) { 565 unsigned RegBankID = RB.getID(); 566 567 if (RegBankID == AArch64::GPRRegBankID) { 568 if (SizeInBits <= 32) 569 return GetAllRegSet ? &AArch64::GPR32allRegClass 570 : &AArch64::GPR32RegClass; 571 if (SizeInBits == 64) 572 return GetAllRegSet ? &AArch64::GPR64allRegClass 573 : &AArch64::GPR64RegClass; 574 if (SizeInBits == 128) 575 return &AArch64::XSeqPairsClassRegClass; 576 } 577 578 if (RegBankID == AArch64::FPRRegBankID) { 579 switch (SizeInBits) { 580 default: 581 return nullptr; 582 case 8: 583 return &AArch64::FPR8RegClass; 584 case 16: 585 return &AArch64::FPR16RegClass; 586 case 32: 587 return &AArch64::FPR32RegClass; 588 case 64: 589 return &AArch64::FPR64RegClass; 590 case 128: 591 return &AArch64::FPR128RegClass; 592 } 593 } 594 595 return nullptr; 596 } 597 598 /// Returns the correct subregister to use for a given register class. 599 static bool getSubRegForClass(const TargetRegisterClass *RC, 600 const TargetRegisterInfo &TRI, unsigned &SubReg) { 601 switch (TRI.getRegSizeInBits(*RC)) { 602 case 8: 603 SubReg = AArch64::bsub; 604 break; 605 case 16: 606 SubReg = AArch64::hsub; 607 break; 608 case 32: 609 if (RC != &AArch64::FPR32RegClass) 610 SubReg = AArch64::sub_32; 611 else 612 SubReg = AArch64::ssub; 613 break; 614 case 64: 615 SubReg = AArch64::dsub; 616 break; 617 default: 618 LLVM_DEBUG( 619 dbgs() << "Couldn't find appropriate subregister for register class."); 620 return false; 621 } 622 623 return true; 624 } 625 626 /// Returns the minimum size the given register bank can hold. 627 static unsigned getMinSizeForRegBank(const RegisterBank &RB) { 628 switch (RB.getID()) { 629 case AArch64::GPRRegBankID: 630 return 32; 631 case AArch64::FPRRegBankID: 632 return 8; 633 default: 634 llvm_unreachable("Tried to get minimum size for unknown register bank."); 635 } 636 } 637 638 /// Create a REG_SEQUENCE instruction using the registers in \p Regs. 639 /// Helper function for functions like createDTuple and createQTuple. 640 /// 641 /// \p RegClassIDs - The list of register class IDs available for some tuple of 642 /// a scalar class. E.g. QQRegClassID, QQQRegClassID, QQQQRegClassID. This is 643 /// expected to contain between 2 and 4 tuple classes. 
644 /// 645 /// \p SubRegs - The list of subregister classes associated with each register 646 /// class ID in \p RegClassIDs. E.g., QQRegClassID should use the qsub0 647 /// subregister class. The index of each subregister class is expected to 648 /// correspond with the index of each register class. 649 /// 650 /// \returns Either the destination register of REG_SEQUENCE instruction that 651 /// was created, or the 0th element of \p Regs if \p Regs contains a single 652 /// element. 653 static Register createTuple(ArrayRef<Register> Regs, 654 const unsigned RegClassIDs[], 655 const unsigned SubRegs[], MachineIRBuilder &MIB) { 656 unsigned NumRegs = Regs.size(); 657 if (NumRegs == 1) 658 return Regs[0]; 659 assert(NumRegs >= 2 && NumRegs <= 4 && 660 "Only support between two and 4 registers in a tuple!"); 661 const TargetRegisterInfo *TRI = MIB.getMF().getSubtarget().getRegisterInfo(); 662 auto *DesiredClass = TRI->getRegClass(RegClassIDs[NumRegs - 2]); 663 auto RegSequence = 664 MIB.buildInstr(TargetOpcode::REG_SEQUENCE, {DesiredClass}, {}); 665 for (unsigned I = 0, E = Regs.size(); I < E; ++I) { 666 RegSequence.addUse(Regs[I]); 667 RegSequence.addImm(SubRegs[I]); 668 } 669 return RegSequence.getReg(0); 670 } 671 672 /// Create a tuple of D-registers using the registers in \p Regs. 673 static Register createDTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { 674 static const unsigned RegClassIDs[] = { 675 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; 676 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, 677 AArch64::dsub2, AArch64::dsub3}; 678 return createTuple(Regs, RegClassIDs, SubRegs, MIB); 679 } 680 681 /// Create a tuple of Q-registers using the registers in \p Regs. 682 static Register createQTuple(ArrayRef<Register> Regs, MachineIRBuilder &MIB) { 683 static const unsigned RegClassIDs[] = { 684 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; 685 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, 686 AArch64::qsub2, AArch64::qsub3}; 687 return createTuple(Regs, RegClassIDs, SubRegs, MIB); 688 } 689 690 static std::optional<uint64_t> getImmedFromMO(const MachineOperand &Root) { 691 auto &MI = *Root.getParent(); 692 auto &MBB = *MI.getParent(); 693 auto &MF = *MBB.getParent(); 694 auto &MRI = MF.getRegInfo(); 695 uint64_t Immed; 696 if (Root.isImm()) 697 Immed = Root.getImm(); 698 else if (Root.isCImm()) 699 Immed = Root.getCImm()->getZExtValue(); 700 else if (Root.isReg()) { 701 auto ValAndVReg = 702 getIConstantVRegValWithLookThrough(Root.getReg(), MRI, true); 703 if (!ValAndVReg) 704 return std::nullopt; 705 Immed = ValAndVReg->Value.getSExtValue(); 706 } else 707 return std::nullopt; 708 return Immed; 709 } 710 711 /// Check whether \p I is a currently unsupported binary operation: 712 /// - it has an unsized type 713 /// - an operand is not a vreg 714 /// - all operands are not in the same bank 715 /// These are checks that should someday live in the verifier, but right now, 716 /// these are mostly limitations of the aarch64 selector. 
717 static bool unsupportedBinOp(const MachineInstr &I, 718 const AArch64RegisterBankInfo &RBI, 719 const MachineRegisterInfo &MRI, 720 const AArch64RegisterInfo &TRI) { 721 LLT Ty = MRI.getType(I.getOperand(0).getReg()); 722 if (!Ty.isValid()) { 723 LLVM_DEBUG(dbgs() << "Generic binop register should be typed\n"); 724 return true; 725 } 726 727 const RegisterBank *PrevOpBank = nullptr; 728 for (auto &MO : I.operands()) { 729 // FIXME: Support non-register operands. 730 if (!MO.isReg()) { 731 LLVM_DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n"); 732 return true; 733 } 734 735 // FIXME: Can generic operations have physical registers operands? If 736 // so, this will need to be taught about that, and we'll need to get the 737 // bank out of the minimal class for the register. 738 // Either way, this needs to be documented (and possibly verified). 739 if (!MO.getReg().isVirtual()) { 740 LLVM_DEBUG(dbgs() << "Generic inst has physical register operand\n"); 741 return true; 742 } 743 744 const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI); 745 if (!OpBank) { 746 LLVM_DEBUG(dbgs() << "Generic register has no bank or class\n"); 747 return true; 748 } 749 750 if (PrevOpBank && OpBank != PrevOpBank) { 751 LLVM_DEBUG(dbgs() << "Generic inst operands have different banks\n"); 752 return true; 753 } 754 PrevOpBank = OpBank; 755 } 756 return false; 757 } 758 759 /// Select the AArch64 opcode for the basic binary operation \p GenericOpc 760 /// (such as G_OR or G_SDIV), appropriate for the register bank \p RegBankID 761 /// and of size \p OpSize. 762 /// \returns \p GenericOpc if the combination is unsupported. 763 static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID, 764 unsigned OpSize) { 765 switch (RegBankID) { 766 case AArch64::GPRRegBankID: 767 if (OpSize == 32) { 768 switch (GenericOpc) { 769 case TargetOpcode::G_SHL: 770 return AArch64::LSLVWr; 771 case TargetOpcode::G_LSHR: 772 return AArch64::LSRVWr; 773 case TargetOpcode::G_ASHR: 774 return AArch64::ASRVWr; 775 default: 776 return GenericOpc; 777 } 778 } else if (OpSize == 64) { 779 switch (GenericOpc) { 780 case TargetOpcode::G_PTR_ADD: 781 return AArch64::ADDXrr; 782 case TargetOpcode::G_SHL: 783 return AArch64::LSLVXr; 784 case TargetOpcode::G_LSHR: 785 return AArch64::LSRVXr; 786 case TargetOpcode::G_ASHR: 787 return AArch64::ASRVXr; 788 default: 789 return GenericOpc; 790 } 791 } 792 break; 793 case AArch64::FPRRegBankID: 794 switch (OpSize) { 795 case 32: 796 switch (GenericOpc) { 797 case TargetOpcode::G_FADD: 798 return AArch64::FADDSrr; 799 case TargetOpcode::G_FSUB: 800 return AArch64::FSUBSrr; 801 case TargetOpcode::G_FMUL: 802 return AArch64::FMULSrr; 803 case TargetOpcode::G_FDIV: 804 return AArch64::FDIVSrr; 805 default: 806 return GenericOpc; 807 } 808 case 64: 809 switch (GenericOpc) { 810 case TargetOpcode::G_FADD: 811 return AArch64::FADDDrr; 812 case TargetOpcode::G_FSUB: 813 return AArch64::FSUBDrr; 814 case TargetOpcode::G_FMUL: 815 return AArch64::FMULDrr; 816 case TargetOpcode::G_FDIV: 817 return AArch64::FDIVDrr; 818 case TargetOpcode::G_OR: 819 return AArch64::ORRv8i8; 820 default: 821 return GenericOpc; 822 } 823 } 824 break; 825 } 826 return GenericOpc; 827 } 828 829 /// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc, 830 /// appropriate for the (value) register bank \p RegBankID and of memory access 831 /// size \p OpSize. This returns the variant with the base+unsigned-immediate 832 /// addressing mode (e.g., LDRXui). 
833 /// \returns \p GenericOpc if the combination is unsupported. 834 static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID, 835 unsigned OpSize) { 836 const bool isStore = GenericOpc == TargetOpcode::G_STORE; 837 switch (RegBankID) { 838 case AArch64::GPRRegBankID: 839 switch (OpSize) { 840 case 8: 841 return isStore ? AArch64::STRBBui : AArch64::LDRBBui; 842 case 16: 843 return isStore ? AArch64::STRHHui : AArch64::LDRHHui; 844 case 32: 845 return isStore ? AArch64::STRWui : AArch64::LDRWui; 846 case 64: 847 return isStore ? AArch64::STRXui : AArch64::LDRXui; 848 } 849 break; 850 case AArch64::FPRRegBankID: 851 switch (OpSize) { 852 case 8: 853 return isStore ? AArch64::STRBui : AArch64::LDRBui; 854 case 16: 855 return isStore ? AArch64::STRHui : AArch64::LDRHui; 856 case 32: 857 return isStore ? AArch64::STRSui : AArch64::LDRSui; 858 case 64: 859 return isStore ? AArch64::STRDui : AArch64::LDRDui; 860 case 128: 861 return isStore ? AArch64::STRQui : AArch64::LDRQui; 862 } 863 break; 864 } 865 return GenericOpc; 866 } 867 868 /// Helper function for selectCopy. Inserts a subregister copy from \p SrcReg 869 /// to \p *To. 870 /// 871 /// E.g "To = COPY SrcReg:SubReg" 872 static bool copySubReg(MachineInstr &I, MachineRegisterInfo &MRI, 873 const RegisterBankInfo &RBI, Register SrcReg, 874 const TargetRegisterClass *To, unsigned SubReg) { 875 assert(SrcReg.isValid() && "Expected a valid source register?"); 876 assert(To && "Destination register class cannot be null"); 877 assert(SubReg && "Expected a valid subregister"); 878 879 MachineIRBuilder MIB(I); 880 auto SubRegCopy = 881 MIB.buildInstr(TargetOpcode::COPY, {To}, {}).addReg(SrcReg, 0, SubReg); 882 MachineOperand &RegOp = I.getOperand(1); 883 RegOp.setReg(SubRegCopy.getReg(0)); 884 885 // It's possible that the destination register won't be constrained. Make 886 // sure that happens. 887 if (!I.getOperand(0).getReg().isPhysical()) 888 RBI.constrainGenericRegister(I.getOperand(0).getReg(), *To, MRI); 889 890 return true; 891 } 892 893 /// Helper function to get the source and destination register classes for a 894 /// copy. Returns a std::pair containing the source register class for the 895 /// copy, and the destination register class for the copy. If a register class 896 /// cannot be determined, then it will be nullptr. 897 static std::pair<const TargetRegisterClass *, const TargetRegisterClass *> 898 getRegClassesForCopy(MachineInstr &I, const TargetInstrInfo &TII, 899 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, 900 const RegisterBankInfo &RBI) { 901 Register DstReg = I.getOperand(0).getReg(); 902 Register SrcReg = I.getOperand(1).getReg(); 903 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); 904 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); 905 unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); 906 unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); 907 908 // Special casing for cross-bank copies of s1s. We can technically represent 909 // a 1-bit value with any size of register. The minimum size for a GPR is 32 910 // bits. So, we need to put the FPR on 32 bits as well. 911 // 912 // FIXME: I'm not sure if this case holds true outside of copies. If it does, 913 // then we can pull it into the helpers that get the appropriate class for a 914 // register bank. Or make a new helper that carries along some constraint 915 // information. 
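  //
  // E.g. an s1 crossing from the GPR bank to the FPR bank (or vice versa) is
  // treated as a 32-bit <-> 32-bit copy rather than trying to find a 1-bit
  // register class.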
916 if (SrcRegBank != DstRegBank && (DstSize == 1 && SrcSize == 1)) 917 SrcSize = DstSize = 32; 918 919 return {getMinClassForRegBank(SrcRegBank, SrcSize, true), 920 getMinClassForRegBank(DstRegBank, DstSize, true)}; 921 } 922 923 // FIXME: We need some sort of API in RBI/TRI to allow generic code to 924 // constrain operands of simple instructions given a TargetRegisterClass 925 // and LLT 926 static bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI, 927 const RegisterBankInfo &RBI) { 928 for (MachineOperand &MO : I.operands()) { 929 if (!MO.isReg()) 930 continue; 931 Register Reg = MO.getReg(); 932 if (!Reg) 933 continue; 934 if (Reg.isPhysical()) 935 continue; 936 LLT Ty = MRI.getType(Reg); 937 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); 938 const TargetRegisterClass *RC = 939 RegClassOrBank.dyn_cast<const TargetRegisterClass *>(); 940 if (!RC) { 941 const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>(); 942 RC = getRegClassForTypeOnBank(Ty, RB); 943 if (!RC) { 944 LLVM_DEBUG( 945 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n"); 946 break; 947 } 948 } 949 RBI.constrainGenericRegister(Reg, *RC, MRI); 950 } 951 952 return true; 953 } 954 955 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII, 956 MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI, 957 const RegisterBankInfo &RBI) { 958 Register DstReg = I.getOperand(0).getReg(); 959 Register SrcReg = I.getOperand(1).getReg(); 960 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); 961 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); 962 963 // Find the correct register classes for the source and destination registers. 964 const TargetRegisterClass *SrcRC; 965 const TargetRegisterClass *DstRC; 966 std::tie(SrcRC, DstRC) = getRegClassesForCopy(I, TII, MRI, TRI, RBI); 967 968 if (!DstRC) { 969 LLVM_DEBUG(dbgs() << "Unexpected dest size " 970 << RBI.getSizeInBits(DstReg, MRI, TRI) << '\n'); 971 return false; 972 } 973 974 // Is this a copy? If so, then we may need to insert a subregister copy. 975 if (I.isCopy()) { 976 // Yes. Check if there's anything to fix up. 977 if (!SrcRC) { 978 LLVM_DEBUG(dbgs() << "Couldn't determine source register class\n"); 979 return false; 980 } 981 982 unsigned SrcSize = TRI.getRegSizeInBits(*SrcRC); 983 unsigned DstSize = TRI.getRegSizeInBits(*DstRC); 984 unsigned SubReg; 985 986 // If the source bank doesn't support a subregister copy small enough, 987 // then we first need to copy to the destination bank. 988 if (getMinSizeForRegBank(SrcRegBank) > DstSize) { 989 const TargetRegisterClass *DstTempRC = 990 getMinClassForRegBank(DstRegBank, SrcSize, /* GetAllRegSet */ true); 991 getSubRegForClass(DstRC, TRI, SubReg); 992 993 MachineIRBuilder MIB(I); 994 auto Copy = MIB.buildCopy({DstTempRC}, {SrcReg}); 995 copySubReg(I, MRI, RBI, Copy.getReg(0), DstRC, SubReg); 996 } else if (SrcSize > DstSize) { 997 // If the source register is bigger than the destination we need to 998 // perform a subregister copy. 999 const TargetRegisterClass *SubRegRC = 1000 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true); 1001 getSubRegForClass(SubRegRC, TRI, SubReg); 1002 copySubReg(I, MRI, RBI, SrcReg, DstRC, SubReg); 1003 } else if (DstSize > SrcSize) { 1004 // If the destination register is bigger than the source we need to do 1005 // a promotion using SUBREG_TO_REG. 
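      //
      // For example (illustrative), promoting a 32-bit GPR source into a
      // 64-bit GPR destination looks roughly like:
      //   %promoted:gpr64 = SUBREG_TO_REG 0, %src:gpr32, %subreg.sub_32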
1006 const TargetRegisterClass *PromotionRC = 1007 getMinClassForRegBank(SrcRegBank, DstSize, /* GetAllRegSet */ true); 1008 getSubRegForClass(SrcRC, TRI, SubReg); 1009 1010 Register PromoteReg = MRI.createVirtualRegister(PromotionRC); 1011 BuildMI(*I.getParent(), I, I.getDebugLoc(), 1012 TII.get(AArch64::SUBREG_TO_REG), PromoteReg) 1013 .addImm(0) 1014 .addUse(SrcReg) 1015 .addImm(SubReg); 1016 MachineOperand &RegOp = I.getOperand(1); 1017 RegOp.setReg(PromoteReg); 1018 } 1019 1020 // If the destination is a physical register, then there's nothing to 1021 // change, so we're done. 1022 if (DstReg.isPhysical()) 1023 return true; 1024 } 1025 1026 // No need to constrain SrcReg. It will get constrained when we hit another 1027 // of its use or its defs. Copies do not have constraints. 1028 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 1029 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 1030 << " operand\n"); 1031 return false; 1032 } 1033 1034 // If this a GPR ZEXT that we want to just reduce down into a copy. 1035 // The sizes will be mismatched with the source < 32b but that's ok. 1036 if (I.getOpcode() == TargetOpcode::G_ZEXT) { 1037 I.setDesc(TII.get(AArch64::COPY)); 1038 assert(SrcRegBank.getID() == AArch64::GPRRegBankID); 1039 return selectCopy(I, TII, MRI, TRI, RBI); 1040 } 1041 1042 I.setDesc(TII.get(AArch64::COPY)); 1043 return true; 1044 } 1045 1046 static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) { 1047 if (!DstTy.isScalar() || !SrcTy.isScalar()) 1048 return GenericOpc; 1049 1050 const unsigned DstSize = DstTy.getSizeInBits(); 1051 const unsigned SrcSize = SrcTy.getSizeInBits(); 1052 1053 switch (DstSize) { 1054 case 32: 1055 switch (SrcSize) { 1056 case 32: 1057 switch (GenericOpc) { 1058 case TargetOpcode::G_SITOFP: 1059 return AArch64::SCVTFUWSri; 1060 case TargetOpcode::G_UITOFP: 1061 return AArch64::UCVTFUWSri; 1062 case TargetOpcode::G_FPTOSI: 1063 return AArch64::FCVTZSUWSr; 1064 case TargetOpcode::G_FPTOUI: 1065 return AArch64::FCVTZUUWSr; 1066 default: 1067 return GenericOpc; 1068 } 1069 case 64: 1070 switch (GenericOpc) { 1071 case TargetOpcode::G_SITOFP: 1072 return AArch64::SCVTFUXSri; 1073 case TargetOpcode::G_UITOFP: 1074 return AArch64::UCVTFUXSri; 1075 case TargetOpcode::G_FPTOSI: 1076 return AArch64::FCVTZSUWDr; 1077 case TargetOpcode::G_FPTOUI: 1078 return AArch64::FCVTZUUWDr; 1079 default: 1080 return GenericOpc; 1081 } 1082 default: 1083 return GenericOpc; 1084 } 1085 case 64: 1086 switch (SrcSize) { 1087 case 32: 1088 switch (GenericOpc) { 1089 case TargetOpcode::G_SITOFP: 1090 return AArch64::SCVTFUWDri; 1091 case TargetOpcode::G_UITOFP: 1092 return AArch64::UCVTFUWDri; 1093 case TargetOpcode::G_FPTOSI: 1094 return AArch64::FCVTZSUXSr; 1095 case TargetOpcode::G_FPTOUI: 1096 return AArch64::FCVTZUUXSr; 1097 default: 1098 return GenericOpc; 1099 } 1100 case 64: 1101 switch (GenericOpc) { 1102 case TargetOpcode::G_SITOFP: 1103 return AArch64::SCVTFUXDri; 1104 case TargetOpcode::G_UITOFP: 1105 return AArch64::UCVTFUXDri; 1106 case TargetOpcode::G_FPTOSI: 1107 return AArch64::FCVTZSUXDr; 1108 case TargetOpcode::G_FPTOUI: 1109 return AArch64::FCVTZUUXDr; 1110 default: 1111 return GenericOpc; 1112 } 1113 default: 1114 return GenericOpc; 1115 } 1116 default: 1117 return GenericOpc; 1118 }; 1119 return GenericOpc; 1120 } 1121 1122 MachineInstr * 1123 AArch64InstructionSelector::emitSelect(Register Dst, Register True, 1124 Register False, AArch64CC::CondCode CC, 1125 MachineIRBuilder &MIB) const { 1126 
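  // For GPR operands, the folds below try to turn the select into
  // CSNEG/CSINV/CSINC; e.g. a G_SELECT whose false operand is defined by
  // G_SUB 0, %x can be emitted as CSNEG %true, %x, cc instead of a plain CSEL.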
MachineRegisterInfo &MRI = *MIB.getMRI(); 1127 assert(RBI.getRegBank(False, MRI, TRI)->getID() == 1128 RBI.getRegBank(True, MRI, TRI)->getID() && 1129 "Expected both select operands to have the same regbank?"); 1130 LLT Ty = MRI.getType(True); 1131 if (Ty.isVector()) 1132 return nullptr; 1133 const unsigned Size = Ty.getSizeInBits(); 1134 assert((Size == 32 || Size == 64) && 1135 "Expected 32 bit or 64 bit select only?"); 1136 const bool Is32Bit = Size == 32; 1137 if (RBI.getRegBank(True, MRI, TRI)->getID() != AArch64::GPRRegBankID) { 1138 unsigned Opc = Is32Bit ? AArch64::FCSELSrrr : AArch64::FCSELDrrr; 1139 auto FCSel = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC); 1140 constrainSelectedInstRegOperands(*FCSel, TII, TRI, RBI); 1141 return &*FCSel; 1142 } 1143 1144 // By default, we'll try and emit a CSEL. 1145 unsigned Opc = Is32Bit ? AArch64::CSELWr : AArch64::CSELXr; 1146 bool Optimized = false; 1147 auto TryFoldBinOpIntoSelect = [&Opc, Is32Bit, &CC, &MRI, 1148 &Optimized](Register &Reg, Register &OtherReg, 1149 bool Invert) { 1150 if (Optimized) 1151 return false; 1152 1153 // Attempt to fold: 1154 // 1155 // %sub = G_SUB 0, %x 1156 // %select = G_SELECT cc, %reg, %sub 1157 // 1158 // Into: 1159 // %select = CSNEG %reg, %x, cc 1160 Register MatchReg; 1161 if (mi_match(Reg, MRI, m_Neg(m_Reg(MatchReg)))) { 1162 Opc = Is32Bit ? AArch64::CSNEGWr : AArch64::CSNEGXr; 1163 Reg = MatchReg; 1164 if (Invert) { 1165 CC = AArch64CC::getInvertedCondCode(CC); 1166 std::swap(Reg, OtherReg); 1167 } 1168 return true; 1169 } 1170 1171 // Attempt to fold: 1172 // 1173 // %xor = G_XOR %x, -1 1174 // %select = G_SELECT cc, %reg, %xor 1175 // 1176 // Into: 1177 // %select = CSINV %reg, %x, cc 1178 if (mi_match(Reg, MRI, m_Not(m_Reg(MatchReg)))) { 1179 Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr; 1180 Reg = MatchReg; 1181 if (Invert) { 1182 CC = AArch64CC::getInvertedCondCode(CC); 1183 std::swap(Reg, OtherReg); 1184 } 1185 return true; 1186 } 1187 1188 // Attempt to fold: 1189 // 1190 // %add = G_ADD %x, 1 1191 // %select = G_SELECT cc, %reg, %add 1192 // 1193 // Into: 1194 // %select = CSINC %reg, %x, cc 1195 if (mi_match(Reg, MRI, 1196 m_any_of(m_GAdd(m_Reg(MatchReg), m_SpecificICst(1)), 1197 m_GPtrAdd(m_Reg(MatchReg), m_SpecificICst(1))))) { 1198 Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr; 1199 Reg = MatchReg; 1200 if (Invert) { 1201 CC = AArch64CC::getInvertedCondCode(CC); 1202 std::swap(Reg, OtherReg); 1203 } 1204 return true; 1205 } 1206 1207 return false; 1208 }; 1209 1210 // Helper lambda which tries to use CSINC/CSINV for the instruction when its 1211 // true/false values are constants. 1212 // FIXME: All of these patterns already exist in tablegen. We should be 1213 // able to import these. 1214 auto TryOptSelectCst = [&Opc, &True, &False, &CC, Is32Bit, &MRI, 1215 &Optimized]() { 1216 if (Optimized) 1217 return false; 1218 auto TrueCst = getIConstantVRegValWithLookThrough(True, MRI); 1219 auto FalseCst = getIConstantVRegValWithLookThrough(False, MRI); 1220 if (!TrueCst && !FalseCst) 1221 return false; 1222 1223 Register ZReg = Is32Bit ? AArch64::WZR : AArch64::XZR; 1224 if (TrueCst && FalseCst) { 1225 int64_t T = TrueCst->Value.getSExtValue(); 1226 int64_t F = FalseCst->Value.getSExtValue(); 1227 1228 if (T == 0 && F == 1) { 1229 // G_SELECT cc, 0, 1 -> CSINC zreg, zreg, cc 1230 Opc = Is32Bit ? 
                            AArch64::CSINCWr : AArch64::CSINCXr;
        True = ZReg;
        False = ZReg;
        return true;
      }

      if (T == 0 && F == -1) {
        // G_SELECT cc, 0, -1 -> CSINV zreg, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = ZReg;
        False = ZReg;
        return true;
      }
    }

    if (TrueCst) {
      int64_t T = TrueCst->Value.getSExtValue();
      if (T == 1) {
        // G_SELECT cc, 1, f -> CSINC f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
        return true;
      }

      if (T == -1) {
        // G_SELECT cc, -1, f -> CSINV f, zreg, inv_cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        True = False;
        False = ZReg;
        CC = AArch64CC::getInvertedCondCode(CC);
        return true;
      }
    }

    if (FalseCst) {
      int64_t F = FalseCst->Value.getSExtValue();
      if (F == 1) {
        // G_SELECT cc, t, 1 -> CSINC t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINCWr : AArch64::CSINCXr;
        False = ZReg;
        return true;
      }

      if (F == -1) {
        // G_SELECT cc, t, -1 -> CSINV t, zreg, cc
        Opc = Is32Bit ? AArch64::CSINVWr : AArch64::CSINVXr;
        False = ZReg;
        return true;
      }
    }
    return false;
  };

  Optimized |= TryFoldBinOpIntoSelect(False, True, /*Invert = */ false);
  Optimized |= TryFoldBinOpIntoSelect(True, False, /*Invert = */ true);
  Optimized |= TryOptSelectCst();
  auto SelectInst = MIB.buildInstr(Opc, {Dst}, {True, False}).addImm(CC);
  constrainSelectedInstRegOperands(*SelectInst, TII, TRI, RBI);
  return &*SelectInst;
}

static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
  switch (P) {
  default:
    llvm_unreachable("Unknown condition code!");
  case CmpInst::ICMP_NE:
    return AArch64CC::NE;
  case CmpInst::ICMP_EQ:
    return AArch64CC::EQ;
  case CmpInst::ICMP_SGT:
    return AArch64CC::GT;
  case CmpInst::ICMP_SGE:
    return AArch64CC::GE;
  case CmpInst::ICMP_SLT:
    return AArch64CC::LT;
  case CmpInst::ICMP_SLE:
    return AArch64CC::LE;
  case CmpInst::ICMP_UGT:
    return AArch64CC::HI;
  case CmpInst::ICMP_UGE:
    return AArch64CC::HS;
  case CmpInst::ICMP_ULT:
    return AArch64CC::LO;
  case CmpInst::ICMP_ULE:
    return AArch64CC::LS;
  }
}

/// changeFPCCToORAArch64CC - Convert an IR fp condition code to an AArch64 CC.
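/// E.g. FCMP_ONE (ordered and not equal) has no single AArch64 condition, so
/// it is split into two checks that get OR'ed together:
/// \code
///   (a one b) == (a olt b) || (a ogt b)   ==>   CondCode = MI, CondCode2 = GT
/// \endcode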
1321 static void changeFPCCToORAArch64CC(CmpInst::Predicate CC, 1322 AArch64CC::CondCode &CondCode, 1323 AArch64CC::CondCode &CondCode2) { 1324 CondCode2 = AArch64CC::AL; 1325 switch (CC) { 1326 default: 1327 llvm_unreachable("Unknown FP condition!"); 1328 case CmpInst::FCMP_OEQ: 1329 CondCode = AArch64CC::EQ; 1330 break; 1331 case CmpInst::FCMP_OGT: 1332 CondCode = AArch64CC::GT; 1333 break; 1334 case CmpInst::FCMP_OGE: 1335 CondCode = AArch64CC::GE; 1336 break; 1337 case CmpInst::FCMP_OLT: 1338 CondCode = AArch64CC::MI; 1339 break; 1340 case CmpInst::FCMP_OLE: 1341 CondCode = AArch64CC::LS; 1342 break; 1343 case CmpInst::FCMP_ONE: 1344 CondCode = AArch64CC::MI; 1345 CondCode2 = AArch64CC::GT; 1346 break; 1347 case CmpInst::FCMP_ORD: 1348 CondCode = AArch64CC::VC; 1349 break; 1350 case CmpInst::FCMP_UNO: 1351 CondCode = AArch64CC::VS; 1352 break; 1353 case CmpInst::FCMP_UEQ: 1354 CondCode = AArch64CC::EQ; 1355 CondCode2 = AArch64CC::VS; 1356 break; 1357 case CmpInst::FCMP_UGT: 1358 CondCode = AArch64CC::HI; 1359 break; 1360 case CmpInst::FCMP_UGE: 1361 CondCode = AArch64CC::PL; 1362 break; 1363 case CmpInst::FCMP_ULT: 1364 CondCode = AArch64CC::LT; 1365 break; 1366 case CmpInst::FCMP_ULE: 1367 CondCode = AArch64CC::LE; 1368 break; 1369 case CmpInst::FCMP_UNE: 1370 CondCode = AArch64CC::NE; 1371 break; 1372 } 1373 } 1374 1375 /// Convert an IR fp condition code to an AArch64 CC. 1376 /// This differs from changeFPCCToAArch64CC in that it returns cond codes that 1377 /// should be AND'ed instead of OR'ed. 1378 static void changeFPCCToANDAArch64CC(CmpInst::Predicate CC, 1379 AArch64CC::CondCode &CondCode, 1380 AArch64CC::CondCode &CondCode2) { 1381 CondCode2 = AArch64CC::AL; 1382 switch (CC) { 1383 default: 1384 changeFPCCToORAArch64CC(CC, CondCode, CondCode2); 1385 assert(CondCode2 == AArch64CC::AL); 1386 break; 1387 case CmpInst::FCMP_ONE: 1388 // (a one b) 1389 // == ((a olt b) || (a ogt b)) 1390 // == ((a ord b) && (a une b)) 1391 CondCode = AArch64CC::VC; 1392 CondCode2 = AArch64CC::NE; 1393 break; 1394 case CmpInst::FCMP_UEQ: 1395 // (a ueq b) 1396 // == ((a uno b) || (a oeq b)) 1397 // == ((a ule b) && (a uge b)) 1398 CondCode = AArch64CC::PL; 1399 CondCode2 = AArch64CC::LE; 1400 break; 1401 } 1402 } 1403 1404 /// Return a register which can be used as a bit to test in a TB(N)Z. 1405 static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert, 1406 MachineRegisterInfo &MRI) { 1407 assert(Reg.isValid() && "Expected valid register!"); 1408 bool HasZext = false; 1409 while (MachineInstr *MI = getDefIgnoringCopies(Reg, MRI)) { 1410 unsigned Opc = MI->getOpcode(); 1411 1412 if (!MI->getOperand(0).isReg() || 1413 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) 1414 break; 1415 1416 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits. 1417 // 1418 // (tbz (trunc x), b) -> (tbz x, b) is always safe, because the bit number 1419 // on the truncated x is the same as the bit number on x. 1420 if (Opc == TargetOpcode::G_ANYEXT || Opc == TargetOpcode::G_ZEXT || 1421 Opc == TargetOpcode::G_TRUNC) { 1422 if (Opc == TargetOpcode::G_ZEXT) 1423 HasZext = true; 1424 1425 Register NextReg = MI->getOperand(1).getReg(); 1426 // Did we find something worth folding? 1427 if (!NextReg.isValid() || !MRI.hasOneNonDBGUse(NextReg)) 1428 break; 1429 1430 // NextReg is worth folding. Keep looking. 1431 Reg = NextReg; 1432 continue; 1433 } 1434 1435 // Attempt to find a suitable operation with a constant on one side. 
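    // E.g. (tbz (and x, 8), 3) can simply test bit 3 of x, since ANDing with
    // 8 leaves bit 3 of x unchanged; the per-opcode rules are below.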
1436 std::optional<uint64_t> C; 1437 Register TestReg; 1438 switch (Opc) { 1439 default: 1440 break; 1441 case TargetOpcode::G_AND: 1442 case TargetOpcode::G_XOR: { 1443 TestReg = MI->getOperand(1).getReg(); 1444 Register ConstantReg = MI->getOperand(2).getReg(); 1445 auto VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI); 1446 if (!VRegAndVal) { 1447 // AND commutes, check the other side for a constant. 1448 // FIXME: Can we canonicalize the constant so that it's always on the 1449 // same side at some point earlier? 1450 std::swap(ConstantReg, TestReg); 1451 VRegAndVal = getIConstantVRegValWithLookThrough(ConstantReg, MRI); 1452 } 1453 if (VRegAndVal) { 1454 if (HasZext) 1455 C = VRegAndVal->Value.getZExtValue(); 1456 else 1457 C = VRegAndVal->Value.getSExtValue(); 1458 } 1459 break; 1460 } 1461 case TargetOpcode::G_ASHR: 1462 case TargetOpcode::G_LSHR: 1463 case TargetOpcode::G_SHL: { 1464 TestReg = MI->getOperand(1).getReg(); 1465 auto VRegAndVal = 1466 getIConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI); 1467 if (VRegAndVal) 1468 C = VRegAndVal->Value.getSExtValue(); 1469 break; 1470 } 1471 } 1472 1473 // Didn't find a constant or viable register. Bail out of the loop. 1474 if (!C || !TestReg.isValid()) 1475 break; 1476 1477 // We found a suitable instruction with a constant. Check to see if we can 1478 // walk through the instruction. 1479 Register NextReg; 1480 unsigned TestRegSize = MRI.getType(TestReg).getSizeInBits(); 1481 switch (Opc) { 1482 default: 1483 break; 1484 case TargetOpcode::G_AND: 1485 // (tbz (and x, m), b) -> (tbz x, b) when the b-th bit of m is set. 1486 if ((*C >> Bit) & 1) 1487 NextReg = TestReg; 1488 break; 1489 case TargetOpcode::G_SHL: 1490 // (tbz (shl x, c), b) -> (tbz x, b-c) when b-c is positive and fits in 1491 // the type of the register. 1492 if (*C <= Bit && (Bit - *C) < TestRegSize) { 1493 NextReg = TestReg; 1494 Bit = Bit - *C; 1495 } 1496 break; 1497 case TargetOpcode::G_ASHR: 1498 // (tbz (ashr x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits 1499 // in x 1500 NextReg = TestReg; 1501 Bit = Bit + *C; 1502 if (Bit >= TestRegSize) 1503 Bit = TestRegSize - 1; 1504 break; 1505 case TargetOpcode::G_LSHR: 1506 // (tbz (lshr x, c), b) -> (tbz x, b+c) when b + c is < # bits in x 1507 if ((Bit + *C) < TestRegSize) { 1508 NextReg = TestReg; 1509 Bit = Bit + *C; 1510 } 1511 break; 1512 case TargetOpcode::G_XOR: 1513 // We can walk through a G_XOR by inverting whether we use tbz/tbnz when 1514 // appropriate. 1515 // 1516 // e.g. If x' = xor x, c, and the b-th bit is set in c then 1517 // 1518 // tbz x', b -> tbnz x, b 1519 // 1520 // Because x' only has the b-th bit set if x does not. 1521 if ((*C >> Bit) & 1) 1522 Invert = !Invert; 1523 NextReg = TestReg; 1524 break; 1525 } 1526 1527 // Check if we found anything worth folding. 1528 if (!NextReg.isValid()) 1529 return Reg; 1530 Reg = NextReg; 1531 } 1532 1533 return Reg; 1534 } 1535 1536 MachineInstr *AArch64InstructionSelector::emitTestBit( 1537 Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB, 1538 MachineIRBuilder &MIB) const { 1539 assert(TestReg.isValid()); 1540 assert(ProduceNonFlagSettingCondBr && 1541 "Cannot emit TB(N)Z with speculation tracking!"); 1542 MachineRegisterInfo &MRI = *MIB.getMRI(); 1543 1544 // Attempt to optimize the test bit by walking over instructions. 
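  // E.g. testing bit 3 of (x << 1) is equivalent to testing bit 2 of x, so
  // getTestBitReg may update both the register and the bit number.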
1545 TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI); 1546 LLT Ty = MRI.getType(TestReg); 1547 unsigned Size = Ty.getSizeInBits(); 1548 assert(!Ty.isVector() && "Expected a scalar!"); 1549 assert(Bit < 64 && "Bit is too large!"); 1550 1551 // When the test register is a 64-bit register, we have to narrow to make 1552 // TBNZW work. 1553 bool UseWReg = Bit < 32; 1554 unsigned NecessarySize = UseWReg ? 32 : 64; 1555 if (Size != NecessarySize) 1556 TestReg = moveScalarRegClass( 1557 TestReg, UseWReg ? AArch64::GPR32RegClass : AArch64::GPR64RegClass, 1558 MIB); 1559 1560 static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX}, 1561 {AArch64::TBZW, AArch64::TBNZW}}; 1562 unsigned Opc = OpcTable[UseWReg][IsNegative]; 1563 auto TestBitMI = 1564 MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB); 1565 constrainSelectedInstRegOperands(*TestBitMI, TII, TRI, RBI); 1566 return &*TestBitMI; 1567 } 1568 1569 bool AArch64InstructionSelector::tryOptAndIntoCompareBranch( 1570 MachineInstr &AndInst, bool Invert, MachineBasicBlock *DstMBB, 1571 MachineIRBuilder &MIB) const { 1572 assert(AndInst.getOpcode() == TargetOpcode::G_AND && "Expected G_AND only?"); 1573 // Given something like this: 1574 // 1575 // %x = ...Something... 1576 // %one = G_CONSTANT i64 1 1577 // %zero = G_CONSTANT i64 0 1578 // %and = G_AND %x, %one 1579 // %cmp = G_ICMP intpred(ne), %and, %zero 1580 // %cmp_trunc = G_TRUNC %cmp 1581 // G_BRCOND %cmp_trunc, %bb.3 1582 // 1583 // We want to try and fold the AND into the G_BRCOND and produce either a 1584 // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)). 1585 // 1586 // In this case, we'd get 1587 // 1588 // TBNZ %x %bb.3 1589 // 1590 1591 // Check if the AND has a constant on its RHS which we can use as a mask. 1592 // If it's a power of 2, then it's the same as checking a specific bit. 1593 // (e.g, ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set) 1594 auto MaybeBit = getIConstantVRegValWithLookThrough( 1595 AndInst.getOperand(2).getReg(), *MIB.getMRI()); 1596 if (!MaybeBit) 1597 return false; 1598 1599 int32_t Bit = MaybeBit->Value.exactLogBase2(); 1600 if (Bit < 0) 1601 return false; 1602 1603 Register TestReg = AndInst.getOperand(1).getReg(); 1604 1605 // Emit a TB(N)Z. 
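  // E.g. for %and = G_AND %x, 4 branched on with intpred(ne), this becomes
  // TBNZ %x, #2; with intpred(eq) it becomes TBZ %x, #2.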
1606 emitTestBit(TestReg, Bit, Invert, DstMBB, MIB); 1607 return true; 1608 } 1609 1610 MachineInstr *AArch64InstructionSelector::emitCBZ(Register CompareReg, 1611 bool IsNegative, 1612 MachineBasicBlock *DestMBB, 1613 MachineIRBuilder &MIB) const { 1614 assert(ProduceNonFlagSettingCondBr && "CBZ does not set flags!"); 1615 MachineRegisterInfo &MRI = *MIB.getMRI(); 1616 assert(RBI.getRegBank(CompareReg, MRI, TRI)->getID() == 1617 AArch64::GPRRegBankID && 1618 "Expected GPRs only?"); 1619 auto Ty = MRI.getType(CompareReg); 1620 unsigned Width = Ty.getSizeInBits(); 1621 assert(!Ty.isVector() && "Expected scalar only?"); 1622 assert(Width <= 64 && "Expected width to be at most 64?"); 1623 static const unsigned OpcTable[2][2] = {{AArch64::CBZW, AArch64::CBZX}, 1624 {AArch64::CBNZW, AArch64::CBNZX}}; 1625 unsigned Opc = OpcTable[IsNegative][Width == 64]; 1626 auto BranchMI = MIB.buildInstr(Opc, {}, {CompareReg}).addMBB(DestMBB); 1627 constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI); 1628 return &*BranchMI; 1629 } 1630 1631 bool AArch64InstructionSelector::selectCompareBranchFedByFCmp( 1632 MachineInstr &I, MachineInstr &FCmp, MachineIRBuilder &MIB) const { 1633 assert(FCmp.getOpcode() == TargetOpcode::G_FCMP); 1634 assert(I.getOpcode() == TargetOpcode::G_BRCOND); 1635 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't 1636 // totally clean. Some of them require two branches to implement. 1637 auto Pred = (CmpInst::Predicate)FCmp.getOperand(1).getPredicate(); 1638 emitFPCompare(FCmp.getOperand(2).getReg(), FCmp.getOperand(3).getReg(), MIB, 1639 Pred); 1640 AArch64CC::CondCode CC1, CC2; 1641 changeFCMPPredToAArch64CC(static_cast<CmpInst::Predicate>(Pred), CC1, CC2); 1642 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1643 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC1).addMBB(DestMBB); 1644 if (CC2 != AArch64CC::AL) 1645 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC2).addMBB(DestMBB); 1646 I.eraseFromParent(); 1647 return true; 1648 } 1649 1650 bool AArch64InstructionSelector::tryOptCompareBranchFedByICmp( 1651 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { 1652 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); 1653 assert(I.getOpcode() == TargetOpcode::G_BRCOND); 1654 // Attempt to optimize the G_BRCOND + G_ICMP into a TB(N)Z/CB(N)Z. 1655 // 1656 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z 1657 // instructions will not be produced, as they are conditional branch 1658 // instructions that do not set flags. 1659 if (!ProduceNonFlagSettingCondBr) 1660 return false; 1661 1662 MachineRegisterInfo &MRI = *MIB.getMRI(); 1663 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1664 auto Pred = 1665 static_cast<CmpInst::Predicate>(ICmp.getOperand(1).getPredicate()); 1666 Register LHS = ICmp.getOperand(2).getReg(); 1667 Register RHS = ICmp.getOperand(3).getReg(); 1668 1669 // We're allowed to emit a TB(N)Z/CB(N)Z. Try to do that. 1670 auto VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI); 1671 MachineInstr *AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); 1672 1673 // When we can emit a TB(N)Z, prefer that. 1674 // 1675 // Handle non-commutative condition codes first. 1676 // Note that we don't want to do this when we have a G_AND because it can 1677 // become a tst. The tst will make the test bit in the TB(N)Z redundant. 1678 if (VRegAndVal && !AndInst) { 1679 int64_t C = VRegAndVal->Value.getSExtValue(); 1680 1681 // When we have a greater-than comparison, we can just test if the msb is 1682 // zero. 
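    // e.g. for a 64-bit LHS, (lhs s> -1) holds exactly when lhs is
    // non-negative, i.e. when bit 63 is clear, so a single TBZ on the sign
    // bit implements the branch.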
1683 if (C == -1 && Pred == CmpInst::ICMP_SGT) { 1684 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; 1685 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB); 1686 I.eraseFromParent(); 1687 return true; 1688 } 1689 1690 // When we have a less than comparison, we can just test if the msb is not 1691 // zero. 1692 if (C == 0 && Pred == CmpInst::ICMP_SLT) { 1693 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; 1694 emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB); 1695 I.eraseFromParent(); 1696 return true; 1697 } 1698 1699 // Inversely, if we have a signed greater-than-or-equal comparison to zero, 1700 // we can test if the msb is zero. 1701 if (C == 0 && Pred == CmpInst::ICMP_SGE) { 1702 uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1; 1703 emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB); 1704 I.eraseFromParent(); 1705 return true; 1706 } 1707 } 1708 1709 // Attempt to handle commutative condition codes. Right now, that's only 1710 // eq/ne. 1711 if (ICmpInst::isEquality(Pred)) { 1712 if (!VRegAndVal) { 1713 std::swap(RHS, LHS); 1714 VRegAndVal = getIConstantVRegValWithLookThrough(RHS, MRI); 1715 AndInst = getOpcodeDef(TargetOpcode::G_AND, LHS, MRI); 1716 } 1717 1718 if (VRegAndVal && VRegAndVal->Value == 0) { 1719 // If there's a G_AND feeding into this branch, try to fold it away by 1720 // emitting a TB(N)Z instead. 1721 // 1722 // Note: If we have LT, then it *is* possible to fold, but it wouldn't be 1723 // beneficial. When we have an AND and LT, we need a TST/ANDS, so folding 1724 // would be redundant. 1725 if (AndInst && 1726 tryOptAndIntoCompareBranch( 1727 *AndInst, /*Invert = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB)) { 1728 I.eraseFromParent(); 1729 return true; 1730 } 1731 1732 // Otherwise, try to emit a CB(N)Z instead. 1733 auto LHSTy = MRI.getType(LHS); 1734 if (!LHSTy.isVector() && LHSTy.getSizeInBits() <= 64) { 1735 emitCBZ(LHS, /*IsNegative = */ Pred == CmpInst::ICMP_NE, DestMBB, MIB); 1736 I.eraseFromParent(); 1737 return true; 1738 } 1739 } 1740 } 1741 1742 return false; 1743 } 1744 1745 bool AArch64InstructionSelector::selectCompareBranchFedByICmp( 1746 MachineInstr &I, MachineInstr &ICmp, MachineIRBuilder &MIB) const { 1747 assert(ICmp.getOpcode() == TargetOpcode::G_ICMP); 1748 assert(I.getOpcode() == TargetOpcode::G_BRCOND); 1749 if (tryOptCompareBranchFedByICmp(I, ICmp, MIB)) 1750 return true; 1751 1752 // Couldn't optimize. Emit a compare + a Bcc. 1753 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1754 auto PredOp = ICmp.getOperand(1); 1755 emitIntegerCompare(ICmp.getOperand(2), ICmp.getOperand(3), PredOp, MIB); 1756 const AArch64CC::CondCode CC = changeICMPPredToAArch64CC( 1757 static_cast<CmpInst::Predicate>(PredOp.getPredicate())); 1758 MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB); 1759 I.eraseFromParent(); 1760 return true; 1761 } 1762 1763 bool AArch64InstructionSelector::selectCompareBranch( 1764 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) { 1765 Register CondReg = I.getOperand(0).getReg(); 1766 MachineInstr *CCMI = MRI.getVRegDef(CondReg); 1767 // Try to select the G_BRCOND using whatever is feeding the condition if 1768 // possible. 
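  // Roughly: a feeding G_FCMP or G_ICMP is selected to a compare + branch
  // directly; otherwise we test bit 0 of the condition register (a TBNZ when
  // non-flag-setting branches are allowed, or ANDS + B.ne under SLH).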
1769 unsigned CCMIOpc = CCMI->getOpcode(); 1770 if (CCMIOpc == TargetOpcode::G_FCMP) 1771 return selectCompareBranchFedByFCmp(I, *CCMI, MIB); 1772 if (CCMIOpc == TargetOpcode::G_ICMP) 1773 return selectCompareBranchFedByICmp(I, *CCMI, MIB); 1774 1775 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z 1776 // instructions will not be produced, as they are conditional branch 1777 // instructions that do not set flags. 1778 if (ProduceNonFlagSettingCondBr) { 1779 emitTestBit(CondReg, /*Bit = */ 0, /*IsNegative = */ true, 1780 I.getOperand(1).getMBB(), MIB); 1781 I.eraseFromParent(); 1782 return true; 1783 } 1784 1785 // Can't emit TB(N)Z/CB(N)Z. Emit a tst + bcc instead. 1786 auto TstMI = 1787 MIB.buildInstr(AArch64::ANDSWri, {LLT::scalar(32)}, {CondReg}).addImm(1); 1788 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 1789 auto Bcc = MIB.buildInstr(AArch64::Bcc) 1790 .addImm(AArch64CC::NE) 1791 .addMBB(I.getOperand(1).getMBB()); 1792 I.eraseFromParent(); 1793 return constrainSelectedInstRegOperands(*Bcc, TII, TRI, RBI); 1794 } 1795 1796 /// Returns the element immediate value of a vector shift operand if found. 1797 /// This needs to detect a splat-like operation, e.g. a G_BUILD_VECTOR. 1798 static std::optional<int64_t> getVectorShiftImm(Register Reg, 1799 MachineRegisterInfo &MRI) { 1800 assert(MRI.getType(Reg).isVector() && "Expected a *vector* shift operand"); 1801 MachineInstr *OpMI = MRI.getVRegDef(Reg); 1802 return getAArch64VectorSplatScalar(*OpMI, MRI); 1803 } 1804 1805 /// Matches and returns the shift immediate value for a SHL instruction given 1806 /// a shift operand. 1807 static std::optional<int64_t> getVectorSHLImm(LLT SrcTy, Register Reg, 1808 MachineRegisterInfo &MRI) { 1809 std::optional<int64_t> ShiftImm = getVectorShiftImm(Reg, MRI); 1810 if (!ShiftImm) 1811 return std::nullopt; 1812 // Check the immediate is in range for a SHL. 1813 int64_t Imm = *ShiftImm; 1814 if (Imm < 0) 1815 return std::nullopt; 1816 switch (SrcTy.getElementType().getSizeInBits()) { 1817 default: 1818 LLVM_DEBUG(dbgs() << "Unhandled element type for vector shift"); 1819 return std::nullopt; 1820 case 8: 1821 if (Imm > 7) 1822 return std::nullopt; 1823 break; 1824 case 16: 1825 if (Imm > 15) 1826 return std::nullopt; 1827 break; 1828 case 32: 1829 if (Imm > 31) 1830 return std::nullopt; 1831 break; 1832 case 64: 1833 if (Imm > 63) 1834 return std::nullopt; 1835 break; 1836 } 1837 return Imm; 1838 } 1839 1840 bool AArch64InstructionSelector::selectVectorSHL(MachineInstr &I, 1841 MachineRegisterInfo &MRI) { 1842 assert(I.getOpcode() == TargetOpcode::G_SHL); 1843 Register DstReg = I.getOperand(0).getReg(); 1844 const LLT Ty = MRI.getType(DstReg); 1845 Register Src1Reg = I.getOperand(1).getReg(); 1846 Register Src2Reg = I.getOperand(2).getReg(); 1847 1848 if (!Ty.isVector()) 1849 return false; 1850 1851 // Check if we have a vector of constants on RHS that we can select as the 1852 // immediate form. 1853 std::optional<int64_t> ImmVal = getVectorSHLImm(Ty, Src2Reg, MRI); 1854 1855 unsigned Opc = 0; 1856 if (Ty == LLT::fixed_vector(2, 64)) { 1857 Opc = ImmVal ? AArch64::SHLv2i64_shift : AArch64::USHLv2i64; 1858 } else if (Ty == LLT::fixed_vector(4, 32)) { 1859 Opc = ImmVal ? AArch64::SHLv4i32_shift : AArch64::USHLv4i32; 1860 } else if (Ty == LLT::fixed_vector(2, 32)) { 1861 Opc = ImmVal ? AArch64::SHLv2i32_shift : AArch64::USHLv2i32; 1862 } else if (Ty == LLT::fixed_vector(4, 16)) { 1863 Opc = ImmVal ? 
AArch64::SHLv4i16_shift : AArch64::USHLv4i16; 1864 } else if (Ty == LLT::fixed_vector(8, 16)) { 1865 Opc = ImmVal ? AArch64::SHLv8i16_shift : AArch64::USHLv8i16; 1866 } else if (Ty == LLT::fixed_vector(16, 8)) { 1867 Opc = ImmVal ? AArch64::SHLv16i8_shift : AArch64::USHLv16i8; 1868 } else if (Ty == LLT::fixed_vector(8, 8)) { 1869 Opc = ImmVal ? AArch64::SHLv8i8_shift : AArch64::USHLv8i8; 1870 } else { 1871 LLVM_DEBUG(dbgs() << "Unhandled G_SHL type"); 1872 return false; 1873 } 1874 1875 auto Shl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg}); 1876 if (ImmVal) 1877 Shl.addImm(*ImmVal); 1878 else 1879 Shl.addUse(Src2Reg); 1880 constrainSelectedInstRegOperands(*Shl, TII, TRI, RBI); 1881 I.eraseFromParent(); 1882 return true; 1883 } 1884 1885 bool AArch64InstructionSelector::selectVectorAshrLshr( 1886 MachineInstr &I, MachineRegisterInfo &MRI) { 1887 assert(I.getOpcode() == TargetOpcode::G_ASHR || 1888 I.getOpcode() == TargetOpcode::G_LSHR); 1889 Register DstReg = I.getOperand(0).getReg(); 1890 const LLT Ty = MRI.getType(DstReg); 1891 Register Src1Reg = I.getOperand(1).getReg(); 1892 Register Src2Reg = I.getOperand(2).getReg(); 1893 1894 if (!Ty.isVector()) 1895 return false; 1896 1897 bool IsASHR = I.getOpcode() == TargetOpcode::G_ASHR; 1898 1899 // We expect the immediate case to be lowered in the PostLegalCombiner to 1900 // AArch64ISD::VASHR or AArch64ISD::VLSHR equivalents. 1901 1902 // There is not a shift right register instruction, but the shift left 1903 // register instruction takes a signed value, where negative numbers specify a 1904 // right shift. 1905 1906 unsigned Opc = 0; 1907 unsigned NegOpc = 0; 1908 const TargetRegisterClass *RC = 1909 getRegClassForTypeOnBank(Ty, RBI.getRegBank(AArch64::FPRRegBankID)); 1910 if (Ty == LLT::fixed_vector(2, 64)) { 1911 Opc = IsASHR ? AArch64::SSHLv2i64 : AArch64::USHLv2i64; 1912 NegOpc = AArch64::NEGv2i64; 1913 } else if (Ty == LLT::fixed_vector(4, 32)) { 1914 Opc = IsASHR ? AArch64::SSHLv4i32 : AArch64::USHLv4i32; 1915 NegOpc = AArch64::NEGv4i32; 1916 } else if (Ty == LLT::fixed_vector(2, 32)) { 1917 Opc = IsASHR ? AArch64::SSHLv2i32 : AArch64::USHLv2i32; 1918 NegOpc = AArch64::NEGv2i32; 1919 } else if (Ty == LLT::fixed_vector(4, 16)) { 1920 Opc = IsASHR ? AArch64::SSHLv4i16 : AArch64::USHLv4i16; 1921 NegOpc = AArch64::NEGv4i16; 1922 } else if (Ty == LLT::fixed_vector(8, 16)) { 1923 Opc = IsASHR ? AArch64::SSHLv8i16 : AArch64::USHLv8i16; 1924 NegOpc = AArch64::NEGv8i16; 1925 } else if (Ty == LLT::fixed_vector(16, 8)) { 1926 Opc = IsASHR ? AArch64::SSHLv16i8 : AArch64::USHLv16i8; 1927 NegOpc = AArch64::NEGv16i8; 1928 } else if (Ty == LLT::fixed_vector(8, 8)) { 1929 Opc = IsASHR ? 
AArch64::SSHLv8i8 : AArch64::USHLv8i8; 1930 NegOpc = AArch64::NEGv8i8; 1931 } else { 1932 LLVM_DEBUG(dbgs() << "Unhandled G_ASHR type"); 1933 return false; 1934 } 1935 1936 auto Neg = MIB.buildInstr(NegOpc, {RC}, {Src2Reg}); 1937 constrainSelectedInstRegOperands(*Neg, TII, TRI, RBI); 1938 auto SShl = MIB.buildInstr(Opc, {DstReg}, {Src1Reg, Neg}); 1939 constrainSelectedInstRegOperands(*SShl, TII, TRI, RBI); 1940 I.eraseFromParent(); 1941 return true; 1942 } 1943 1944 bool AArch64InstructionSelector::selectVaStartAAPCS( 1945 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 1946 return false; 1947 } 1948 1949 bool AArch64InstructionSelector::selectVaStartDarwin( 1950 MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const { 1951 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>(); 1952 Register ListReg = I.getOperand(0).getReg(); 1953 1954 Register ArgsAddrReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 1955 1956 int FrameIdx = FuncInfo->getVarArgsStackIndex(); 1957 if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64( 1958 MF.getFunction().getCallingConv())) { 1959 FrameIdx = FuncInfo->getVarArgsGPRSize() > 0 1960 ? FuncInfo->getVarArgsGPRIndex() 1961 : FuncInfo->getVarArgsStackIndex(); 1962 } 1963 1964 auto MIB = 1965 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::ADDXri)) 1966 .addDef(ArgsAddrReg) 1967 .addFrameIndex(FrameIdx) 1968 .addImm(0) 1969 .addImm(0); 1970 1971 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 1972 1973 MIB = BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::STRXui)) 1974 .addUse(ArgsAddrReg) 1975 .addUse(ListReg) 1976 .addImm(0) 1977 .addMemOperand(*I.memoperands_begin()); 1978 1979 constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI); 1980 I.eraseFromParent(); 1981 return true; 1982 } 1983 1984 void AArch64InstructionSelector::materializeLargeCMVal( 1985 MachineInstr &I, const Value *V, unsigned OpFlags) { 1986 MachineBasicBlock &MBB = *I.getParent(); 1987 MachineFunction &MF = *MBB.getParent(); 1988 MachineRegisterInfo &MRI = MF.getRegInfo(); 1989 1990 auto MovZ = MIB.buildInstr(AArch64::MOVZXi, {&AArch64::GPR64RegClass}, {}); 1991 MovZ->addOperand(MF, I.getOperand(1)); 1992 MovZ->getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_G0 | 1993 AArch64II::MO_NC); 1994 MovZ->addOperand(MF, MachineOperand::CreateImm(0)); 1995 constrainSelectedInstRegOperands(*MovZ, TII, TRI, RBI); 1996 1997 auto BuildMovK = [&](Register SrcReg, unsigned char Flags, unsigned Offset, 1998 Register ForceDstReg) { 1999 Register DstReg = ForceDstReg 2000 ? 
ForceDstReg 2001 : MRI.createVirtualRegister(&AArch64::GPR64RegClass); 2002 auto MovI = MIB.buildInstr(AArch64::MOVKXi).addDef(DstReg).addUse(SrcReg); 2003 if (auto *GV = dyn_cast<GlobalValue>(V)) { 2004 MovI->addOperand(MF, MachineOperand::CreateGA( 2005 GV, MovZ->getOperand(1).getOffset(), Flags)); 2006 } else { 2007 MovI->addOperand( 2008 MF, MachineOperand::CreateBA(cast<BlockAddress>(V), 2009 MovZ->getOperand(1).getOffset(), Flags)); 2010 } 2011 MovI->addOperand(MF, MachineOperand::CreateImm(Offset)); 2012 constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); 2013 return DstReg; 2014 }; 2015 Register DstReg = BuildMovK(MovZ.getReg(0), 2016 AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); 2017 DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); 2018 BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); 2019 } 2020 2021 bool AArch64InstructionSelector::preISelLower(MachineInstr &I) { 2022 MachineBasicBlock &MBB = *I.getParent(); 2023 MachineFunction &MF = *MBB.getParent(); 2024 MachineRegisterInfo &MRI = MF.getRegInfo(); 2025 2026 switch (I.getOpcode()) { 2027 case TargetOpcode::G_STORE: { 2028 bool Changed = contractCrossBankCopyIntoStore(I, MRI); 2029 MachineOperand &SrcOp = I.getOperand(0); 2030 if (MRI.getType(SrcOp.getReg()).isPointer()) { 2031 // Allow matching with imported patterns for stores of pointers. Unlike 2032 // G_LOAD/G_PTR_ADD, we may not have selected all users. So, emit a copy 2033 // and constrain. 2034 auto Copy = MIB.buildCopy(LLT::scalar(64), SrcOp); 2035 Register NewSrc = Copy.getReg(0); 2036 SrcOp.setReg(NewSrc); 2037 RBI.constrainGenericRegister(NewSrc, AArch64::GPR64RegClass, MRI); 2038 Changed = true; 2039 } 2040 return Changed; 2041 } 2042 case TargetOpcode::G_PTR_ADD: 2043 return convertPtrAddToAdd(I, MRI); 2044 case TargetOpcode::G_LOAD: { 2045 // For scalar loads of pointers, we try to convert the dest type from p0 2046 // to s64 so that our imported patterns can match. Like with the G_PTR_ADD 2047 // conversion, this should be ok because all users should have been 2048 // selected already, so the type doesn't matter for them. 2049 Register DstReg = I.getOperand(0).getReg(); 2050 const LLT DstTy = MRI.getType(DstReg); 2051 if (!DstTy.isPointer()) 2052 return false; 2053 MRI.setType(DstReg, LLT::scalar(64)); 2054 return true; 2055 } 2056 case AArch64::G_DUP: { 2057 // Convert the type from p0 to s64 to help selection. 2058 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2059 if (!DstTy.getElementType().isPointer()) 2060 return false; 2061 auto NewSrc = MIB.buildCopy(LLT::scalar(64), I.getOperand(1).getReg()); 2062 MRI.setType(I.getOperand(0).getReg(), 2063 DstTy.changeElementType(LLT::scalar(64))); 2064 MRI.setRegClass(NewSrc.getReg(0), &AArch64::GPR64RegClass); 2065 I.getOperand(1).setReg(NewSrc.getReg(0)); 2066 return true; 2067 } 2068 case TargetOpcode::G_UITOFP: 2069 case TargetOpcode::G_SITOFP: { 2070 // If both source and destination regbanks are FPR, then convert the opcode 2071 // to G_SITOF so that the importer can select it to an fpr variant. 2072 // Otherwise, it ends up matching an fpr/gpr variant and adding a cross-bank 2073 // copy. 
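    // e.g. %dst:fpr(s32) = G_SITOFP %src:fpr(s32) is rewritten to
    // %dst:fpr(s32) = G_SITOF %src, so an FPR->FPR pattern is matched
    // directly instead of a GPR-source variant plus a cross-bank COPY.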
2074 Register SrcReg = I.getOperand(1).getReg(); 2075 LLT SrcTy = MRI.getType(SrcReg); 2076 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2077 if (SrcTy.isVector() || SrcTy.getSizeInBits() != DstTy.getSizeInBits()) 2078 return false; 2079 2080 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::FPRRegBankID) { 2081 if (I.getOpcode() == TargetOpcode::G_SITOFP) 2082 I.setDesc(TII.get(AArch64::G_SITOF)); 2083 else 2084 I.setDesc(TII.get(AArch64::G_UITOF)); 2085 return true; 2086 } 2087 return false; 2088 } 2089 default: 2090 return false; 2091 } 2092 } 2093 2094 /// This lowering tries to look for G_PTR_ADD instructions and then converts 2095 /// them to a standard G_ADD with a COPY on the source. 2096 /// 2097 /// The motivation behind this is to expose the add semantics to the imported 2098 /// tablegen patterns. We shouldn't need to check for uses being loads/stores, 2099 /// because the selector works bottom up, uses before defs. By the time we 2100 /// end up trying to select a G_PTR_ADD, we should have already attempted to 2101 /// fold this into addressing modes and were therefore unsuccessful. 2102 bool AArch64InstructionSelector::convertPtrAddToAdd( 2103 MachineInstr &I, MachineRegisterInfo &MRI) { 2104 assert(I.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD"); 2105 Register DstReg = I.getOperand(0).getReg(); 2106 Register AddOp1Reg = I.getOperand(1).getReg(); 2107 const LLT PtrTy = MRI.getType(DstReg); 2108 if (PtrTy.getAddressSpace() != 0) 2109 return false; 2110 2111 const LLT CastPtrTy = 2112 PtrTy.isVector() ? LLT::fixed_vector(2, 64) : LLT::scalar(64); 2113 auto PtrToInt = MIB.buildPtrToInt(CastPtrTy, AddOp1Reg); 2114 // Set regbanks on the registers. 2115 if (PtrTy.isVector()) 2116 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::FPRRegBankID)); 2117 else 2118 MRI.setRegBank(PtrToInt.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); 2119 2120 // Now turn the %dst(p0) = G_PTR_ADD %base, off into: 2121 // %dst(intty) = G_ADD %intbase, off 2122 I.setDesc(TII.get(TargetOpcode::G_ADD)); 2123 MRI.setType(DstReg, CastPtrTy); 2124 I.getOperand(1).setReg(PtrToInt.getReg(0)); 2125 if (!select(*PtrToInt)) { 2126 LLVM_DEBUG(dbgs() << "Failed to select G_PTRTOINT in convertPtrAddToAdd"); 2127 return false; 2128 } 2129 2130 // Also take the opportunity here to try to do some optimization. 2131 // Try to convert this into a G_SUB if the offset is a 0-x negate idiom. 2132 Register NegatedReg; 2133 if (!mi_match(I.getOperand(2).getReg(), MRI, m_Neg(m_Reg(NegatedReg)))) 2134 return true; 2135 I.getOperand(2).setReg(NegatedReg); 2136 I.setDesc(TII.get(TargetOpcode::G_SUB)); 2137 return true; 2138 } 2139 2140 bool AArch64InstructionSelector::earlySelectSHL(MachineInstr &I, 2141 MachineRegisterInfo &MRI) { 2142 // We try to match the immediate variant of LSL, which is actually an alias 2143 // for a special case of UBFM. Otherwise, we fall back to the imported 2144 // selector which will match the register variant. 2145 assert(I.getOpcode() == TargetOpcode::G_SHL && "unexpected op"); 2146 const auto &MO = I.getOperand(2); 2147 auto VRegAndVal = getIConstantVRegVal(MO.getReg(), MRI); 2148 if (!VRegAndVal) 2149 return false; 2150 2151 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2152 if (DstTy.isVector()) 2153 return false; 2154 bool Is64Bit = DstTy.getSizeInBits() == 64; 2155 auto Imm1Fn = Is64Bit ? selectShiftA_64(MO) : selectShiftA_32(MO); 2156 auto Imm2Fn = Is64Bit ? 
selectShiftB_64(MO) : selectShiftB_32(MO); 2157 2158 if (!Imm1Fn || !Imm2Fn) 2159 return false; 2160 2161 auto NewI = 2162 MIB.buildInstr(Is64Bit ? AArch64::UBFMXri : AArch64::UBFMWri, 2163 {I.getOperand(0).getReg()}, {I.getOperand(1).getReg()}); 2164 2165 for (auto &RenderFn : *Imm1Fn) 2166 RenderFn(NewI); 2167 for (auto &RenderFn : *Imm2Fn) 2168 RenderFn(NewI); 2169 2170 I.eraseFromParent(); 2171 return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); 2172 } 2173 2174 bool AArch64InstructionSelector::contractCrossBankCopyIntoStore( 2175 MachineInstr &I, MachineRegisterInfo &MRI) { 2176 assert(I.getOpcode() == TargetOpcode::G_STORE && "Expected G_STORE"); 2177 // If we're storing a scalar, it doesn't matter what register bank that 2178 // scalar is on. All that matters is the size. 2179 // 2180 // So, if we see something like this (with a 32-bit scalar as an example): 2181 // 2182 // %x:gpr(s32) = ... something ... 2183 // %y:fpr(s32) = COPY %x:gpr(s32) 2184 // G_STORE %y:fpr(s32) 2185 // 2186 // We can fix this up into something like this: 2187 // 2188 // G_STORE %x:gpr(s32) 2189 // 2190 // And then continue the selection process normally. 2191 Register DefDstReg = getSrcRegIgnoringCopies(I.getOperand(0).getReg(), MRI); 2192 if (!DefDstReg.isValid()) 2193 return false; 2194 LLT DefDstTy = MRI.getType(DefDstReg); 2195 Register StoreSrcReg = I.getOperand(0).getReg(); 2196 LLT StoreSrcTy = MRI.getType(StoreSrcReg); 2197 2198 // If we get something strange like a physical register, then we shouldn't 2199 // go any further. 2200 if (!DefDstTy.isValid()) 2201 return false; 2202 2203 // Are the source and dst types the same size? 2204 if (DefDstTy.getSizeInBits() != StoreSrcTy.getSizeInBits()) 2205 return false; 2206 2207 if (RBI.getRegBank(StoreSrcReg, MRI, TRI) == 2208 RBI.getRegBank(DefDstReg, MRI, TRI)) 2209 return false; 2210 2211 // We have a cross-bank copy, which is entering a store. Let's fold it. 2212 I.getOperand(0).setReg(DefDstReg); 2213 return true; 2214 } 2215 2216 bool AArch64InstructionSelector::earlySelect(MachineInstr &I) { 2217 assert(I.getParent() && "Instruction should be in a basic block!"); 2218 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 2219 2220 MachineBasicBlock &MBB = *I.getParent(); 2221 MachineFunction &MF = *MBB.getParent(); 2222 MachineRegisterInfo &MRI = MF.getRegInfo(); 2223 2224 switch (I.getOpcode()) { 2225 case AArch64::G_DUP: { 2226 // Before selecting a DUP instruction, check if it is better selected as a 2227 // MOV or load from a constant pool. 2228 Register Src = I.getOperand(1).getReg(); 2229 auto ValAndVReg = getIConstantVRegValWithLookThrough(Src, MRI); 2230 if (!ValAndVReg) 2231 return false; 2232 LLVMContext &Ctx = MF.getFunction().getContext(); 2233 Register Dst = I.getOperand(0).getReg(); 2234 auto *CV = ConstantDataVector::getSplat( 2235 MRI.getType(Dst).getNumElements(), 2236 ConstantInt::get(Type::getIntNTy(Ctx, MRI.getType(Src).getSizeInBits()), 2237 ValAndVReg->Value)); 2238 if (!emitConstantVector(Dst, CV, MIB, MRI)) 2239 return false; 2240 I.eraseFromParent(); 2241 return true; 2242 } 2243 case TargetOpcode::G_SEXT: 2244 // Check for i64 sext(i32 vector_extract) prior to tablegen to select SMOV 2245 // over a normal extend. 
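  // e.g. %e:gpr(s32) = G_EXTRACT_VECTOR_ELT %v:fpr(<4 x s32>), %idx
  //      %ext:gpr(s64) = G_SEXT %e
  // can, roughly, become a single SMOV to the X register instead of an
  // extract followed by a separate sign extend.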
2246 if (selectUSMovFromExtend(I, MRI)) 2247 return true; 2248 return false; 2249 case TargetOpcode::G_BR: 2250 return false; 2251 case TargetOpcode::G_SHL: 2252 return earlySelectSHL(I, MRI); 2253 case TargetOpcode::G_CONSTANT: { 2254 bool IsZero = false; 2255 if (I.getOperand(1).isCImm()) 2256 IsZero = I.getOperand(1).getCImm()->isZero(); 2257 else if (I.getOperand(1).isImm()) 2258 IsZero = I.getOperand(1).getImm() == 0; 2259 2260 if (!IsZero) 2261 return false; 2262 2263 Register DefReg = I.getOperand(0).getReg(); 2264 LLT Ty = MRI.getType(DefReg); 2265 if (Ty.getSizeInBits() == 64) { 2266 I.getOperand(1).ChangeToRegister(AArch64::XZR, false); 2267 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); 2268 } else if (Ty.getSizeInBits() == 32) { 2269 I.getOperand(1).ChangeToRegister(AArch64::WZR, false); 2270 RBI.constrainGenericRegister(DefReg, AArch64::GPR32RegClass, MRI); 2271 } else 2272 return false; 2273 2274 I.setDesc(TII.get(TargetOpcode::COPY)); 2275 return true; 2276 } 2277 2278 case TargetOpcode::G_ADD: { 2279 // Check if this is being fed by a G_ICMP on either side. 2280 // 2281 // (cmp pred, x, y) + z 2282 // 2283 // In the above case, when the cmp is true, we increment z by 1. So, we can 2284 // fold the add into the cset for the cmp by using cinc. 2285 // 2286 // FIXME: This would probably be a lot nicer in PostLegalizerLowering. 2287 Register AddDst = I.getOperand(0).getReg(); 2288 Register AddLHS = I.getOperand(1).getReg(); 2289 Register AddRHS = I.getOperand(2).getReg(); 2290 // Only handle scalars. 2291 LLT Ty = MRI.getType(AddLHS); 2292 if (Ty.isVector()) 2293 return false; 2294 // Since G_ICMP is modeled as ADDS/SUBS/ANDS, we can handle 32 bits or 64 2295 // bits. 2296 unsigned Size = Ty.getSizeInBits(); 2297 if (Size != 32 && Size != 64) 2298 return false; 2299 auto MatchCmp = [&](Register Reg) -> MachineInstr * { 2300 if (!MRI.hasOneNonDBGUse(Reg)) 2301 return nullptr; 2302 // If the LHS of the add is 32 bits, then we want to fold a 32-bit 2303 // compare. 2304 if (Size == 32) 2305 return getOpcodeDef(TargetOpcode::G_ICMP, Reg, MRI); 2306 // We model scalar compares using 32-bit destinations right now. 2307 // If it's a 64-bit compare, it'll have 64-bit sources. 
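      // i.e. look for %c:_(s32) = G_ICMP pred, %a:_(s64), %b:_(s64) reached
      // through a single-use G_ZEXT of the 32-bit compare result.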
2308 Register ZExt; 2309 if (!mi_match(Reg, MRI, 2310 m_OneNonDBGUse(m_GZExt(m_OneNonDBGUse(m_Reg(ZExt)))))) 2311 return nullptr; 2312 auto *Cmp = getOpcodeDef(TargetOpcode::G_ICMP, ZExt, MRI); 2313 if (!Cmp || 2314 MRI.getType(Cmp->getOperand(2).getReg()).getSizeInBits() != 64) 2315 return nullptr; 2316 return Cmp; 2317 }; 2318 // Try to match 2319 // z + (cmp pred, x, y) 2320 MachineInstr *Cmp = MatchCmp(AddRHS); 2321 if (!Cmp) { 2322 // (cmp pred, x, y) + z 2323 std::swap(AddLHS, AddRHS); 2324 Cmp = MatchCmp(AddRHS); 2325 if (!Cmp) 2326 return false; 2327 } 2328 auto &PredOp = Cmp->getOperand(1); 2329 auto Pred = static_cast<CmpInst::Predicate>(PredOp.getPredicate()); 2330 const AArch64CC::CondCode InvCC = 2331 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred)); 2332 MIB.setInstrAndDebugLoc(I); 2333 emitIntegerCompare(/*LHS=*/Cmp->getOperand(2), 2334 /*RHS=*/Cmp->getOperand(3), PredOp, MIB); 2335 emitCSINC(/*Dst=*/AddDst, /*Src =*/AddLHS, /*Src2=*/AddLHS, InvCC, MIB); 2336 I.eraseFromParent(); 2337 return true; 2338 } 2339 case TargetOpcode::G_OR: { 2340 // Look for operations that take the lower `Width=Size-ShiftImm` bits of 2341 // `ShiftSrc` and insert them into the upper `Width` bits of `MaskSrc` via 2342 // shifting and masking that we can replace with a BFI (encoded as a BFM). 2343 Register Dst = I.getOperand(0).getReg(); 2344 LLT Ty = MRI.getType(Dst); 2345 2346 if (!Ty.isScalar()) 2347 return false; 2348 2349 unsigned Size = Ty.getSizeInBits(); 2350 if (Size != 32 && Size != 64) 2351 return false; 2352 2353 Register ShiftSrc; 2354 int64_t ShiftImm; 2355 Register MaskSrc; 2356 int64_t MaskImm; 2357 if (!mi_match( 2358 Dst, MRI, 2359 m_GOr(m_OneNonDBGUse(m_GShl(m_Reg(ShiftSrc), m_ICst(ShiftImm))), 2360 m_OneNonDBGUse(m_GAnd(m_Reg(MaskSrc), m_ICst(MaskImm)))))) 2361 return false; 2362 2363 if (ShiftImm > Size || ((1ULL << ShiftImm) - 1ULL) != uint64_t(MaskImm)) 2364 return false; 2365 2366 int64_t Immr = Size - ShiftImm; 2367 int64_t Imms = Size - ShiftImm - 1; 2368 unsigned Opc = Size == 32 ? AArch64::BFMWri : AArch64::BFMXri; 2369 emitInstr(Opc, {Dst}, {MaskSrc, ShiftSrc, Immr, Imms}, MIB); 2370 I.eraseFromParent(); 2371 return true; 2372 } 2373 case TargetOpcode::G_FENCE: { 2374 if (I.getOperand(1).getImm() == 0) 2375 BuildMI(MBB, I, MIMetadata(I), TII.get(TargetOpcode::MEMBARRIER)); 2376 else 2377 BuildMI(MBB, I, MIMetadata(I), TII.get(AArch64::DMB)) 2378 .addImm(I.getOperand(0).getImm() == 4 ? 0x9 : 0xb); 2379 I.eraseFromParent(); 2380 return true; 2381 } 2382 default: 2383 return false; 2384 } 2385 } 2386 2387 bool AArch64InstructionSelector::select(MachineInstr &I) { 2388 assert(I.getParent() && "Instruction should be in a basic block!"); 2389 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 2390 2391 MachineBasicBlock &MBB = *I.getParent(); 2392 MachineFunction &MF = *MBB.getParent(); 2393 MachineRegisterInfo &MRI = MF.getRegInfo(); 2394 2395 const AArch64Subtarget *Subtarget = &MF.getSubtarget<AArch64Subtarget>(); 2396 if (Subtarget->requiresStrictAlign()) { 2397 // We don't support this feature yet. 2398 LLVM_DEBUG(dbgs() << "AArch64 GISel does not support strict-align yet\n"); 2399 return false; 2400 } 2401 2402 MIB.setInstrAndDebugLoc(I); 2403 2404 unsigned Opcode = I.getOpcode(); 2405 // G_PHI requires same handling as PHI 2406 if (!I.isPreISelOpcode() || Opcode == TargetOpcode::G_PHI) { 2407 // Certain non-generic instructions also need some special handling. 
    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);

    if (Opcode == TargetOpcode::PHI || Opcode == TargetOpcode::G_PHI) {
      const Register DefReg = I.getOperand(0).getReg();
      const LLT DefTy = MRI.getType(DefReg);

      const RegClassOrRegBank &RegClassOrBank =
          MRI.getRegClassOrRegBank(DefReg);

      const TargetRegisterClass *DefRC
          = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
      if (!DefRC) {
        if (!DefTy.isValid()) {
          LLVM_DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
          return false;
        }
        const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
        DefRC = getRegClassForTypeOnBank(DefTy, RB);
        if (!DefRC) {
          LLVM_DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
          return false;
        }
      }

      I.setDesc(TII.get(TargetOpcode::PHI));

      return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
    }

    if (I.isCopy())
      return selectCopy(I, TII, MRI, TRI, RBI);

    if (I.isDebugInstr())
      return selectDebugInstr(I, MRI, RBI);

    return true;
  }

  if (I.getNumOperands() != I.getNumExplicitOperands()) {
    LLVM_DEBUG(
        dbgs() << "Generic instruction has unexpected implicit operands\n");
    return false;
  }

  // Try to do some lowering before we start instruction selecting. These
  // lowerings are purely transformations on the input G_MIR and so selection
  // must continue after any modification of the instruction.
  if (preISelLower(I)) {
    Opcode = I.getOpcode(); // The opcode may have been modified, refresh it.
  }

  // There may be patterns that the importer can't handle optimally but still
  // selects into a suboptimal sequence, so our custom C++ selection code
  // later never gets a chance to work on them. Therefore, we make an early
  // selection attempt here to give priority to certain selection routines
  // over the imported ones.
  if (earlySelect(I))
    return true;

  if (selectImpl(I, *CoverageInfo))
    return true;

  LLT Ty =
      I.getOperand(0).isReg() ?
      MRI.getType(I.getOperand(0).getReg()) : LLT{};

  switch (Opcode) {
  case TargetOpcode::G_SBFX:
  case TargetOpcode::G_UBFX: {
    static const unsigned OpcTable[2][2] = {
        {AArch64::UBFMWri, AArch64::UBFMXri},
        {AArch64::SBFMWri, AArch64::SBFMXri}};
    bool IsSigned = Opcode == TargetOpcode::G_SBFX;
    unsigned Size = Ty.getSizeInBits();
    unsigned Opc = OpcTable[IsSigned][Size == 64];
    auto Cst1 =
        getIConstantVRegValWithLookThrough(I.getOperand(2).getReg(), MRI);
    assert(Cst1 && "Should have gotten a constant for src 1?");
    auto Cst2 =
        getIConstantVRegValWithLookThrough(I.getOperand(3).getReg(), MRI);
    assert(Cst2 && "Should have gotten a constant for src 2?");
    auto LSB = Cst1->Value.getZExtValue();
    auto Width = Cst2->Value.getZExtValue();
    auto BitfieldInst =
        MIB.buildInstr(Opc, {I.getOperand(0)}, {I.getOperand(1)})
            .addImm(LSB)
            .addImm(LSB + Width - 1);
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*BitfieldInst, TII, TRI, RBI);
  }
  case TargetOpcode::G_BRCOND:
    return selectCompareBranch(I, MF, MRI);

  case TargetOpcode::G_BRINDIRECT: {
    I.setDesc(TII.get(AArch64::BR));
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  case TargetOpcode::G_BRJT:
    return selectBrJT(I, MRI);

  case AArch64::G_ADD_LOW: {
    // This op may have been separated from its ADRP companion by the localizer
    // or some other code motion pass. Given that many CPUs will try to
    // macro fuse these operations anyway, select this into a MOVaddr pseudo
    // which will later be expanded into an ADRP+ADD pair after scheduling.
    MachineInstr *BaseMI = MRI.getVRegDef(I.getOperand(1).getReg());
    if (BaseMI->getOpcode() != AArch64::ADRP) {
      I.setDesc(TII.get(AArch64::ADDXri));
      I.addOperand(MachineOperand::CreateImm(0));
      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
    }
    assert(TM.getCodeModel() == CodeModel::Small &&
           "Expected small code model");
    auto Op1 = BaseMI->getOperand(1);
    auto Op2 = I.getOperand(2);
    auto MovAddr = MIB.buildInstr(AArch64::MOVaddr, {I.getOperand(0)}, {})
                       .addGlobalAddress(Op1.getGlobal(), Op1.getOffset(),
                                         Op1.getTargetFlags())
                       .addGlobalAddress(Op2.getGlobal(), Op2.getOffset(),
                                         Op2.getTargetFlags());
    I.eraseFromParent();
    return constrainSelectedInstRegOperands(*MovAddr, TII, TRI, RBI);
  }

  case TargetOpcode::G_BSWAP: {
    // Handle vector types for G_BSWAP directly.
    Register DstReg = I.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // We should only get vector types here; everything else is handled by the
    // importer right now.
    if (!DstTy.isVector() || DstTy.getSizeInBits() > 128) {
      LLVM_DEBUG(dbgs() << "Dst type for G_BSWAP currently unsupported.\n");
      return false;
    }

    // Only handle 4 and 2 element vectors for now.
    // TODO: 16-bit elements.
    unsigned NumElts = DstTy.getNumElements();
    if (NumElts != 4 && NumElts != 2) {
      LLVM_DEBUG(dbgs() << "Unsupported number of elements for G_BSWAP.\n");
      return false;
    }

    // Choose the correct opcode for the supported types. Right now, that's
    // v2s32, v4s32, and v2s64.
    unsigned Opc = 0;
    unsigned EltSize = DstTy.getElementType().getSizeInBits();
    if (EltSize == 32)
      Opc = (DstTy.getNumElements() == 2) ?
AArch64::REV32v8i8 2561 : AArch64::REV32v16i8; 2562 else if (EltSize == 64) 2563 Opc = AArch64::REV64v16i8; 2564 2565 // We should always get something by the time we get here... 2566 assert(Opc != 0 && "Didn't get an opcode for G_BSWAP?"); 2567 2568 I.setDesc(TII.get(Opc)); 2569 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2570 } 2571 2572 case TargetOpcode::G_FCONSTANT: 2573 case TargetOpcode::G_CONSTANT: { 2574 const bool isFP = Opcode == TargetOpcode::G_FCONSTANT; 2575 2576 const LLT s8 = LLT::scalar(8); 2577 const LLT s16 = LLT::scalar(16); 2578 const LLT s32 = LLT::scalar(32); 2579 const LLT s64 = LLT::scalar(64); 2580 const LLT s128 = LLT::scalar(128); 2581 const LLT p0 = LLT::pointer(0, 64); 2582 2583 const Register DefReg = I.getOperand(0).getReg(); 2584 const LLT DefTy = MRI.getType(DefReg); 2585 const unsigned DefSize = DefTy.getSizeInBits(); 2586 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 2587 2588 // FIXME: Redundant check, but even less readable when factored out. 2589 if (isFP) { 2590 if (Ty != s16 && Ty != s32 && Ty != s64 && Ty != s128) { 2591 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty 2592 << " constant, expected: " << s16 << " or " << s32 2593 << " or " << s64 << " or " << s128 << '\n'); 2594 return false; 2595 } 2596 2597 if (RB.getID() != AArch64::FPRRegBankID) { 2598 LLVM_DEBUG(dbgs() << "Unable to materialize FP " << Ty 2599 << " constant on bank: " << RB 2600 << ", expected: FPR\n"); 2601 return false; 2602 } 2603 2604 // The case when we have 0.0 is covered by tablegen. Reject it here so we 2605 // can be sure tablegen works correctly and isn't rescued by this code. 2606 // 0.0 is not covered by tablegen for FP128. So we will handle this 2607 // scenario in the code here. 2608 if (DefSize != 128 && I.getOperand(1).getFPImm()->isExactlyValue(0.0)) 2609 return false; 2610 } else { 2611 // s32 and s64 are covered by tablegen. 2612 if (Ty != p0 && Ty != s8 && Ty != s16) { 2613 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty 2614 << " constant, expected: " << s32 << ", " << s64 2615 << ", or " << p0 << '\n'); 2616 return false; 2617 } 2618 2619 if (RB.getID() != AArch64::GPRRegBankID) { 2620 LLVM_DEBUG(dbgs() << "Unable to materialize integer " << Ty 2621 << " constant on bank: " << RB 2622 << ", expected: GPR\n"); 2623 return false; 2624 } 2625 } 2626 2627 if (isFP) { 2628 const TargetRegisterClass &FPRRC = *getRegClassForTypeOnBank(DefTy, RB); 2629 // For 16, 64, and 128b values, emit a constant pool load. 2630 switch (DefSize) { 2631 default: 2632 llvm_unreachable("Unexpected destination size for G_FCONSTANT?"); 2633 case 32: 2634 // For s32, use a cp load if we have optsize/minsize. 2635 if (!shouldOptForSize(&MF)) 2636 break; 2637 [[fallthrough]]; 2638 case 16: 2639 case 64: 2640 case 128: { 2641 auto *FPImm = I.getOperand(1).getFPImm(); 2642 auto *LoadMI = emitLoadFromConstantPool(FPImm, MIB); 2643 if (!LoadMI) { 2644 LLVM_DEBUG(dbgs() << "Failed to load double constant pool entry\n"); 2645 return false; 2646 } 2647 MIB.buildCopy({DefReg}, {LoadMI->getOperand(0).getReg()}); 2648 I.eraseFromParent(); 2649 return RBI.constrainGenericRegister(DefReg, FPRRC, MRI); 2650 } 2651 } 2652 2653 // Either emit a FMOV, or emit a copy to emit a normal mov. 
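    // i.e. for a 32-bit FP constant without optsize/minsize: materialize the
    // bit pattern into a GPR32 (this instruction becomes a MOVi32imm below)
    // and then COPY the result into the original FPR def.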
2654 assert(DefSize == 32 && 2655 "Expected constant pool loads for all sizes other than 32!"); 2656 const Register DefGPRReg = 2657 MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2658 MachineOperand &RegOp = I.getOperand(0); 2659 RegOp.setReg(DefGPRReg); 2660 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 2661 MIB.buildCopy({DefReg}, {DefGPRReg}); 2662 2663 if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) { 2664 LLVM_DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n"); 2665 return false; 2666 } 2667 2668 MachineOperand &ImmOp = I.getOperand(1); 2669 // FIXME: Is going through int64_t always correct? 2670 ImmOp.ChangeToImmediate( 2671 ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue()); 2672 } else if (I.getOperand(1).isCImm()) { 2673 uint64_t Val = I.getOperand(1).getCImm()->getZExtValue(); 2674 I.getOperand(1).ChangeToImmediate(Val); 2675 } else if (I.getOperand(1).isImm()) { 2676 uint64_t Val = I.getOperand(1).getImm(); 2677 I.getOperand(1).ChangeToImmediate(Val); 2678 } 2679 2680 const unsigned MovOpc = 2681 DefSize == 64 ? AArch64::MOVi64imm : AArch64::MOVi32imm; 2682 I.setDesc(TII.get(MovOpc)); 2683 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2684 return true; 2685 } 2686 case TargetOpcode::G_EXTRACT: { 2687 Register DstReg = I.getOperand(0).getReg(); 2688 Register SrcReg = I.getOperand(1).getReg(); 2689 LLT SrcTy = MRI.getType(SrcReg); 2690 LLT DstTy = MRI.getType(DstReg); 2691 (void)DstTy; 2692 unsigned SrcSize = SrcTy.getSizeInBits(); 2693 2694 if (SrcTy.getSizeInBits() > 64) { 2695 // This should be an extract of an s128, which is like a vector extract. 2696 if (SrcTy.getSizeInBits() != 128) 2697 return false; 2698 // Only support extracting 64 bits from an s128 at the moment. 2699 if (DstTy.getSizeInBits() != 64) 2700 return false; 2701 2702 unsigned Offset = I.getOperand(2).getImm(); 2703 if (Offset % 64 != 0) 2704 return false; 2705 2706 // Check we have the right regbank always. 2707 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 2708 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 2709 assert(SrcRB.getID() == DstRB.getID() && "Wrong extract regbank!"); 2710 2711 if (SrcRB.getID() == AArch64::GPRRegBankID) { 2712 auto NewI = 2713 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 2714 .addUse(SrcReg, 0, 2715 Offset == 0 ? AArch64::sube64 : AArch64::subo64); 2716 constrainOperandRegClass(MF, TRI, MRI, TII, RBI, *NewI, 2717 AArch64::GPR64RegClass, NewI->getOperand(0)); 2718 I.eraseFromParent(); 2719 return true; 2720 } 2721 2722 // Emit the same code as a vector extract. 2723 // Offset must be a multiple of 64. 2724 unsigned LaneIdx = Offset / 64; 2725 MachineInstr *Extract = emitExtractVectorElt( 2726 DstReg, DstRB, LLT::scalar(64), SrcReg, LaneIdx, MIB); 2727 if (!Extract) 2728 return false; 2729 I.eraseFromParent(); 2730 return true; 2731 } 2732 2733 I.setDesc(TII.get(SrcSize == 64 ? 
AArch64::UBFMXri : AArch64::UBFMWri)); 2734 MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() + 2735 Ty.getSizeInBits() - 1); 2736 2737 if (SrcSize < 64) { 2738 assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 && 2739 "unexpected G_EXTRACT types"); 2740 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2741 } 2742 2743 DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 2744 MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator())); 2745 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 2746 .addReg(DstReg, 0, AArch64::sub_32); 2747 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 2748 AArch64::GPR32RegClass, MRI); 2749 I.getOperand(0).setReg(DstReg); 2750 2751 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2752 } 2753 2754 case TargetOpcode::G_INSERT: { 2755 LLT SrcTy = MRI.getType(I.getOperand(2).getReg()); 2756 LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 2757 unsigned DstSize = DstTy.getSizeInBits(); 2758 // Larger inserts are vectors, same-size ones should be something else by 2759 // now (split up or turned into COPYs). 2760 if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32) 2761 return false; 2762 2763 I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri)); 2764 unsigned LSB = I.getOperand(3).getImm(); 2765 unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits(); 2766 I.getOperand(3).setImm((DstSize - LSB) % DstSize); 2767 MachineInstrBuilder(MF, I).addImm(Width - 1); 2768 2769 if (DstSize < 64) { 2770 assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 && 2771 "unexpected G_INSERT types"); 2772 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2773 } 2774 2775 Register SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64)); 2776 BuildMI(MBB, I.getIterator(), I.getDebugLoc(), 2777 TII.get(AArch64::SUBREG_TO_REG)) 2778 .addDef(SrcReg) 2779 .addImm(0) 2780 .addUse(I.getOperand(2).getReg()) 2781 .addImm(AArch64::sub_32); 2782 RBI.constrainGenericRegister(I.getOperand(2).getReg(), 2783 AArch64::GPR32RegClass, MRI); 2784 I.getOperand(2).setReg(SrcReg); 2785 2786 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2787 } 2788 case TargetOpcode::G_FRAME_INDEX: { 2789 // allocas and G_FRAME_INDEX are only supported in addrspace(0). 2790 if (Ty != LLT::pointer(0, 64)) { 2791 LLVM_DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty 2792 << ", expected: " << LLT::pointer(0, 64) << '\n'); 2793 return false; 2794 } 2795 I.setDesc(TII.get(AArch64::ADDXri)); 2796 2797 // MOs for a #0 shifted immediate. 2798 I.addOperand(MachineOperand::CreateImm(0)); 2799 I.addOperand(MachineOperand::CreateImm(0)); 2800 2801 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2802 } 2803 2804 case TargetOpcode::G_GLOBAL_VALUE: { 2805 auto GV = I.getOperand(1).getGlobal(); 2806 if (GV->isThreadLocal()) 2807 return selectTLSGlobalValue(I, MRI); 2808 2809 unsigned OpFlags = STI.ClassifyGlobalReference(GV, TM); 2810 if (OpFlags & AArch64II::MO_GOT) { 2811 I.setDesc(TII.get(AArch64::LOADgot)); 2812 I.getOperand(1).setTargetFlags(OpFlags); 2813 } else if (TM.getCodeModel() == CodeModel::Large) { 2814 // Materialize the global using movz/movk instructions. 
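      // i.e. a MOVZ of address bits [0,16), then MOVKs of bits [16,32),
      // [32,48) and [48,64); see materializeLargeCMVal above.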
2815 materializeLargeCMVal(I, GV, OpFlags); 2816 I.eraseFromParent(); 2817 return true; 2818 } else if (TM.getCodeModel() == CodeModel::Tiny) { 2819 I.setDesc(TII.get(AArch64::ADR)); 2820 I.getOperand(1).setTargetFlags(OpFlags); 2821 } else { 2822 I.setDesc(TII.get(AArch64::MOVaddr)); 2823 I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE); 2824 MachineInstrBuilder MIB(MF, I); 2825 MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(), 2826 OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 2827 } 2828 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2829 } 2830 2831 case TargetOpcode::G_ZEXTLOAD: 2832 case TargetOpcode::G_LOAD: 2833 case TargetOpcode::G_STORE: { 2834 GLoadStore &LdSt = cast<GLoadStore>(I); 2835 bool IsZExtLoad = I.getOpcode() == TargetOpcode::G_ZEXTLOAD; 2836 LLT PtrTy = MRI.getType(LdSt.getPointerReg()); 2837 2838 if (PtrTy != LLT::pointer(0, 64)) { 2839 LLVM_DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy 2840 << ", expected: " << LLT::pointer(0, 64) << '\n'); 2841 return false; 2842 } 2843 2844 uint64_t MemSizeInBytes = LdSt.getMemSize(); 2845 unsigned MemSizeInBits = LdSt.getMemSizeInBits(); 2846 AtomicOrdering Order = LdSt.getMMO().getSuccessOrdering(); 2847 2848 // Need special instructions for atomics that affect ordering. 2849 if (Order != AtomicOrdering::NotAtomic && 2850 Order != AtomicOrdering::Unordered && 2851 Order != AtomicOrdering::Monotonic) { 2852 assert(!isa<GZExtLoad>(LdSt)); 2853 if (MemSizeInBytes > 64) 2854 return false; 2855 2856 if (isa<GLoad>(LdSt)) { 2857 static constexpr unsigned LDAPROpcodes[] = { 2858 AArch64::LDAPRB, AArch64::LDAPRH, AArch64::LDAPRW, AArch64::LDAPRX}; 2859 static constexpr unsigned LDAROpcodes[] = { 2860 AArch64::LDARB, AArch64::LDARH, AArch64::LDARW, AArch64::LDARX}; 2861 ArrayRef<unsigned> Opcodes = 2862 STI.hasRCPC() && Order != AtomicOrdering::SequentiallyConsistent 2863 ? LDAPROpcodes 2864 : LDAROpcodes; 2865 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)])); 2866 } else { 2867 static constexpr unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH, 2868 AArch64::STLRW, AArch64::STLRX}; 2869 Register ValReg = LdSt.getReg(0); 2870 if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) { 2871 // Emit a subreg copy of 32 bits. 2872 Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2873 MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {}) 2874 .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32); 2875 I.getOperand(0).setReg(NewVal); 2876 } 2877 I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)])); 2878 } 2879 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 2880 return true; 2881 } 2882 2883 #ifndef NDEBUG 2884 const Register PtrReg = LdSt.getPointerReg(); 2885 const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI); 2886 // Check that the pointer register is valid. 2887 assert(PtrRB.getID() == AArch64::GPRRegBankID && 2888 "Load/Store pointer operand isn't a GPR"); 2889 assert(MRI.getType(PtrReg).isPointer() && 2890 "Load/Store pointer operand isn't a pointer"); 2891 #endif 2892 2893 const Register ValReg = LdSt.getReg(0); 2894 const LLT ValTy = MRI.getType(ValReg); 2895 const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI); 2896 2897 // The code below doesn't support truncating stores, so we need to split it 2898 // again. 
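    // e.g. a G_STORE of an s64 value with a 4-byte memory operand is
    // rewritten to store a 32-bit sub-register COPY of the value, so the
    // 32-bit store patterns can match it.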
2899 if (isa<GStore>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { 2900 unsigned SubReg; 2901 LLT MemTy = LdSt.getMMO().getMemoryType(); 2902 auto *RC = getRegClassForTypeOnBank(MemTy, RB); 2903 if (!getSubRegForClass(RC, TRI, SubReg)) 2904 return false; 2905 2906 // Generate a subreg copy. 2907 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {MemTy}, {}) 2908 .addReg(ValReg, 0, SubReg) 2909 .getReg(0); 2910 RBI.constrainGenericRegister(Copy, *RC, MRI); 2911 LdSt.getOperand(0).setReg(Copy); 2912 } else if (isa<GLoad>(LdSt) && ValTy.getSizeInBits() > MemSizeInBits) { 2913 // If this is an any-extending load from the FPR bank, split it into a regular 2914 // load + extend. 2915 if (RB.getID() == AArch64::FPRRegBankID) { 2916 unsigned SubReg; 2917 LLT MemTy = LdSt.getMMO().getMemoryType(); 2918 auto *RC = getRegClassForTypeOnBank(MemTy, RB); 2919 if (!getSubRegForClass(RC, TRI, SubReg)) 2920 return false; 2921 Register OldDst = LdSt.getReg(0); 2922 Register NewDst = 2923 MRI.createGenericVirtualRegister(LdSt.getMMO().getMemoryType()); 2924 LdSt.getOperand(0).setReg(NewDst); 2925 MRI.setRegBank(NewDst, RB); 2926 // Generate a SUBREG_TO_REG to extend it. 2927 MIB.setInsertPt(MIB.getMBB(), std::next(LdSt.getIterator())); 2928 MIB.buildInstr(AArch64::SUBREG_TO_REG, {OldDst}, {}) 2929 .addImm(0) 2930 .addUse(NewDst) 2931 .addImm(SubReg); 2932 auto SubRegRC = getRegClassForTypeOnBank(MRI.getType(OldDst), RB); 2933 RBI.constrainGenericRegister(OldDst, *SubRegRC, MRI); 2934 MIB.setInstr(LdSt); 2935 } 2936 } 2937 2938 // Helper lambda for partially selecting I. Either returns the original 2939 // instruction with an updated opcode, or a new instruction. 2940 auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * { 2941 bool IsStore = isa<GStore>(I); 2942 const unsigned NewOpc = 2943 selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits); 2944 if (NewOpc == I.getOpcode()) 2945 return nullptr; 2946 // Check if we can fold anything into the addressing mode. 2947 auto AddrModeFns = 2948 selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes); 2949 if (!AddrModeFns) { 2950 // Can't fold anything. Use the original instruction. 2951 I.setDesc(TII.get(NewOpc)); 2952 I.addOperand(MachineOperand::CreateImm(0)); 2953 return &I; 2954 } 2955 2956 // Folded something. Create a new instruction and return it. 2957 auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags()); 2958 Register CurValReg = I.getOperand(0).getReg(); 2959 IsStore ? NewInst.addUse(CurValReg) : NewInst.addDef(CurValReg); 2960 NewInst.cloneMemRefs(I); 2961 for (auto &Fn : *AddrModeFns) 2962 Fn(NewInst); 2963 I.eraseFromParent(); 2964 return &*NewInst; 2965 }; 2966 2967 MachineInstr *LoadStore = SelectLoadStoreAddressingMode(); 2968 if (!LoadStore) 2969 return false; 2970 2971 // If we're storing a 0, use WZR/XZR. 2972 if (Opcode == TargetOpcode::G_STORE) { 2973 auto CVal = getIConstantVRegValWithLookThrough( 2974 LoadStore->getOperand(0).getReg(), MRI); 2975 if (CVal && CVal->Value == 0) { 2976 switch (LoadStore->getOpcode()) { 2977 case AArch64::STRWui: 2978 case AArch64::STRHHui: 2979 case AArch64::STRBBui: 2980 LoadStore->getOperand(0).setReg(AArch64::WZR); 2981 break; 2982 case AArch64::STRXui: 2983 LoadStore->getOperand(0).setReg(AArch64::XZR); 2984 break; 2985 } 2986 } 2987 } 2988 2989 if (IsZExtLoad) { 2990 // The zextload from a smaller type to i32 should be handled by the 2991 // importer. 
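      // For the 64-bit case handled below, load into a fresh GPR32 and widen
      // the result with SUBREG_TO_REG; a load into a W register already
      // zeroes the upper 32 bits of the X register, so no extra extend is
      // needed.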
2992 if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64) 2993 return false; 2994 // If we have a ZEXTLOAD then change the load's type to be a narrower reg 2995 // and zero_extend with SUBREG_TO_REG. 2996 Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 2997 Register DstReg = LoadStore->getOperand(0).getReg(); 2998 LoadStore->getOperand(0).setReg(LdReg); 2999 3000 MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator())); 3001 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {}) 3002 .addImm(0) 3003 .addUse(LdReg) 3004 .addImm(AArch64::sub_32); 3005 constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); 3006 return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass, 3007 MRI); 3008 } 3009 return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI); 3010 } 3011 3012 case TargetOpcode::G_SMULH: 3013 case TargetOpcode::G_UMULH: { 3014 // Reject the various things we don't support yet. 3015 if (unsupportedBinOp(I, RBI, MRI, TRI)) 3016 return false; 3017 3018 const Register DefReg = I.getOperand(0).getReg(); 3019 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 3020 3021 if (RB.getID() != AArch64::GPRRegBankID) { 3022 LLVM_DEBUG(dbgs() << "G_[SU]MULH on bank: " << RB << ", expected: GPR\n"); 3023 return false; 3024 } 3025 3026 if (Ty != LLT::scalar(64)) { 3027 LLVM_DEBUG(dbgs() << "G_[SU]MULH has type: " << Ty 3028 << ", expected: " << LLT::scalar(64) << '\n'); 3029 return false; 3030 } 3031 3032 unsigned NewOpc = I.getOpcode() == TargetOpcode::G_SMULH ? AArch64::SMULHrr 3033 : AArch64::UMULHrr; 3034 I.setDesc(TII.get(NewOpc)); 3035 3036 // Now that we selected an opcode, we need to constrain the register 3037 // operands to use appropriate classes. 3038 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3039 } 3040 case TargetOpcode::G_LSHR: 3041 case TargetOpcode::G_ASHR: 3042 if (MRI.getType(I.getOperand(0).getReg()).isVector()) 3043 return selectVectorAshrLshr(I, MRI); 3044 [[fallthrough]]; 3045 case TargetOpcode::G_SHL: 3046 if (Opcode == TargetOpcode::G_SHL && 3047 MRI.getType(I.getOperand(0).getReg()).isVector()) 3048 return selectVectorSHL(I, MRI); 3049 3050 // These shifts were legalized to have 64 bit shift amounts because we 3051 // want to take advantage of the selection patterns that assume the 3052 // immediates are s64s, however, selectBinaryOp will assume both operands 3053 // will have the same bit size. 3054 { 3055 Register SrcReg = I.getOperand(1).getReg(); 3056 Register ShiftReg = I.getOperand(2).getReg(); 3057 const LLT ShiftTy = MRI.getType(ShiftReg); 3058 const LLT SrcTy = MRI.getType(SrcReg); 3059 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 32 && 3060 ShiftTy.getSizeInBits() == 64) { 3061 assert(!ShiftTy.isVector() && "unexpected vector shift ty"); 3062 // Insert a subregister copy to implement a 64->32 trunc 3063 auto Trunc = MIB.buildInstr(TargetOpcode::COPY, {SrcTy}, {}) 3064 .addReg(ShiftReg, 0, AArch64::sub_32); 3065 MRI.setRegBank(Trunc.getReg(0), RBI.getRegBank(AArch64::GPRRegBankID)); 3066 I.getOperand(2).setReg(Trunc.getReg(0)); 3067 } 3068 } 3069 [[fallthrough]]; 3070 case TargetOpcode::G_OR: { 3071 // Reject the various things we don't support yet. 
3072 if (unsupportedBinOp(I, RBI, MRI, TRI)) 3073 return false; 3074 3075 const unsigned OpSize = Ty.getSizeInBits(); 3076 3077 const Register DefReg = I.getOperand(0).getReg(); 3078 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 3079 3080 const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize); 3081 if (NewOpc == I.getOpcode()) 3082 return false; 3083 3084 I.setDesc(TII.get(NewOpc)); 3085 // FIXME: Should the type be always reset in setDesc? 3086 3087 // Now that we selected an opcode, we need to constrain the register 3088 // operands to use appropriate classes. 3089 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3090 } 3091 3092 case TargetOpcode::G_PTR_ADD: { 3093 emitADD(I.getOperand(0).getReg(), I.getOperand(1), I.getOperand(2), MIB); 3094 I.eraseFromParent(); 3095 return true; 3096 } 3097 3098 case TargetOpcode::G_SADDE: 3099 case TargetOpcode::G_UADDE: 3100 case TargetOpcode::G_SSUBE: 3101 case TargetOpcode::G_USUBE: 3102 case TargetOpcode::G_SADDO: 3103 case TargetOpcode::G_UADDO: 3104 case TargetOpcode::G_SSUBO: 3105 case TargetOpcode::G_USUBO: 3106 return selectOverflowOp(I, MRI); 3107 3108 case TargetOpcode::G_PTRMASK: { 3109 Register MaskReg = I.getOperand(2).getReg(); 3110 std::optional<int64_t> MaskVal = getIConstantVRegSExtVal(MaskReg, MRI); 3111 // TODO: Implement arbitrary cases 3112 if (!MaskVal || !isShiftedMask_64(*MaskVal)) 3113 return false; 3114 3115 uint64_t Mask = *MaskVal; 3116 I.setDesc(TII.get(AArch64::ANDXri)); 3117 I.getOperand(2).ChangeToImmediate( 3118 AArch64_AM::encodeLogicalImmediate(Mask, 64)); 3119 3120 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3121 } 3122 case TargetOpcode::G_PTRTOINT: 3123 case TargetOpcode::G_TRUNC: { 3124 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 3125 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 3126 3127 const Register DstReg = I.getOperand(0).getReg(); 3128 const Register SrcReg = I.getOperand(1).getReg(); 3129 3130 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 3131 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 3132 3133 if (DstRB.getID() != SrcRB.getID()) { 3134 LLVM_DEBUG( 3135 dbgs() << "G_TRUNC/G_PTRTOINT input/output on different banks\n"); 3136 return false; 3137 } 3138 3139 if (DstRB.getID() == AArch64::GPRRegBankID) { 3140 const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB); 3141 if (!DstRC) 3142 return false; 3143 3144 const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(SrcTy, SrcRB); 3145 if (!SrcRC) 3146 return false; 3147 3148 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 3149 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 3150 LLVM_DEBUG(dbgs() << "Failed to constrain G_TRUNC/G_PTRTOINT\n"); 3151 return false; 3152 } 3153 3154 if (DstRC == SrcRC) { 3155 // Nothing to be done 3156 } else if (Opcode == TargetOpcode::G_TRUNC && DstTy == LLT::scalar(32) && 3157 SrcTy == LLT::scalar(64)) { 3158 llvm_unreachable("TableGen can import this case"); 3159 return false; 3160 } else if (DstRC == &AArch64::GPR32RegClass && 3161 SrcRC == &AArch64::GPR64RegClass) { 3162 I.getOperand(1).setSubReg(AArch64::sub_32); 3163 } else { 3164 LLVM_DEBUG( 3165 dbgs() << "Unhandled mismatched classes in G_TRUNC/G_PTRTOINT\n"); 3166 return false; 3167 } 3168 3169 I.setDesc(TII.get(TargetOpcode::COPY)); 3170 return true; 3171 } else if (DstRB.getID() == AArch64::FPRRegBankID) { 3172 if (DstTy == LLT::fixed_vector(4, 16) && 3173 SrcTy == LLT::fixed_vector(4, 32)) { 3174 
I.setDesc(TII.get(AArch64::XTNv4i16)); 3175 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3176 return true; 3177 } 3178 3179 if (!SrcTy.isVector() && SrcTy.getSizeInBits() == 128) { 3180 MachineInstr *Extract = emitExtractVectorElt( 3181 DstReg, DstRB, LLT::scalar(DstTy.getSizeInBits()), SrcReg, 0, MIB); 3182 if (!Extract) 3183 return false; 3184 I.eraseFromParent(); 3185 return true; 3186 } 3187 3188 // We might have a vector G_PTRTOINT, in which case just emit a COPY. 3189 if (Opcode == TargetOpcode::G_PTRTOINT) { 3190 assert(DstTy.isVector() && "Expected an FPR ptrtoint to be a vector"); 3191 I.setDesc(TII.get(TargetOpcode::COPY)); 3192 return selectCopy(I, TII, MRI, TRI, RBI); 3193 } 3194 } 3195 3196 return false; 3197 } 3198 3199 case TargetOpcode::G_ANYEXT: { 3200 if (selectUSMovFromExtend(I, MRI)) 3201 return true; 3202 3203 const Register DstReg = I.getOperand(0).getReg(); 3204 const Register SrcReg = I.getOperand(1).getReg(); 3205 3206 const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI); 3207 if (RBDst.getID() != AArch64::GPRRegBankID) { 3208 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst 3209 << ", expected: GPR\n"); 3210 return false; 3211 } 3212 3213 const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI); 3214 if (RBSrc.getID() != AArch64::GPRRegBankID) { 3215 LLVM_DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc 3216 << ", expected: GPR\n"); 3217 return false; 3218 } 3219 3220 const unsigned DstSize = MRI.getType(DstReg).getSizeInBits(); 3221 3222 if (DstSize == 0) { 3223 LLVM_DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n"); 3224 return false; 3225 } 3226 3227 if (DstSize != 64 && DstSize > 32) { 3228 LLVM_DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize 3229 << ", expected: 32 or 64\n"); 3230 return false; 3231 } 3232 // At this point G_ANYEXT is just like a plain COPY, but we need 3233 // to explicitly form the 64-bit value if any. 3234 if (DstSize > 32) { 3235 Register ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass); 3236 BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG)) 3237 .addDef(ExtSrc) 3238 .addImm(0) 3239 .addUse(SrcReg) 3240 .addImm(AArch64::sub_32); 3241 I.getOperand(1).setReg(ExtSrc); 3242 } 3243 return selectCopy(I, TII, MRI, TRI, RBI); 3244 } 3245 3246 case TargetOpcode::G_ZEXT: 3247 case TargetOpcode::G_SEXT_INREG: 3248 case TargetOpcode::G_SEXT: { 3249 if (selectUSMovFromExtend(I, MRI)) 3250 return true; 3251 3252 unsigned Opcode = I.getOpcode(); 3253 const bool IsSigned = Opcode != TargetOpcode::G_ZEXT; 3254 const Register DefReg = I.getOperand(0).getReg(); 3255 Register SrcReg = I.getOperand(1).getReg(); 3256 const LLT DstTy = MRI.getType(DefReg); 3257 const LLT SrcTy = MRI.getType(SrcReg); 3258 unsigned DstSize = DstTy.getSizeInBits(); 3259 unsigned SrcSize = SrcTy.getSizeInBits(); 3260 3261 // SEXT_INREG has the same src reg size as dst, the size of the value to be 3262 // extended is encoded in the imm. 3263 if (Opcode == TargetOpcode::G_SEXT_INREG) 3264 SrcSize = I.getOperand(2).getImm(); 3265 3266 if (DstTy.isVector()) 3267 return false; // Should be handled by imported patterns. 3268 3269 assert((*RBI.getRegBank(DefReg, MRI, TRI)).getID() == 3270 AArch64::GPRRegBankID && 3271 "Unexpected ext regbank"); 3272 3273 MachineInstr *ExtI; 3274 3275 // First check if we're extending the result of a load which has a dest type 3276 // smaller than 32 bits, then this zext is redundant. 
GPR32 is the smallest 3277 // GPR register on AArch64 and all loads which are smaller automatically 3278 // zero-extend the upper bits. E.g. 3279 // %v(s8) = G_LOAD %p, :: (load 1) 3280 // %v2(s32) = G_ZEXT %v(s8) 3281 if (!IsSigned) { 3282 auto *LoadMI = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI); 3283 bool IsGPR = 3284 RBI.getRegBank(SrcReg, MRI, TRI)->getID() == AArch64::GPRRegBankID; 3285 if (LoadMI && IsGPR) { 3286 const MachineMemOperand *MemOp = *LoadMI->memoperands_begin(); 3287 unsigned BytesLoaded = MemOp->getSize(); 3288 if (BytesLoaded < 4 && SrcTy.getSizeInBytes() == BytesLoaded) 3289 return selectCopy(I, TII, MRI, TRI, RBI); 3290 } 3291 3292 // For the 32-bit -> 64-bit case, we can emit a mov (ORRWrs) 3293 // + SUBREG_TO_REG. 3294 if (IsGPR && SrcSize == 32 && DstSize == 64) { 3295 Register SubregToRegSrc = 3296 MRI.createVirtualRegister(&AArch64::GPR32RegClass); 3297 const Register ZReg = AArch64::WZR; 3298 MIB.buildInstr(AArch64::ORRWrs, {SubregToRegSrc}, {ZReg, SrcReg}) 3299 .addImm(0); 3300 3301 MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) 3302 .addImm(0) 3303 .addUse(SubregToRegSrc) 3304 .addImm(AArch64::sub_32); 3305 3306 if (!RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, 3307 MRI)) { 3308 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT destination\n"); 3309 return false; 3310 } 3311 3312 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, 3313 MRI)) { 3314 LLVM_DEBUG(dbgs() << "Failed to constrain G_ZEXT source\n"); 3315 return false; 3316 } 3317 3318 I.eraseFromParent(); 3319 return true; 3320 } 3321 } 3322 3323 if (DstSize == 64) { 3324 if (Opcode != TargetOpcode::G_SEXT_INREG) { 3325 // FIXME: Can we avoid manually doing this? 3326 if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, 3327 MRI)) { 3328 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode) 3329 << " operand\n"); 3330 return false; 3331 } 3332 SrcReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, 3333 {&AArch64::GPR64RegClass}, {}) 3334 .addImm(0) 3335 .addUse(SrcReg) 3336 .addImm(AArch64::sub_32) 3337 .getReg(0); 3338 } 3339 3340 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMXri : AArch64::UBFMXri, 3341 {DefReg}, {SrcReg}) 3342 .addImm(0) 3343 .addImm(SrcSize - 1); 3344 } else if (DstSize <= 32) { 3345 ExtI = MIB.buildInstr(IsSigned ? AArch64::SBFMWri : AArch64::UBFMWri, 3346 {DefReg}, {SrcReg}) 3347 .addImm(0) 3348 .addImm(SrcSize - 1); 3349 } else { 3350 return false; 3351 } 3352 3353 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 3354 I.eraseFromParent(); 3355 return true; 3356 } 3357 3358 case TargetOpcode::G_SITOFP: 3359 case TargetOpcode::G_UITOFP: 3360 case TargetOpcode::G_FPTOSI: 3361 case TargetOpcode::G_FPTOUI: { 3362 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()), 3363 SrcTy = MRI.getType(I.getOperand(1).getReg()); 3364 const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy); 3365 if (NewOpc == Opcode) 3366 return false; 3367 3368 I.setDesc(TII.get(NewOpc)); 3369 constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3370 I.setFlags(MachineInstr::NoFPExcept); 3371 3372 return true; 3373 } 3374 3375 case TargetOpcode::G_FREEZE: 3376 return selectCopy(I, TII, MRI, TRI, RBI); 3377 3378 case TargetOpcode::G_INTTOPTR: 3379 // The importer is currently unable to import pointer types since they 3380 // didn't exist in SelectionDAG. 
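    // A same-width int-to-pointer conversion therefore selects to a plain
    // register copy. For example (illustrative MIR; vreg names are arbitrary):
    //   %p:gpr(p0) = G_INTTOPTR %x:gpr(s64)
    // becomes a COPY between two 64-bit GPRs.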
3381 return selectCopy(I, TII, MRI, TRI, RBI); 3382 3383 case TargetOpcode::G_BITCAST: 3384 // Imported SelectionDAG rules can handle every bitcast except those that 3385 // bitcast from a type to the same type. Ideally, these shouldn't occur 3386 // but we might not run an optimizer that deletes them. The other exception 3387 // is bitcasts involving pointer types, as SelectionDAG has no knowledge 3388 // of them. 3389 return selectCopy(I, TII, MRI, TRI, RBI); 3390 3391 case TargetOpcode::G_SELECT: { 3392 auto &Sel = cast<GSelect>(I); 3393 const Register CondReg = Sel.getCondReg(); 3394 const Register TReg = Sel.getTrueReg(); 3395 const Register FReg = Sel.getFalseReg(); 3396 3397 if (tryOptSelect(Sel)) 3398 return true; 3399 3400 // Make sure to use an unused vreg instead of wzr, so that the peephole 3401 // optimizations will be able to optimize these. 3402 Register DeadVReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 3403 auto TstMI = MIB.buildInstr(AArch64::ANDSWri, {DeadVReg}, {CondReg}) 3404 .addImm(AArch64_AM::encodeLogicalImmediate(1, 32)); 3405 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 3406 if (!emitSelect(Sel.getReg(0), TReg, FReg, AArch64CC::NE, MIB)) 3407 return false; 3408 Sel.eraseFromParent(); 3409 return true; 3410 } 3411 case TargetOpcode::G_ICMP: { 3412 if (Ty.isVector()) 3413 return selectVectorICmp(I, MRI); 3414 3415 if (Ty != LLT::scalar(32)) { 3416 LLVM_DEBUG(dbgs() << "G_ICMP result has type: " << Ty 3417 << ", expected: " << LLT::scalar(32) << '\n'); 3418 return false; 3419 } 3420 3421 auto Pred = static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); 3422 const AArch64CC::CondCode InvCC = 3423 changeICMPPredToAArch64CC(CmpInst::getInversePredicate(Pred)); 3424 emitIntegerCompare(I.getOperand(2), I.getOperand(3), I.getOperand(1), MIB); 3425 emitCSINC(/*Dst=*/I.getOperand(0).getReg(), /*Src1=*/AArch64::WZR, 3426 /*Src2=*/AArch64::WZR, InvCC, MIB); 3427 I.eraseFromParent(); 3428 return true; 3429 } 3430 3431 case TargetOpcode::G_FCMP: { 3432 CmpInst::Predicate Pred = 3433 static_cast<CmpInst::Predicate>(I.getOperand(1).getPredicate()); 3434 if (!emitFPCompare(I.getOperand(2).getReg(), I.getOperand(3).getReg(), MIB, 3435 Pred) || 3436 !emitCSetForFCmp(I.getOperand(0).getReg(), Pred, MIB)) 3437 return false; 3438 I.eraseFromParent(); 3439 return true; 3440 } 3441 case TargetOpcode::G_VASTART: 3442 return STI.isTargetDarwin() ? 
selectVaStartDarwin(I, MF, MRI)
3443                                 : selectVaStartAAPCS(I, MF, MRI);
3444   case TargetOpcode::G_INTRINSIC:
3445     return selectIntrinsic(I, MRI);
3446   case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
3447     return selectIntrinsicWithSideEffects(I, MRI);
3448   case TargetOpcode::G_IMPLICIT_DEF: {
3449     I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
3450     const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
3451     const Register DstReg = I.getOperand(0).getReg();
3452     const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
3453     const TargetRegisterClass *DstRC = getRegClassForTypeOnBank(DstTy, DstRB);
3454     RBI.constrainGenericRegister(DstReg, *DstRC, MRI);
3455     return true;
3456   }
3457   case TargetOpcode::G_BLOCK_ADDR: {
3458     if (TM.getCodeModel() == CodeModel::Large) {
3459       materializeLargeCMVal(I, I.getOperand(1).getBlockAddress(), 0);
3460       I.eraseFromParent();
3461       return true;
3462     } else {
3463       I.setDesc(TII.get(AArch64::MOVaddrBA));
3464       auto MovMI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::MOVaddrBA),
3465                            I.getOperand(0).getReg())
3466                        .addBlockAddress(I.getOperand(1).getBlockAddress(),
3467                                         /* Offset */ 0, AArch64II::MO_PAGE)
3468                        .addBlockAddress(
3469                            I.getOperand(1).getBlockAddress(), /* Offset */ 0,
3470                            AArch64II::MO_NC | AArch64II::MO_PAGEOFF);
3471       I.eraseFromParent();
3472       return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI);
3473     }
3474   }
3475   case AArch64::G_DUP: {
3476     // When the scalar operand of G_DUP is an s8/s16 GPR, it can't be selected
3477     // by the imported patterns, so do it manually here. Avoiding s16 GPR
3478     // values is difficult because, at RegBankSelect, adding an anyextend to
3479     // fix this could pessimize the FPR case. Manual selection is the most
3480     // robust solution for now.
3481     if (RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() !=
3482         AArch64::GPRRegBankID)
3483       return false; // We expect the fpr regbank case to be imported.
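    // At this point the scalar is on the GPR bank. For example (illustrative
    // MIR; vreg names are arbitrary):
    //   %v:fpr(<8 x s8>) = G_DUP %x:gpr(s8)
    // is selected directly to DUPv8i8gpr below.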
3484 LLT VecTy = MRI.getType(I.getOperand(0).getReg()); 3485 if (VecTy == LLT::fixed_vector(8, 8)) 3486 I.setDesc(TII.get(AArch64::DUPv8i8gpr)); 3487 else if (VecTy == LLT::fixed_vector(16, 8)) 3488 I.setDesc(TII.get(AArch64::DUPv16i8gpr)); 3489 else if (VecTy == LLT::fixed_vector(4, 16)) 3490 I.setDesc(TII.get(AArch64::DUPv4i16gpr)); 3491 else if (VecTy == LLT::fixed_vector(8, 16)) 3492 I.setDesc(TII.get(AArch64::DUPv8i16gpr)); 3493 else 3494 return false; 3495 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3496 } 3497 case TargetOpcode::G_INTRINSIC_TRUNC: 3498 return selectIntrinsicTrunc(I, MRI); 3499 case TargetOpcode::G_INTRINSIC_ROUND: 3500 return selectIntrinsicRound(I, MRI); 3501 case TargetOpcode::G_BUILD_VECTOR: 3502 return selectBuildVector(I, MRI); 3503 case TargetOpcode::G_MERGE_VALUES: 3504 return selectMergeValues(I, MRI); 3505 case TargetOpcode::G_UNMERGE_VALUES: 3506 return selectUnmergeValues(I, MRI); 3507 case TargetOpcode::G_SHUFFLE_VECTOR: 3508 return selectShuffleVector(I, MRI); 3509 case TargetOpcode::G_EXTRACT_VECTOR_ELT: 3510 return selectExtractElt(I, MRI); 3511 case TargetOpcode::G_INSERT_VECTOR_ELT: 3512 return selectInsertElt(I, MRI); 3513 case TargetOpcode::G_CONCAT_VECTORS: 3514 return selectConcatVectors(I, MRI); 3515 case TargetOpcode::G_JUMP_TABLE: 3516 return selectJumpTable(I, MRI); 3517 case TargetOpcode::G_VECREDUCE_ADD: 3518 return selectReduction(I, MRI); 3519 case TargetOpcode::G_MEMCPY: 3520 case TargetOpcode::G_MEMCPY_INLINE: 3521 case TargetOpcode::G_MEMMOVE: 3522 case TargetOpcode::G_MEMSET: 3523 assert(STI.hasMOPS() && "Shouldn't get here without +mops feature"); 3524 return selectMOPS(I, MRI); 3525 } 3526 3527 return false; 3528 } 3529 3530 bool AArch64InstructionSelector::selectReduction(MachineInstr &I, 3531 MachineRegisterInfo &MRI) { 3532 Register VecReg = I.getOperand(1).getReg(); 3533 LLT VecTy = MRI.getType(VecReg); 3534 if (I.getOpcode() == TargetOpcode::G_VECREDUCE_ADD) { 3535 // For <2 x i32> ADDPv2i32 generates an FPR64 value, so we need to emit 3536 // a subregister copy afterwards. 
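    // Roughly (illustrative), for %dst(s32) = G_VECREDUCE_ADD %vec(<2 x s32>):
    //   %sum:fpr64 = ADDPv2i32 %vec, %vec
    //   %dst:fpr32 = COPY %sum.ssub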
3537 if (VecTy == LLT::fixed_vector(2, 32)) { 3538 Register DstReg = I.getOperand(0).getReg(); 3539 auto AddP = MIB.buildInstr(AArch64::ADDPv2i32, {&AArch64::FPR64RegClass}, 3540 {VecReg, VecReg}); 3541 auto Copy = MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 3542 .addReg(AddP.getReg(0), 0, AArch64::ssub) 3543 .getReg(0); 3544 RBI.constrainGenericRegister(Copy, AArch64::FPR32RegClass, MRI); 3545 I.eraseFromParent(); 3546 return constrainSelectedInstRegOperands(*AddP, TII, TRI, RBI); 3547 } 3548 3549 unsigned Opc = 0; 3550 if (VecTy == LLT::fixed_vector(16, 8)) 3551 Opc = AArch64::ADDVv16i8v; 3552 else if (VecTy == LLT::fixed_vector(8, 16)) 3553 Opc = AArch64::ADDVv8i16v; 3554 else if (VecTy == LLT::fixed_vector(4, 32)) 3555 Opc = AArch64::ADDVv4i32v; 3556 else if (VecTy == LLT::fixed_vector(2, 64)) 3557 Opc = AArch64::ADDPv2i64p; 3558 else { 3559 LLVM_DEBUG(dbgs() << "Unhandled type for add reduction"); 3560 return false; 3561 } 3562 I.setDesc(TII.get(Opc)); 3563 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3564 } 3565 3566 return false; 3567 } 3568 3569 bool AArch64InstructionSelector::selectMOPS(MachineInstr &GI, 3570 MachineRegisterInfo &MRI) { 3571 unsigned Mopcode; 3572 switch (GI.getOpcode()) { 3573 case TargetOpcode::G_MEMCPY: 3574 case TargetOpcode::G_MEMCPY_INLINE: 3575 Mopcode = AArch64::MOPSMemoryCopyPseudo; 3576 break; 3577 case TargetOpcode::G_MEMMOVE: 3578 Mopcode = AArch64::MOPSMemoryMovePseudo; 3579 break; 3580 case TargetOpcode::G_MEMSET: 3581 // For tagged memset see llvm.aarch64.mops.memset.tag 3582 Mopcode = AArch64::MOPSMemorySetPseudo; 3583 break; 3584 } 3585 3586 auto &DstPtr = GI.getOperand(0); 3587 auto &SrcOrVal = GI.getOperand(1); 3588 auto &Size = GI.getOperand(2); 3589 3590 // Create copies of the registers that can be clobbered. 3591 const Register DstPtrCopy = MRI.cloneVirtualRegister(DstPtr.getReg()); 3592 const Register SrcValCopy = MRI.cloneVirtualRegister(SrcOrVal.getReg()); 3593 const Register SizeCopy = MRI.cloneVirtualRegister(Size.getReg()); 3594 3595 const bool IsSet = Mopcode == AArch64::MOPSMemorySetPseudo; 3596 const auto &SrcValRegClass = 3597 IsSet ? AArch64::GPR64RegClass : AArch64::GPR64commonRegClass; 3598 3599 // Constrain to specific registers 3600 RBI.constrainGenericRegister(DstPtrCopy, AArch64::GPR64commonRegClass, MRI); 3601 RBI.constrainGenericRegister(SrcValCopy, SrcValRegClass, MRI); 3602 RBI.constrainGenericRegister(SizeCopy, AArch64::GPR64RegClass, MRI); 3603 3604 MIB.buildCopy(DstPtrCopy, DstPtr); 3605 MIB.buildCopy(SrcValCopy, SrcOrVal); 3606 MIB.buildCopy(SizeCopy, Size); 3607 3608 // New instruction uses the copied registers because it must update them. 3609 // The defs are not used since they don't exist in G_MEM*. They are still 3610 // tied. 
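  // For G_MEMSET, for example, the pseudo emitted below is roughly
  // (illustrative; vreg names are arbitrary):
  //   %dstdef:gpr64common, %sizedef:gpr64 =
  //       MOPSMemorySetPseudo %dstcopy, %sizecopy, %valcopy
  // i.e. the size operand comes before the value operand.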
3611 // Note: order of operands is different from G_MEMSET, G_MEMCPY, G_MEMMOVE 3612 Register DefDstPtr = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass); 3613 Register DefSize = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3614 if (IsSet) { 3615 MIB.buildInstr(Mopcode, {DefDstPtr, DefSize}, 3616 {DstPtrCopy, SizeCopy, SrcValCopy}); 3617 } else { 3618 Register DefSrcPtr = MRI.createVirtualRegister(&SrcValRegClass); 3619 MIB.buildInstr(Mopcode, {DefDstPtr, DefSrcPtr, DefSize}, 3620 {DstPtrCopy, SrcValCopy, SizeCopy}); 3621 } 3622 3623 GI.eraseFromParent(); 3624 return true; 3625 } 3626 3627 bool AArch64InstructionSelector::selectBrJT(MachineInstr &I, 3628 MachineRegisterInfo &MRI) { 3629 assert(I.getOpcode() == TargetOpcode::G_BRJT && "Expected G_BRJT"); 3630 Register JTAddr = I.getOperand(0).getReg(); 3631 unsigned JTI = I.getOperand(1).getIndex(); 3632 Register Index = I.getOperand(2).getReg(); 3633 3634 Register TargetReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 3635 Register ScratchReg = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); 3636 3637 MF->getInfo<AArch64FunctionInfo>()->setJumpTableEntryInfo(JTI, 4, nullptr); 3638 auto JumpTableInst = MIB.buildInstr(AArch64::JumpTableDest32, 3639 {TargetReg, ScratchReg}, {JTAddr, Index}) 3640 .addJumpTableIndex(JTI); 3641 // Build the indirect branch. 3642 MIB.buildInstr(AArch64::BR, {}, {TargetReg}); 3643 I.eraseFromParent(); 3644 return constrainSelectedInstRegOperands(*JumpTableInst, TII, TRI, RBI); 3645 } 3646 3647 bool AArch64InstructionSelector::selectJumpTable(MachineInstr &I, 3648 MachineRegisterInfo &MRI) { 3649 assert(I.getOpcode() == TargetOpcode::G_JUMP_TABLE && "Expected jump table"); 3650 assert(I.getOperand(1).isJTI() && "Jump table op should have a JTI!"); 3651 3652 Register DstReg = I.getOperand(0).getReg(); 3653 unsigned JTI = I.getOperand(1).getIndex(); 3654 // We generate a MOVaddrJT which will get expanded to an ADRP + ADD later. 3655 auto MovMI = 3656 MIB.buildInstr(AArch64::MOVaddrJT, {DstReg}, {}) 3657 .addJumpTableIndex(JTI, AArch64II::MO_PAGE) 3658 .addJumpTableIndex(JTI, AArch64II::MO_NC | AArch64II::MO_PAGEOFF); 3659 I.eraseFromParent(); 3660 return constrainSelectedInstRegOperands(*MovMI, TII, TRI, RBI); 3661 } 3662 3663 bool AArch64InstructionSelector::selectTLSGlobalValue( 3664 MachineInstr &I, MachineRegisterInfo &MRI) { 3665 if (!STI.isTargetMachO()) 3666 return false; 3667 MachineFunction &MF = *I.getParent()->getParent(); 3668 MF.getFrameInfo().setAdjustsStack(true); 3669 3670 const auto &GlobalOp = I.getOperand(1); 3671 assert(GlobalOp.getOffset() == 0 && 3672 "Shouldn't have an offset on TLS globals!"); 3673 const GlobalValue &GV = *GlobalOp.getGlobal(); 3674 3675 auto LoadGOT = 3676 MIB.buildInstr(AArch64::LOADgot, {&AArch64::GPR64commonRegClass}, {}) 3677 .addGlobalAddress(&GV, 0, AArch64II::MO_TLS); 3678 3679 auto Load = MIB.buildInstr(AArch64::LDRXui, {&AArch64::GPR64commonRegClass}, 3680 {LoadGOT.getReg(0)}) 3681 .addImm(0); 3682 3683 MIB.buildCopy(Register(AArch64::X0), LoadGOT.getReg(0)); 3684 // TLS calls preserve all registers except those that absolutely must be 3685 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be 3686 // silly). 
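  //
  // The emitted sequence is roughly (illustrative; @tlsvar is a placeholder):
  //   %got:gpr64common = LOADgot target-flags(aarch64-tls) @tlsvar
  //   %fn:gpr64common  = LDRXui %got, 0
  //   $x0 = COPY %got
  //   BLR-style call to %fn, implicit $x0, implicit-def $x0, <TLS regmask>
  //   %dst = COPY $x0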
3687 MIB.buildInstr(getBLRCallOpcode(MF), {}, {Load}) 3688 .addUse(AArch64::X0, RegState::Implicit) 3689 .addDef(AArch64::X0, RegState::Implicit) 3690 .addRegMask(TRI.getTLSCallPreservedMask()); 3691 3692 MIB.buildCopy(I.getOperand(0).getReg(), Register(AArch64::X0)); 3693 RBI.constrainGenericRegister(I.getOperand(0).getReg(), AArch64::GPR64RegClass, 3694 MRI); 3695 I.eraseFromParent(); 3696 return true; 3697 } 3698 3699 bool AArch64InstructionSelector::selectIntrinsicTrunc( 3700 MachineInstr &I, MachineRegisterInfo &MRI) const { 3701 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); 3702 3703 // Select the correct opcode. 3704 unsigned Opc = 0; 3705 if (!SrcTy.isVector()) { 3706 switch (SrcTy.getSizeInBits()) { 3707 default: 3708 case 16: 3709 Opc = AArch64::FRINTZHr; 3710 break; 3711 case 32: 3712 Opc = AArch64::FRINTZSr; 3713 break; 3714 case 64: 3715 Opc = AArch64::FRINTZDr; 3716 break; 3717 } 3718 } else { 3719 unsigned NumElts = SrcTy.getNumElements(); 3720 switch (SrcTy.getElementType().getSizeInBits()) { 3721 default: 3722 break; 3723 case 16: 3724 if (NumElts == 4) 3725 Opc = AArch64::FRINTZv4f16; 3726 else if (NumElts == 8) 3727 Opc = AArch64::FRINTZv8f16; 3728 break; 3729 case 32: 3730 if (NumElts == 2) 3731 Opc = AArch64::FRINTZv2f32; 3732 else if (NumElts == 4) 3733 Opc = AArch64::FRINTZv4f32; 3734 break; 3735 case 64: 3736 if (NumElts == 2) 3737 Opc = AArch64::FRINTZv2f64; 3738 break; 3739 } 3740 } 3741 3742 if (!Opc) { 3743 // Didn't get an opcode above, bail. 3744 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_TRUNC!\n"); 3745 return false; 3746 } 3747 3748 // Legalization would have set us up perfectly for this; we just need to 3749 // set the opcode and move on. 3750 I.setDesc(TII.get(Opc)); 3751 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3752 } 3753 3754 bool AArch64InstructionSelector::selectIntrinsicRound( 3755 MachineInstr &I, MachineRegisterInfo &MRI) const { 3756 const LLT SrcTy = MRI.getType(I.getOperand(0).getReg()); 3757 3758 // Select the correct opcode. 3759 unsigned Opc = 0; 3760 if (!SrcTy.isVector()) { 3761 switch (SrcTy.getSizeInBits()) { 3762 default: 3763 case 16: 3764 Opc = AArch64::FRINTAHr; 3765 break; 3766 case 32: 3767 Opc = AArch64::FRINTASr; 3768 break; 3769 case 64: 3770 Opc = AArch64::FRINTADr; 3771 break; 3772 } 3773 } else { 3774 unsigned NumElts = SrcTy.getNumElements(); 3775 switch (SrcTy.getElementType().getSizeInBits()) { 3776 default: 3777 break; 3778 case 16: 3779 if (NumElts == 4) 3780 Opc = AArch64::FRINTAv4f16; 3781 else if (NumElts == 8) 3782 Opc = AArch64::FRINTAv8f16; 3783 break; 3784 case 32: 3785 if (NumElts == 2) 3786 Opc = AArch64::FRINTAv2f32; 3787 else if (NumElts == 4) 3788 Opc = AArch64::FRINTAv4f32; 3789 break; 3790 case 64: 3791 if (NumElts == 2) 3792 Opc = AArch64::FRINTAv2f64; 3793 break; 3794 } 3795 } 3796 3797 if (!Opc) { 3798 // Didn't get an opcode above, bail. 3799 LLVM_DEBUG(dbgs() << "Unsupported type for G_INTRINSIC_ROUND!\n"); 3800 return false; 3801 } 3802 3803 // Legalization would have set us up perfectly for this; we just need to 3804 // set the opcode and move on. 
3805 I.setDesc(TII.get(Opc)); 3806 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 3807 } 3808 3809 bool AArch64InstructionSelector::selectVectorICmp( 3810 MachineInstr &I, MachineRegisterInfo &MRI) { 3811 Register DstReg = I.getOperand(0).getReg(); 3812 LLT DstTy = MRI.getType(DstReg); 3813 Register SrcReg = I.getOperand(2).getReg(); 3814 Register Src2Reg = I.getOperand(3).getReg(); 3815 LLT SrcTy = MRI.getType(SrcReg); 3816 3817 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits(); 3818 unsigned NumElts = DstTy.getNumElements(); 3819 3820 // First index is element size, 0 == 8b, 1 == 16b, 2 == 32b, 3 == 64b 3821 // Second index is num elts, 0 == v2, 1 == v4, 2 == v8, 3 == v16 3822 // Third index is cc opcode: 3823 // 0 == eq 3824 // 1 == ugt 3825 // 2 == uge 3826 // 3 == ult 3827 // 4 == ule 3828 // 5 == sgt 3829 // 6 == sge 3830 // 7 == slt 3831 // 8 == sle 3832 // ne is done by negating 'eq' result. 3833 3834 // This table below assumes that for some comparisons the operands will be 3835 // commuted. 3836 // ult op == commute + ugt op 3837 // ule op == commute + uge op 3838 // slt op == commute + sgt op 3839 // sle op == commute + sge op 3840 unsigned PredIdx = 0; 3841 bool SwapOperands = false; 3842 CmpInst::Predicate Pred = (CmpInst::Predicate)I.getOperand(1).getPredicate(); 3843 switch (Pred) { 3844 case CmpInst::ICMP_NE: 3845 case CmpInst::ICMP_EQ: 3846 PredIdx = 0; 3847 break; 3848 case CmpInst::ICMP_UGT: 3849 PredIdx = 1; 3850 break; 3851 case CmpInst::ICMP_UGE: 3852 PredIdx = 2; 3853 break; 3854 case CmpInst::ICMP_ULT: 3855 PredIdx = 3; 3856 SwapOperands = true; 3857 break; 3858 case CmpInst::ICMP_ULE: 3859 PredIdx = 4; 3860 SwapOperands = true; 3861 break; 3862 case CmpInst::ICMP_SGT: 3863 PredIdx = 5; 3864 break; 3865 case CmpInst::ICMP_SGE: 3866 PredIdx = 6; 3867 break; 3868 case CmpInst::ICMP_SLT: 3869 PredIdx = 7; 3870 SwapOperands = true; 3871 break; 3872 case CmpInst::ICMP_SLE: 3873 PredIdx = 8; 3874 SwapOperands = true; 3875 break; 3876 default: 3877 llvm_unreachable("Unhandled icmp predicate"); 3878 return false; 3879 } 3880 3881 // This table obviously should be tablegen'd when we have our GISel native 3882 // tablegen selector. 
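  // Indexing example (illustrative): a <4 x s32> signed-greater-than compare
  // uses OpcTable[2][1][5] == CMGTv4i32, since EltIdx = Log2(32 / 8) = 2,
  // NumEltsIdx = Log2(4 / 2) = 1, and PredIdx = 5 for sgt.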
3883 3884 static const unsigned OpcTable[4][4][9] = { 3885 { 3886 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3887 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3888 0 /* invalid */}, 3889 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3890 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3891 0 /* invalid */}, 3892 {AArch64::CMEQv8i8, AArch64::CMHIv8i8, AArch64::CMHSv8i8, 3893 AArch64::CMHIv8i8, AArch64::CMHSv8i8, AArch64::CMGTv8i8, 3894 AArch64::CMGEv8i8, AArch64::CMGTv8i8, AArch64::CMGEv8i8}, 3895 {AArch64::CMEQv16i8, AArch64::CMHIv16i8, AArch64::CMHSv16i8, 3896 AArch64::CMHIv16i8, AArch64::CMHSv16i8, AArch64::CMGTv16i8, 3897 AArch64::CMGEv16i8, AArch64::CMGTv16i8, AArch64::CMGEv16i8} 3898 }, 3899 { 3900 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3901 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3902 0 /* invalid */}, 3903 {AArch64::CMEQv4i16, AArch64::CMHIv4i16, AArch64::CMHSv4i16, 3904 AArch64::CMHIv4i16, AArch64::CMHSv4i16, AArch64::CMGTv4i16, 3905 AArch64::CMGEv4i16, AArch64::CMGTv4i16, AArch64::CMGEv4i16}, 3906 {AArch64::CMEQv8i16, AArch64::CMHIv8i16, AArch64::CMHSv8i16, 3907 AArch64::CMHIv8i16, AArch64::CMHSv8i16, AArch64::CMGTv8i16, 3908 AArch64::CMGEv8i16, AArch64::CMGTv8i16, AArch64::CMGEv8i16}, 3909 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3910 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3911 0 /* invalid */} 3912 }, 3913 { 3914 {AArch64::CMEQv2i32, AArch64::CMHIv2i32, AArch64::CMHSv2i32, 3915 AArch64::CMHIv2i32, AArch64::CMHSv2i32, AArch64::CMGTv2i32, 3916 AArch64::CMGEv2i32, AArch64::CMGTv2i32, AArch64::CMGEv2i32}, 3917 {AArch64::CMEQv4i32, AArch64::CMHIv4i32, AArch64::CMHSv4i32, 3918 AArch64::CMHIv4i32, AArch64::CMHSv4i32, AArch64::CMGTv4i32, 3919 AArch64::CMGEv4i32, AArch64::CMGTv4i32, AArch64::CMGEv4i32}, 3920 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3921 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3922 0 /* invalid */}, 3923 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3924 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3925 0 /* invalid */} 3926 }, 3927 { 3928 {AArch64::CMEQv2i64, AArch64::CMHIv2i64, AArch64::CMHSv2i64, 3929 AArch64::CMHIv2i64, AArch64::CMHSv2i64, AArch64::CMGTv2i64, 3930 AArch64::CMGEv2i64, AArch64::CMGTv2i64, AArch64::CMGEv2i64}, 3931 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3932 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3933 0 /* invalid */}, 3934 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3935 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3936 0 /* invalid */}, 3937 {0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3938 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 0 /* invalid */, 3939 0 /* invalid */} 3940 }, 3941 }; 3942 unsigned EltIdx = Log2_32(SrcEltSize / 8); 3943 unsigned NumEltsIdx = Log2_32(NumElts / 2); 3944 unsigned Opc = OpcTable[EltIdx][NumEltsIdx][PredIdx]; 3945 if (!Opc) { 3946 LLVM_DEBUG(dbgs() << "Could not map G_ICMP to cmp opcode"); 3947 return false; 3948 } 3949 3950 const RegisterBank &VecRB = *RBI.getRegBank(SrcReg, MRI, TRI); 3951 const TargetRegisterClass *SrcRC = 3952 getRegClassForTypeOnBank(SrcTy, VecRB, true); 3953 if (!SrcRC) { 3954 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); 3955 return 
false; 3956 } 3957 3958 unsigned NotOpc = Pred == ICmpInst::ICMP_NE ? AArch64::NOTv8i8 : 0; 3959 if (SrcTy.getSizeInBits() == 128) 3960 NotOpc = NotOpc ? AArch64::NOTv16i8 : 0; 3961 3962 if (SwapOperands) 3963 std::swap(SrcReg, Src2Reg); 3964 3965 auto Cmp = MIB.buildInstr(Opc, {SrcRC}, {SrcReg, Src2Reg}); 3966 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 3967 3968 // Invert if we had a 'ne' cc. 3969 if (NotOpc) { 3970 Cmp = MIB.buildInstr(NotOpc, {DstReg}, {Cmp}); 3971 constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI); 3972 } else { 3973 MIB.buildCopy(DstReg, Cmp.getReg(0)); 3974 } 3975 RBI.constrainGenericRegister(DstReg, *SrcRC, MRI); 3976 I.eraseFromParent(); 3977 return true; 3978 } 3979 3980 MachineInstr *AArch64InstructionSelector::emitScalarToVector( 3981 unsigned EltSize, const TargetRegisterClass *DstRC, Register Scalar, 3982 MachineIRBuilder &MIRBuilder) const { 3983 auto Undef = MIRBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstRC}, {}); 3984 3985 auto BuildFn = [&](unsigned SubregIndex) { 3986 auto Ins = 3987 MIRBuilder 3988 .buildInstr(TargetOpcode::INSERT_SUBREG, {DstRC}, {Undef, Scalar}) 3989 .addImm(SubregIndex); 3990 constrainSelectedInstRegOperands(*Undef, TII, TRI, RBI); 3991 constrainSelectedInstRegOperands(*Ins, TII, TRI, RBI); 3992 return &*Ins; 3993 }; 3994 3995 switch (EltSize) { 3996 case 8: 3997 return BuildFn(AArch64::bsub); 3998 case 16: 3999 return BuildFn(AArch64::hsub); 4000 case 32: 4001 return BuildFn(AArch64::ssub); 4002 case 64: 4003 return BuildFn(AArch64::dsub); 4004 default: 4005 return nullptr; 4006 } 4007 } 4008 4009 bool AArch64InstructionSelector::selectMergeValues( 4010 MachineInstr &I, MachineRegisterInfo &MRI) { 4011 assert(I.getOpcode() == TargetOpcode::G_MERGE_VALUES && "unexpected opcode"); 4012 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 4013 const LLT SrcTy = MRI.getType(I.getOperand(1).getReg()); 4014 assert(!DstTy.isVector() && !SrcTy.isVector() && "invalid merge operation"); 4015 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); 4016 4017 if (I.getNumOperands() != 3) 4018 return false; 4019 4020 // Merging 2 s64s into an s128. 
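  // For example (illustrative MIR; vreg names are arbitrary):
  //   %d:fpr(s128) = G_MERGE_VALUES %lo:fpr(s64), %hi:fpr(s64)
  // becomes an IMPLICIT_DEF plus two lane inserts: %lo into lane 0 and %hi
  // into lane 1.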
4021 if (DstTy == LLT::scalar(128)) { 4022 if (SrcTy.getSizeInBits() != 64) 4023 return false; 4024 Register DstReg = I.getOperand(0).getReg(); 4025 Register Src1Reg = I.getOperand(1).getReg(); 4026 Register Src2Reg = I.getOperand(2).getReg(); 4027 auto Tmp = MIB.buildInstr(TargetOpcode::IMPLICIT_DEF, {DstTy}, {}); 4028 MachineInstr *InsMI = emitLaneInsert(std::nullopt, Tmp.getReg(0), Src1Reg, 4029 /* LaneIdx */ 0, RB, MIB); 4030 if (!InsMI) 4031 return false; 4032 MachineInstr *Ins2MI = emitLaneInsert(DstReg, InsMI->getOperand(0).getReg(), 4033 Src2Reg, /* LaneIdx */ 1, RB, MIB); 4034 if (!Ins2MI) 4035 return false; 4036 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); 4037 constrainSelectedInstRegOperands(*Ins2MI, TII, TRI, RBI); 4038 I.eraseFromParent(); 4039 return true; 4040 } 4041 4042 if (RB.getID() != AArch64::GPRRegBankID) 4043 return false; 4044 4045 if (DstTy.getSizeInBits() != 64 || SrcTy.getSizeInBits() != 32) 4046 return false; 4047 4048 auto *DstRC = &AArch64::GPR64RegClass; 4049 Register SubToRegDef = MRI.createVirtualRegister(DstRC); 4050 MachineInstr &SubRegMI = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 4051 TII.get(TargetOpcode::SUBREG_TO_REG)) 4052 .addDef(SubToRegDef) 4053 .addImm(0) 4054 .addUse(I.getOperand(1).getReg()) 4055 .addImm(AArch64::sub_32); 4056 Register SubToRegDef2 = MRI.createVirtualRegister(DstRC); 4057 // Need to anyext the second scalar before we can use bfm 4058 MachineInstr &SubRegMI2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 4059 TII.get(TargetOpcode::SUBREG_TO_REG)) 4060 .addDef(SubToRegDef2) 4061 .addImm(0) 4062 .addUse(I.getOperand(2).getReg()) 4063 .addImm(AArch64::sub_32); 4064 MachineInstr &BFM = 4065 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::BFMXri)) 4066 .addDef(I.getOperand(0).getReg()) 4067 .addUse(SubToRegDef) 4068 .addUse(SubToRegDef2) 4069 .addImm(32) 4070 .addImm(31); 4071 constrainSelectedInstRegOperands(SubRegMI, TII, TRI, RBI); 4072 constrainSelectedInstRegOperands(SubRegMI2, TII, TRI, RBI); 4073 constrainSelectedInstRegOperands(BFM, TII, TRI, RBI); 4074 I.eraseFromParent(); 4075 return true; 4076 } 4077 4078 static bool getLaneCopyOpcode(unsigned &CopyOpc, unsigned &ExtractSubReg, 4079 const unsigned EltSize) { 4080 // Choose a lane copy opcode and subregister based off of the size of the 4081 // vector's elements. 4082 switch (EltSize) { 4083 case 8: 4084 CopyOpc = AArch64::DUPi8; 4085 ExtractSubReg = AArch64::bsub; 4086 break; 4087 case 16: 4088 CopyOpc = AArch64::DUPi16; 4089 ExtractSubReg = AArch64::hsub; 4090 break; 4091 case 32: 4092 CopyOpc = AArch64::DUPi32; 4093 ExtractSubReg = AArch64::ssub; 4094 break; 4095 case 64: 4096 CopyOpc = AArch64::DUPi64; 4097 ExtractSubReg = AArch64::dsub; 4098 break; 4099 default: 4100 // Unknown size, bail out. 
4101 LLVM_DEBUG(dbgs() << "Elt size '" << EltSize << "' unsupported.\n"); 4102 return false; 4103 } 4104 return true; 4105 } 4106 4107 MachineInstr *AArch64InstructionSelector::emitExtractVectorElt( 4108 std::optional<Register> DstReg, const RegisterBank &DstRB, LLT ScalarTy, 4109 Register VecReg, unsigned LaneIdx, MachineIRBuilder &MIRBuilder) const { 4110 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4111 unsigned CopyOpc = 0; 4112 unsigned ExtractSubReg = 0; 4113 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, ScalarTy.getSizeInBits())) { 4114 LLVM_DEBUG( 4115 dbgs() << "Couldn't determine lane copy opcode for instruction.\n"); 4116 return nullptr; 4117 } 4118 4119 const TargetRegisterClass *DstRC = 4120 getRegClassForTypeOnBank(ScalarTy, DstRB, true); 4121 if (!DstRC) { 4122 LLVM_DEBUG(dbgs() << "Could not determine destination register class.\n"); 4123 return nullptr; 4124 } 4125 4126 const RegisterBank &VecRB = *RBI.getRegBank(VecReg, MRI, TRI); 4127 const LLT &VecTy = MRI.getType(VecReg); 4128 const TargetRegisterClass *VecRC = 4129 getRegClassForTypeOnBank(VecTy, VecRB, true); 4130 if (!VecRC) { 4131 LLVM_DEBUG(dbgs() << "Could not determine source register class.\n"); 4132 return nullptr; 4133 } 4134 4135 // The register that we're going to copy into. 4136 Register InsertReg = VecReg; 4137 if (!DstReg) 4138 DstReg = MRI.createVirtualRegister(DstRC); 4139 // If the lane index is 0, we just use a subregister COPY. 4140 if (LaneIdx == 0) { 4141 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {*DstReg}, {}) 4142 .addReg(VecReg, 0, ExtractSubReg); 4143 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 4144 return &*Copy; 4145 } 4146 4147 // Lane copies require 128-bit wide registers. If we're dealing with an 4148 // unpacked vector, then we need to move up to that width. Insert an implicit 4149 // def and a subregister insert to get us there. 4150 if (VecTy.getSizeInBits() != 128) { 4151 MachineInstr *ScalarToVector = emitScalarToVector( 4152 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, VecReg, MIRBuilder); 4153 if (!ScalarToVector) 4154 return nullptr; 4155 InsertReg = ScalarToVector->getOperand(0).getReg(); 4156 } 4157 4158 MachineInstr *LaneCopyMI = 4159 MIRBuilder.buildInstr(CopyOpc, {*DstReg}, {InsertReg}).addImm(LaneIdx); 4160 constrainSelectedInstRegOperands(*LaneCopyMI, TII, TRI, RBI); 4161 4162 // Make sure that we actually constrain the initial copy. 4163 RBI.constrainGenericRegister(*DstReg, *DstRC, MRI); 4164 return LaneCopyMI; 4165 } 4166 4167 bool AArch64InstructionSelector::selectExtractElt( 4168 MachineInstr &I, MachineRegisterInfo &MRI) { 4169 assert(I.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT && 4170 "unexpected opcode!"); 4171 Register DstReg = I.getOperand(0).getReg(); 4172 const LLT NarrowTy = MRI.getType(DstReg); 4173 const Register SrcReg = I.getOperand(1).getReg(); 4174 const LLT WideTy = MRI.getType(SrcReg); 4175 (void)WideTy; 4176 assert(WideTy.getSizeInBits() >= NarrowTy.getSizeInBits() && 4177 "source register size too small!"); 4178 assert(!NarrowTy.isVector() && "cannot extract vector into vector!"); 4179 4180 // Need the lane index to determine the correct copy opcode. 4181 MachineOperand &LaneIdxOp = I.getOperand(2); 4182 assert(LaneIdxOp.isReg() && "Lane index operand was not a register?"); 4183 4184 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) { 4185 LLVM_DEBUG(dbgs() << "Cannot extract into GPR.\n"); 4186 return false; 4187 } 4188 4189 // Find the index to extract from. 
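  // Only constant lane indices are handled here; for example (illustrative):
  //   %idx:gpr(s64) = G_CONSTANT i64 1
  //   %elt:fpr(s32) = G_EXTRACT_VECTOR_ELT %vec:fpr(<4 x s32>), %idx
  // extracts lane 1, while a non-constant index fails selection below.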
4190 auto VRegAndVal = getIConstantVRegValWithLookThrough(LaneIdxOp.getReg(), MRI); 4191 if (!VRegAndVal) 4192 return false; 4193 unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); 4194 4195 4196 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 4197 MachineInstr *Extract = emitExtractVectorElt(DstReg, DstRB, NarrowTy, SrcReg, 4198 LaneIdx, MIB); 4199 if (!Extract) 4200 return false; 4201 4202 I.eraseFromParent(); 4203 return true; 4204 } 4205 4206 bool AArch64InstructionSelector::selectSplitVectorUnmerge( 4207 MachineInstr &I, MachineRegisterInfo &MRI) { 4208 unsigned NumElts = I.getNumOperands() - 1; 4209 Register SrcReg = I.getOperand(NumElts).getReg(); 4210 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 4211 const LLT SrcTy = MRI.getType(SrcReg); 4212 4213 assert(NarrowTy.isVector() && "Expected an unmerge into vectors"); 4214 if (SrcTy.getSizeInBits() > 128) { 4215 LLVM_DEBUG(dbgs() << "Unexpected vector type for vec split unmerge"); 4216 return false; 4217 } 4218 4219 // We implement a split vector operation by treating the sub-vectors as 4220 // scalars and extracting them. 4221 const RegisterBank &DstRB = 4222 *RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI); 4223 for (unsigned OpIdx = 0; OpIdx < NumElts; ++OpIdx) { 4224 Register Dst = I.getOperand(OpIdx).getReg(); 4225 MachineInstr *Extract = 4226 emitExtractVectorElt(Dst, DstRB, NarrowTy, SrcReg, OpIdx, MIB); 4227 if (!Extract) 4228 return false; 4229 } 4230 I.eraseFromParent(); 4231 return true; 4232 } 4233 4234 bool AArch64InstructionSelector::selectUnmergeValues(MachineInstr &I, 4235 MachineRegisterInfo &MRI) { 4236 assert(I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES && 4237 "unexpected opcode"); 4238 4239 // TODO: Handle unmerging into GPRs and from scalars to scalars. 4240 if (RBI.getRegBank(I.getOperand(0).getReg(), MRI, TRI)->getID() != 4241 AArch64::FPRRegBankID || 4242 RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI)->getID() != 4243 AArch64::FPRRegBankID) { 4244 LLVM_DEBUG(dbgs() << "Unmerging vector-to-gpr and scalar-to-scalar " 4245 "currently unsupported.\n"); 4246 return false; 4247 } 4248 4249 // The last operand is the vector source register, and every other operand is 4250 // a register to unpack into. 4251 unsigned NumElts = I.getNumOperands() - 1; 4252 Register SrcReg = I.getOperand(NumElts).getReg(); 4253 const LLT NarrowTy = MRI.getType(I.getOperand(0).getReg()); 4254 const LLT WideTy = MRI.getType(SrcReg); 4255 (void)WideTy; 4256 assert((WideTy.isVector() || WideTy.getSizeInBits() == 128) && 4257 "can only unmerge from vector or s128 types!"); 4258 assert(WideTy.getSizeInBits() > NarrowTy.getSizeInBits() && 4259 "source register size too small!"); 4260 4261 if (!NarrowTy.isScalar()) 4262 return selectSplitVectorUnmerge(I, MRI); 4263 4264 // Choose a lane copy opcode and subregister based off of the size of the 4265 // vector's elements. 4266 unsigned CopyOpc = 0; 4267 unsigned ExtractSubReg = 0; 4268 if (!getLaneCopyOpcode(CopyOpc, ExtractSubReg, NarrowTy.getSizeInBits())) 4269 return false; 4270 4271 // Set up for the lane copies. 4272 MachineBasicBlock &MBB = *I.getParent(); 4273 4274 // Stores the registers we'll be copying from. 4275 SmallVector<Register, 4> InsertRegs; 4276 4277 // We'll use the first register twice, so we only need NumElts-1 registers. 4278 unsigned NumInsertRegs = NumElts - 1; 4279 4280 // If our elements fit into exactly 128 bits, then we can copy from the source 4281 // directly. 
Otherwise, we need to do a bit of setup with some subregister 4282 // inserts. 4283 if (NarrowTy.getSizeInBits() * NumElts == 128) { 4284 InsertRegs = SmallVector<Register, 4>(NumInsertRegs, SrcReg); 4285 } else { 4286 // No. We have to perform subregister inserts. For each insert, create an 4287 // implicit def and a subregister insert, and save the register we create. 4288 const TargetRegisterClass *RC = getRegClassForTypeOnBank( 4289 LLT::fixed_vector(NumElts, WideTy.getScalarSizeInBits()), 4290 *RBI.getRegBank(SrcReg, MRI, TRI)); 4291 unsigned SubReg = 0; 4292 bool Found = getSubRegForClass(RC, TRI, SubReg); 4293 (void)Found; 4294 assert(Found && "expected to find last operand's subeg idx"); 4295 for (unsigned Idx = 0; Idx < NumInsertRegs; ++Idx) { 4296 Register ImpDefReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); 4297 MachineInstr &ImpDefMI = 4298 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(TargetOpcode::IMPLICIT_DEF), 4299 ImpDefReg); 4300 4301 // Now, create the subregister insert from SrcReg. 4302 Register InsertReg = MRI.createVirtualRegister(&AArch64::FPR128RegClass); 4303 MachineInstr &InsMI = 4304 *BuildMI(MBB, I, I.getDebugLoc(), 4305 TII.get(TargetOpcode::INSERT_SUBREG), InsertReg) 4306 .addUse(ImpDefReg) 4307 .addUse(SrcReg) 4308 .addImm(SubReg); 4309 4310 constrainSelectedInstRegOperands(ImpDefMI, TII, TRI, RBI); 4311 constrainSelectedInstRegOperands(InsMI, TII, TRI, RBI); 4312 4313 // Save the register so that we can copy from it after. 4314 InsertRegs.push_back(InsertReg); 4315 } 4316 } 4317 4318 // Now that we've created any necessary subregister inserts, we can 4319 // create the copies. 4320 // 4321 // Perform the first copy separately as a subregister copy. 4322 Register CopyTo = I.getOperand(0).getReg(); 4323 auto FirstCopy = MIB.buildInstr(TargetOpcode::COPY, {CopyTo}, {}) 4324 .addReg(InsertRegs[0], 0, ExtractSubReg); 4325 constrainSelectedInstRegOperands(*FirstCopy, TII, TRI, RBI); 4326 4327 // Now, perform the remaining copies as vector lane copies. 4328 unsigned LaneIdx = 1; 4329 for (Register InsReg : InsertRegs) { 4330 Register CopyTo = I.getOperand(LaneIdx).getReg(); 4331 MachineInstr &CopyInst = 4332 *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CopyOpc), CopyTo) 4333 .addUse(InsReg) 4334 .addImm(LaneIdx); 4335 constrainSelectedInstRegOperands(CopyInst, TII, TRI, RBI); 4336 ++LaneIdx; 4337 } 4338 4339 // Separately constrain the first copy's destination. Because of the 4340 // limitation in constrainOperandRegClass, we can't guarantee that this will 4341 // actually be constrained. So, do it ourselves using the second operand. 
4342 const TargetRegisterClass *RC = 4343 MRI.getRegClassOrNull(I.getOperand(1).getReg()); 4344 if (!RC) { 4345 LLVM_DEBUG(dbgs() << "Couldn't constrain copy destination.\n"); 4346 return false; 4347 } 4348 4349 RBI.constrainGenericRegister(CopyTo, *RC, MRI); 4350 I.eraseFromParent(); 4351 return true; 4352 } 4353 4354 bool AArch64InstructionSelector::selectConcatVectors( 4355 MachineInstr &I, MachineRegisterInfo &MRI) { 4356 assert(I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && 4357 "Unexpected opcode"); 4358 Register Dst = I.getOperand(0).getReg(); 4359 Register Op1 = I.getOperand(1).getReg(); 4360 Register Op2 = I.getOperand(2).getReg(); 4361 MachineInstr *ConcatMI = emitVectorConcat(Dst, Op1, Op2, MIB); 4362 if (!ConcatMI) 4363 return false; 4364 I.eraseFromParent(); 4365 return true; 4366 } 4367 4368 unsigned 4369 AArch64InstructionSelector::emitConstantPoolEntry(const Constant *CPVal, 4370 MachineFunction &MF) const { 4371 Type *CPTy = CPVal->getType(); 4372 Align Alignment = MF.getDataLayout().getPrefTypeAlign(CPTy); 4373 4374 MachineConstantPool *MCP = MF.getConstantPool(); 4375 return MCP->getConstantPoolIndex(CPVal, Alignment); 4376 } 4377 4378 MachineInstr *AArch64InstructionSelector::emitLoadFromConstantPool( 4379 const Constant *CPVal, MachineIRBuilder &MIRBuilder) const { 4380 const TargetRegisterClass *RC; 4381 unsigned Opc; 4382 bool IsTiny = TM.getCodeModel() == CodeModel::Tiny; 4383 unsigned Size = MIRBuilder.getDataLayout().getTypeStoreSize(CPVal->getType()); 4384 switch (Size) { 4385 case 16: 4386 RC = &AArch64::FPR128RegClass; 4387 Opc = IsTiny ? AArch64::LDRQl : AArch64::LDRQui; 4388 break; 4389 case 8: 4390 RC = &AArch64::FPR64RegClass; 4391 Opc = IsTiny ? AArch64::LDRDl : AArch64::LDRDui; 4392 break; 4393 case 4: 4394 RC = &AArch64::FPR32RegClass; 4395 Opc = IsTiny ? AArch64::LDRSl : AArch64::LDRSui; 4396 break; 4397 case 2: 4398 RC = &AArch64::FPR16RegClass; 4399 Opc = AArch64::LDRHui; 4400 break; 4401 default: 4402 LLVM_DEBUG(dbgs() << "Could not load from constant pool of type " 4403 << *CPVal->getType()); 4404 return nullptr; 4405 } 4406 4407 MachineInstr *LoadMI = nullptr; 4408 auto &MF = MIRBuilder.getMF(); 4409 unsigned CPIdx = emitConstantPoolEntry(CPVal, MF); 4410 if (IsTiny && (Size == 16 || Size == 8 || Size == 4)) { 4411 // Use load(literal) for tiny code model. 4412 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {}).addConstantPoolIndex(CPIdx); 4413 } else { 4414 auto Adrp = 4415 MIRBuilder.buildInstr(AArch64::ADRP, {&AArch64::GPR64RegClass}, {}) 4416 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE); 4417 4418 LoadMI = &*MIRBuilder.buildInstr(Opc, {RC}, {Adrp}) 4419 .addConstantPoolIndex( 4420 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC); 4421 4422 constrainSelectedInstRegOperands(*Adrp, TII, TRI, RBI); 4423 } 4424 4425 MachinePointerInfo PtrInfo = MachinePointerInfo::getConstantPool(MF); 4426 LoadMI->addMemOperand(MF, MF.getMachineMemOperand(PtrInfo, 4427 MachineMemOperand::MOLoad, 4428 Size, Align(Size))); 4429 constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI); 4430 return LoadMI; 4431 } 4432 4433 /// Return an <Opcode, SubregIndex> pair to do an vector elt insert of a given 4434 /// size and RB. 
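/// For example, a 32-bit element on the FPR bank maps to
/// {AArch64::INSvi32lane, AArch64::ssub}, while the same element size on the
/// GPR bank maps to {AArch64::INSvi32gpr, AArch64::ssub}.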
4435 static std::pair<unsigned, unsigned> 4436 getInsertVecEltOpInfo(const RegisterBank &RB, unsigned EltSize) { 4437 unsigned Opc, SubregIdx; 4438 if (RB.getID() == AArch64::GPRRegBankID) { 4439 if (EltSize == 8) { 4440 Opc = AArch64::INSvi8gpr; 4441 SubregIdx = AArch64::bsub; 4442 } else if (EltSize == 16) { 4443 Opc = AArch64::INSvi16gpr; 4444 SubregIdx = AArch64::ssub; 4445 } else if (EltSize == 32) { 4446 Opc = AArch64::INSvi32gpr; 4447 SubregIdx = AArch64::ssub; 4448 } else if (EltSize == 64) { 4449 Opc = AArch64::INSvi64gpr; 4450 SubregIdx = AArch64::dsub; 4451 } else { 4452 llvm_unreachable("invalid elt size!"); 4453 } 4454 } else { 4455 if (EltSize == 8) { 4456 Opc = AArch64::INSvi8lane; 4457 SubregIdx = AArch64::bsub; 4458 } else if (EltSize == 16) { 4459 Opc = AArch64::INSvi16lane; 4460 SubregIdx = AArch64::hsub; 4461 } else if (EltSize == 32) { 4462 Opc = AArch64::INSvi32lane; 4463 SubregIdx = AArch64::ssub; 4464 } else if (EltSize == 64) { 4465 Opc = AArch64::INSvi64lane; 4466 SubregIdx = AArch64::dsub; 4467 } else { 4468 llvm_unreachable("invalid elt size!"); 4469 } 4470 } 4471 return std::make_pair(Opc, SubregIdx); 4472 } 4473 4474 MachineInstr *AArch64InstructionSelector::emitInstr( 4475 unsigned Opcode, std::initializer_list<llvm::DstOp> DstOps, 4476 std::initializer_list<llvm::SrcOp> SrcOps, MachineIRBuilder &MIRBuilder, 4477 const ComplexRendererFns &RenderFns) const { 4478 assert(Opcode && "Expected an opcode?"); 4479 assert(!isPreISelGenericOpcode(Opcode) && 4480 "Function should only be used to produce selected instructions!"); 4481 auto MI = MIRBuilder.buildInstr(Opcode, DstOps, SrcOps); 4482 if (RenderFns) 4483 for (auto &Fn : *RenderFns) 4484 Fn(MI); 4485 constrainSelectedInstRegOperands(*MI, TII, TRI, RBI); 4486 return &*MI; 4487 } 4488 4489 MachineInstr *AArch64InstructionSelector::emitAddSub( 4490 const std::array<std::array<unsigned, 2>, 5> &AddrModeAndSizeToOpcode, 4491 Register Dst, MachineOperand &LHS, MachineOperand &RHS, 4492 MachineIRBuilder &MIRBuilder) const { 4493 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4494 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); 4495 auto Ty = MRI.getType(LHS.getReg()); 4496 assert(!Ty.isVector() && "Expected a scalar or pointer?"); 4497 unsigned Size = Ty.getSizeInBits(); 4498 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit type only"); 4499 bool Is32Bit = Size == 32; 4500 4501 // INSTRri form with positive arithmetic immediate. 4502 if (auto Fns = selectArithImmed(RHS)) 4503 return emitInstr(AddrModeAndSizeToOpcode[0][Is32Bit], {Dst}, {LHS}, 4504 MIRBuilder, Fns); 4505 4506 // INSTRri form with negative arithmetic immediate. 4507 if (auto Fns = selectNegArithImmed(RHS)) 4508 return emitInstr(AddrModeAndSizeToOpcode[3][Is32Bit], {Dst}, {LHS}, 4509 MIRBuilder, Fns); 4510 4511 // INSTRrx form. 4512 if (auto Fns = selectArithExtendedRegister(RHS)) 4513 return emitInstr(AddrModeAndSizeToOpcode[4][Is32Bit], {Dst}, {LHS}, 4514 MIRBuilder, Fns); 4515 4516 // INSTRrs form. 
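  // (register shifted by an immediate), e.g. ADD Xd, Xn, Xm, lsl #2. If none
  // of the special forms match, fall back to the plain INSTRrr form below.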
4517 if (auto Fns = selectShiftedRegister(RHS)) 4518 return emitInstr(AddrModeAndSizeToOpcode[1][Is32Bit], {Dst}, {LHS}, 4519 MIRBuilder, Fns); 4520 return emitInstr(AddrModeAndSizeToOpcode[2][Is32Bit], {Dst}, {LHS, RHS}, 4521 MIRBuilder); 4522 } 4523 4524 MachineInstr * 4525 AArch64InstructionSelector::emitADD(Register DefReg, MachineOperand &LHS, 4526 MachineOperand &RHS, 4527 MachineIRBuilder &MIRBuilder) const { 4528 const std::array<std::array<unsigned, 2>, 5> OpcTable{ 4529 {{AArch64::ADDXri, AArch64::ADDWri}, 4530 {AArch64::ADDXrs, AArch64::ADDWrs}, 4531 {AArch64::ADDXrr, AArch64::ADDWrr}, 4532 {AArch64::SUBXri, AArch64::SUBWri}, 4533 {AArch64::ADDXrx, AArch64::ADDWrx}}}; 4534 return emitAddSub(OpcTable, DefReg, LHS, RHS, MIRBuilder); 4535 } 4536 4537 MachineInstr * 4538 AArch64InstructionSelector::emitADDS(Register Dst, MachineOperand &LHS, 4539 MachineOperand &RHS, 4540 MachineIRBuilder &MIRBuilder) const { 4541 const std::array<std::array<unsigned, 2>, 5> OpcTable{ 4542 {{AArch64::ADDSXri, AArch64::ADDSWri}, 4543 {AArch64::ADDSXrs, AArch64::ADDSWrs}, 4544 {AArch64::ADDSXrr, AArch64::ADDSWrr}, 4545 {AArch64::SUBSXri, AArch64::SUBSWri}, 4546 {AArch64::ADDSXrx, AArch64::ADDSWrx}}}; 4547 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); 4548 } 4549 4550 MachineInstr * 4551 AArch64InstructionSelector::emitSUBS(Register Dst, MachineOperand &LHS, 4552 MachineOperand &RHS, 4553 MachineIRBuilder &MIRBuilder) const { 4554 const std::array<std::array<unsigned, 2>, 5> OpcTable{ 4555 {{AArch64::SUBSXri, AArch64::SUBSWri}, 4556 {AArch64::SUBSXrs, AArch64::SUBSWrs}, 4557 {AArch64::SUBSXrr, AArch64::SUBSWrr}, 4558 {AArch64::ADDSXri, AArch64::ADDSWri}, 4559 {AArch64::SUBSXrx, AArch64::SUBSWrx}}}; 4560 return emitAddSub(OpcTable, Dst, LHS, RHS, MIRBuilder); 4561 } 4562 4563 MachineInstr * 4564 AArch64InstructionSelector::emitADCS(Register Dst, MachineOperand &LHS, 4565 MachineOperand &RHS, 4566 MachineIRBuilder &MIRBuilder) const { 4567 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); 4568 MachineRegisterInfo *MRI = MIRBuilder.getMRI(); 4569 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32); 4570 static const unsigned OpcTable[2] = {AArch64::ADCSXr, AArch64::ADCSWr}; 4571 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder); 4572 } 4573 4574 MachineInstr * 4575 AArch64InstructionSelector::emitSBCS(Register Dst, MachineOperand &LHS, 4576 MachineOperand &RHS, 4577 MachineIRBuilder &MIRBuilder) const { 4578 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); 4579 MachineRegisterInfo *MRI = MIRBuilder.getMRI(); 4580 bool Is32Bit = (MRI->getType(LHS.getReg()).getSizeInBits() == 32); 4581 static const unsigned OpcTable[2] = {AArch64::SBCSXr, AArch64::SBCSWr}; 4582 return emitInstr(OpcTable[Is32Bit], {Dst}, {LHS, RHS}, MIRBuilder); 4583 } 4584 4585 MachineInstr * 4586 AArch64InstructionSelector::emitCMN(MachineOperand &LHS, MachineOperand &RHS, 4587 MachineIRBuilder &MIRBuilder) const { 4588 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4589 bool Is32Bit = (MRI.getType(LHS.getReg()).getSizeInBits() == 32); 4590 auto RC = Is32Bit ? 
&AArch64::GPR32RegClass : &AArch64::GPR64RegClass; 4591 return emitADDS(MRI.createVirtualRegister(RC), LHS, RHS, MIRBuilder); 4592 } 4593 4594 MachineInstr * 4595 AArch64InstructionSelector::emitTST(MachineOperand &LHS, MachineOperand &RHS, 4596 MachineIRBuilder &MIRBuilder) const { 4597 assert(LHS.isReg() && RHS.isReg() && "Expected register operands?"); 4598 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4599 LLT Ty = MRI.getType(LHS.getReg()); 4600 unsigned RegSize = Ty.getSizeInBits(); 4601 bool Is32Bit = (RegSize == 32); 4602 const unsigned OpcTable[3][2] = {{AArch64::ANDSXri, AArch64::ANDSWri}, 4603 {AArch64::ANDSXrs, AArch64::ANDSWrs}, 4604 {AArch64::ANDSXrr, AArch64::ANDSWrr}}; 4605 // ANDS needs a logical immediate for its immediate form. Check if we can 4606 // fold one in. 4607 if (auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI)) { 4608 int64_t Imm = ValAndVReg->Value.getSExtValue(); 4609 4610 if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) { 4611 auto TstMI = MIRBuilder.buildInstr(OpcTable[0][Is32Bit], {Ty}, {LHS}); 4612 TstMI.addImm(AArch64_AM::encodeLogicalImmediate(Imm, RegSize)); 4613 constrainSelectedInstRegOperands(*TstMI, TII, TRI, RBI); 4614 return &*TstMI; 4615 } 4616 } 4617 4618 if (auto Fns = selectLogicalShiftedRegister(RHS)) 4619 return emitInstr(OpcTable[1][Is32Bit], {Ty}, {LHS}, MIRBuilder, Fns); 4620 return emitInstr(OpcTable[2][Is32Bit], {Ty}, {LHS, RHS}, MIRBuilder); 4621 } 4622 4623 MachineInstr *AArch64InstructionSelector::emitIntegerCompare( 4624 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, 4625 MachineIRBuilder &MIRBuilder) const { 4626 assert(LHS.isReg() && RHS.isReg() && "Expected LHS and RHS to be registers!"); 4627 assert(Predicate.isPredicate() && "Expected predicate?"); 4628 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo(); 4629 LLT CmpTy = MRI.getType(LHS.getReg()); 4630 assert(!CmpTy.isVector() && "Expected scalar or pointer"); 4631 unsigned Size = CmpTy.getSizeInBits(); 4632 (void)Size; 4633 assert((Size == 32 || Size == 64) && "Expected a 32-bit or 64-bit LHS/RHS?"); 4634 // Fold the compare into a cmn or tst if possible. 
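  // For example (illustrative), comparing %x against a negated value can use
  // CMN, and an equality compare of an AND result against zero can use TST;
  // otherwise fall through to a SUBS into a scratch register below.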
4635 if (auto FoldCmp = tryFoldIntegerCompare(LHS, RHS, Predicate, MIRBuilder)) 4636 return FoldCmp; 4637 auto Dst = MRI.cloneVirtualRegister(LHS.getReg()); 4638 return emitSUBS(Dst, LHS, RHS, MIRBuilder); 4639 } 4640 4641 MachineInstr *AArch64InstructionSelector::emitCSetForFCmp( 4642 Register Dst, CmpInst::Predicate Pred, MachineIRBuilder &MIRBuilder) const { 4643 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4644 #ifndef NDEBUG 4645 LLT Ty = MRI.getType(Dst); 4646 assert(!Ty.isVector() && Ty.getSizeInBits() == 32 && 4647 "Expected a 32-bit scalar register?"); 4648 #endif 4649 const Register ZReg = AArch64::WZR; 4650 AArch64CC::CondCode CC1, CC2; 4651 changeFCMPPredToAArch64CC(Pred, CC1, CC2); 4652 auto InvCC1 = AArch64CC::getInvertedCondCode(CC1); 4653 if (CC2 == AArch64CC::AL) 4654 return emitCSINC(/*Dst=*/Dst, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, 4655 MIRBuilder); 4656 const TargetRegisterClass *RC = &AArch64::GPR32RegClass; 4657 Register Def1Reg = MRI.createVirtualRegister(RC); 4658 Register Def2Reg = MRI.createVirtualRegister(RC); 4659 auto InvCC2 = AArch64CC::getInvertedCondCode(CC2); 4660 emitCSINC(/*Dst=*/Def1Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC1, MIRBuilder); 4661 emitCSINC(/*Dst=*/Def2Reg, /*Src1=*/ZReg, /*Src2=*/ZReg, InvCC2, MIRBuilder); 4662 auto OrMI = MIRBuilder.buildInstr(AArch64::ORRWrr, {Dst}, {Def1Reg, Def2Reg}); 4663 constrainSelectedInstRegOperands(*OrMI, TII, TRI, RBI); 4664 return &*OrMI; 4665 } 4666 4667 MachineInstr *AArch64InstructionSelector::emitFPCompare( 4668 Register LHS, Register RHS, MachineIRBuilder &MIRBuilder, 4669 std::optional<CmpInst::Predicate> Pred) const { 4670 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 4671 LLT Ty = MRI.getType(LHS); 4672 if (Ty.isVector()) 4673 return nullptr; 4674 unsigned OpSize = Ty.getSizeInBits(); 4675 if (OpSize != 32 && OpSize != 64) 4676 return nullptr; 4677 4678 // If this is a compare against +0.0, then we don't have 4679 // to explicitly materialize a constant. 4680 const ConstantFP *FPImm = getConstantFPVRegVal(RHS, MRI); 4681 bool ShouldUseImm = FPImm && (FPImm->isZero() && !FPImm->isNegative()); 4682 4683 auto IsEqualityPred = [](CmpInst::Predicate P) { 4684 return P == CmpInst::FCMP_OEQ || P == CmpInst::FCMP_ONE || 4685 P == CmpInst::FCMP_UEQ || P == CmpInst::FCMP_UNE; 4686 }; 4687 if (!ShouldUseImm && Pred && IsEqualityPred(*Pred)) { 4688 // Try commutating the operands. 4689 const ConstantFP *LHSImm = getConstantFPVRegVal(LHS, MRI); 4690 if (LHSImm && (LHSImm->isZero() && !LHSImm->isNegative())) { 4691 ShouldUseImm = true; 4692 std::swap(LHS, RHS); 4693 } 4694 } 4695 unsigned CmpOpcTbl[2][2] = {{AArch64::FCMPSrr, AArch64::FCMPDrr}, 4696 {AArch64::FCMPSri, AArch64::FCMPDri}}; 4697 unsigned CmpOpc = CmpOpcTbl[ShouldUseImm][OpSize == 64]; 4698 4699 // Partially build the compare. Decide if we need to add a use for the 4700 // third operand based off whether or not we're comparing against 0.0. 4701 auto CmpMI = MIRBuilder.buildInstr(CmpOpc).addUse(LHS); 4702 CmpMI.setMIFlags(MachineInstr::NoFPExcept); 4703 if (!ShouldUseImm) 4704 CmpMI.addUse(RHS); 4705 constrainSelectedInstRegOperands(*CmpMI, TII, TRI, RBI); 4706 return &*CmpMI; 4707 } 4708 4709 MachineInstr *AArch64InstructionSelector::emitVectorConcat( 4710 std::optional<Register> Dst, Register Op1, Register Op2, 4711 MachineIRBuilder &MIRBuilder) const { 4712 // We implement a vector concat by: 4713 // 1. Use scalar_to_vector to insert the lower vector into the larger dest 4714 // 2. 
Insert the upper vector into the destination's upper element
4715 // TODO: some of this code is common with G_BUILD_VECTOR handling.
4716 MachineRegisterInfo &MRI = MIRBuilder.getMF().getRegInfo();
4717
4718 const LLT Op1Ty = MRI.getType(Op1);
4719 const LLT Op2Ty = MRI.getType(Op2);
4720
4721 if (Op1Ty != Op2Ty) {
4722 LLVM_DEBUG(dbgs() << "Could not do vector concat of differing vector tys");
4723 return nullptr;
4724 }
4725 assert(Op1Ty.isVector() && "Expected a vector for vector concat");
4726
4727 if (Op1Ty.getSizeInBits() >= 128) {
4728 LLVM_DEBUG(dbgs() << "Vector concat not supported for full size vectors");
4729 return nullptr;
4730 }
4731
4732 // At the moment we just support 64 bit vector concats.
4733 if (Op1Ty.getSizeInBits() != 64) {
4734 LLVM_DEBUG(dbgs() << "Vector concat only supported for 64b vectors");
4735 return nullptr;
4736 }
4737
4738 const LLT ScalarTy = LLT::scalar(Op1Ty.getSizeInBits());
4739 const RegisterBank &FPRBank = *RBI.getRegBank(Op1, MRI, TRI);
4740 const TargetRegisterClass *DstRC =
4741 getRegClassForTypeOnBank(Op1Ty.multiplyElements(2), FPRBank);
4742
4743 MachineInstr *WidenedOp1 =
4744 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op1, MIRBuilder);
4745 MachineInstr *WidenedOp2 =
4746 emitScalarToVector(ScalarTy.getSizeInBits(), DstRC, Op2, MIRBuilder);
4747 if (!WidenedOp1 || !WidenedOp2) {
4748 LLVM_DEBUG(dbgs() << "Could not emit a vector from scalar value");
4749 return nullptr;
4750 }
4751
4752 // Now do the insert of the upper element.
4753 unsigned InsertOpc, InsSubRegIdx;
4754 std::tie(InsertOpc, InsSubRegIdx) =
4755 getInsertVecEltOpInfo(FPRBank, ScalarTy.getSizeInBits());
4756
4757 if (!Dst)
4758 Dst = MRI.createVirtualRegister(DstRC);
4759 auto InsElt =
4760 MIRBuilder
4761 .buildInstr(InsertOpc, {*Dst}, {WidenedOp1->getOperand(0).getReg()})
4762 .addImm(1) /* Lane index */
4763 .addUse(WidenedOp2->getOperand(0).getReg())
4764 .addImm(0);
4765 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI);
4766 return &*InsElt;
4767 }
4768
4769 MachineInstr *
4770 AArch64InstructionSelector::emitCSINC(Register Dst, Register Src1,
4771 Register Src2, AArch64CC::CondCode Pred,
4772 MachineIRBuilder &MIRBuilder) const {
4773 auto &MRI = *MIRBuilder.getMRI();
4774 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Dst);
4775 // If we used a register class, then this won't necessarily have an LLT.
4776 // Compute the size based on whether or not we have a class or bank.
4777 unsigned Size;
4778 if (const auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
4779 Size = TRI.getRegSizeInBits(*RC);
4780 else
4781 Size = MRI.getType(Dst).getSizeInBits();
4782 // Some opcodes use s1.
4783 assert(Size <= 64 && "Expected 64 bits or less only!");
4784 static const unsigned OpcTable[2] = {AArch64::CSINCWr, AArch64::CSINCXr};
4785 unsigned Opc = OpcTable[Size == 64];
4786 auto CSINC = MIRBuilder.buildInstr(Opc, {Dst}, {Src1, Src2}).addImm(Pred);
4787 constrainSelectedInstRegOperands(*CSINC, TII, TRI, RBI);
4788 return &*CSINC;
4789 }
4790
4791 MachineInstr *AArch64InstructionSelector::emitCarryIn(MachineInstr &I,
4792 Register CarryReg) {
4793 MachineRegisterInfo *MRI = MIB.getMRI();
4794 unsigned Opcode = I.getOpcode();
4795
4796 // If the instruction is a SUB, we need to negate the carry,
4797 // because borrowing is indicated by carry-flag == 0.
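// (Recall the AArch64 flag convention: ADCS computes Rn + Rm + C, while SBCS
// computes Rn - Rm - 1 + C, so a borrow-in corresponds to C == 0. That is why
// the SUB-style opcodes below need the carry negated.)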
4798 bool NeedsNegatedCarry = 4799 (Opcode == TargetOpcode::G_USUBE || Opcode == TargetOpcode::G_SSUBE); 4800 4801 // If the previous instruction will already produce the correct carry, do not 4802 // emit a carry generating instruction. E.g. for G_UADDE/G_USUBE sequences 4803 // generated during legalization of wide add/sub. This optimization depends on 4804 // these sequences not being interrupted by other instructions. 4805 MachineInstr *SrcMI = MRI->getVRegDef(CarryReg); 4806 if (SrcMI == I.getPrevNode()) { 4807 if (auto *CarrySrcMI = dyn_cast<GAddSubCarryOut>(SrcMI)) { 4808 bool ProducesNegatedCarry = CarrySrcMI->isSub(); 4809 if (NeedsNegatedCarry == ProducesNegatedCarry && CarrySrcMI->isUnsigned()) 4810 return nullptr; 4811 } 4812 } 4813 4814 Register DeadReg = MRI->createVirtualRegister(&AArch64::GPR32RegClass); 4815 4816 if (NeedsNegatedCarry) { 4817 // (0 - Carry) sets !C in NZCV when Carry == 1 4818 Register ZReg = AArch64::WZR; 4819 return emitInstr(AArch64::SUBSWrr, {DeadReg}, {ZReg, CarryReg}, MIB); 4820 } 4821 4822 // (Carry - 1) sets !C in NZCV when Carry == 0 4823 auto Fns = select12BitValueWithLeftShift(1); 4824 return emitInstr(AArch64::SUBSWri, {DeadReg}, {CarryReg}, MIB, Fns); 4825 } 4826 4827 bool AArch64InstructionSelector::selectOverflowOp(MachineInstr &I, 4828 MachineRegisterInfo &MRI) { 4829 auto &CarryMI = cast<GAddSubCarryOut>(I); 4830 4831 if (auto *CarryInMI = dyn_cast<GAddSubCarryInOut>(&I)) { 4832 // Set NZCV carry according to carry-in VReg 4833 emitCarryIn(I, CarryInMI->getCarryInReg()); 4834 } 4835 4836 // Emit the operation and get the correct condition code. 4837 auto OpAndCC = emitOverflowOp(I.getOpcode(), CarryMI.getDstReg(), 4838 CarryMI.getLHS(), CarryMI.getRHS(), MIB); 4839 4840 Register CarryOutReg = CarryMI.getCarryOutReg(); 4841 4842 // Don't convert carry-out to VReg if it is never used 4843 if (!MRI.use_nodbg_empty(CarryOutReg)) { 4844 // Now, put the overflow result in the register given by the first operand 4845 // to the overflow op. CSINC increments the result when the predicate is 4846 // false, so to get the increment when it's true, we need to use the 4847 // inverse. In this case, we want to increment when carry is set. 
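// For example, for a G_UADDO the overflow condition is HS, so the carry-out
// is materialized with the equivalent of "cset wN, hs", i.e.
// "csinc wN, wzr, wzr, lo" (wN is illustrative).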
4848 Register ZReg = AArch64::WZR;
4849 emitCSINC(/*Dst=*/CarryOutReg, /*Src1=*/ZReg, /*Src2=*/ZReg,
4850 getInvertedCondCode(OpAndCC.second), MIB);
4851 }
4852
4853 I.eraseFromParent();
4854 return true;
4855 }
4856
4857 std::pair<MachineInstr *, AArch64CC::CondCode>
4858 AArch64InstructionSelector::emitOverflowOp(unsigned Opcode, Register Dst,
4859 MachineOperand &LHS,
4860 MachineOperand &RHS,
4861 MachineIRBuilder &MIRBuilder) const {
4862 switch (Opcode) {
4863 default:
4864 llvm_unreachable("Unexpected opcode!");
4865 case TargetOpcode::G_SADDO:
4866 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4867 case TargetOpcode::G_UADDO:
4868 return std::make_pair(emitADDS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4869 case TargetOpcode::G_SSUBO:
4870 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4871 case TargetOpcode::G_USUBO:
4872 return std::make_pair(emitSUBS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4873 case TargetOpcode::G_SADDE:
4874 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4875 case TargetOpcode::G_UADDE:
4876 return std::make_pair(emitADCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::HS);
4877 case TargetOpcode::G_SSUBE:
4878 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::VS);
4879 case TargetOpcode::G_USUBE:
4880 return std::make_pair(emitSBCS(Dst, LHS, RHS, MIRBuilder), AArch64CC::LO);
4881 }
4882 }
4883
4884 /// Returns true if @p Val is a tree of AND/OR/CMP operations that can be
4885 /// expressed as a conjunction.
4886 /// \param CanNegate Set to true if we can negate the whole sub-tree just by
4887 /// changing the conditions on the CMP tests.
4888 /// (this means we can call emitConjunctionRec() with
4889 /// Negate==true on this sub-tree)
4890 /// \param MustBeFirst Set to true if this subtree needs to be negated and we
4891 /// cannot do the negation naturally. We are required to
4892 /// emit the subtree first in this case.
4893 /// \param WillNegate Is true if we are called when the result of this
4894 /// subexpression must be negated. This happens when the
4895 /// outer expression is an OR. We can use this fact to know
4896 /// that we have a double negation (or (or ...) ...) that
4897 /// can be implemented for free.
4898 static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
4899 bool WillNegate, MachineRegisterInfo &MRI,
4900 unsigned Depth = 0) {
4901 if (!MRI.hasOneNonDBGUse(Val))
4902 return false;
4903 MachineInstr *ValDef = MRI.getVRegDef(Val);
4904 unsigned Opcode = ValDef->getOpcode();
4905 if (isa<GAnyCmp>(ValDef)) {
4906 CanNegate = true;
4907 MustBeFirst = false;
4908 return true;
4909 }
4910 // Protect against exponential runtime and stack overflow.
4911 if (Depth > 6)
4912 return false;
4913 if (Opcode == TargetOpcode::G_AND || Opcode == TargetOpcode::G_OR) {
4914 bool IsOR = Opcode == TargetOpcode::G_OR;
4915 Register O0 = ValDef->getOperand(1).getReg();
4916 Register O1 = ValDef->getOperand(2).getReg();
4917 bool CanNegateL;
4918 bool MustBeFirstL;
4919 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, MRI, Depth + 1))
4920 return false;
4921 bool CanNegateR;
4922 bool MustBeFirstR;
4923 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, MRI, Depth + 1))
4924 return false;
4925
4926 if (MustBeFirstL && MustBeFirstR)
4927 return false;
4928
4929 if (IsOR) {
4930 // For an OR expression we need to be able to naturally negate at least
4931 // one side or we cannot do the transformation at all.
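// (Roughly by De Morgan: (a || b) == !(!a && !b), and negating a leaf compare
// is free because we can simply invert its predicate. That free inversion is
// what "naturally negate" means here.)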
4932 if (!CanNegateL && !CanNegateR)
4933 return false;
4934 // If the result of the OR will be negated and we can naturally negate
4935 // the leaves, then this sub-tree as a whole negates naturally.
4936 CanNegate = WillNegate && CanNegateL && CanNegateR;
4937 // If we cannot naturally negate the whole sub-tree, then this must be
4938 // emitted first.
4939 MustBeFirst = !CanNegate;
4940 } else {
4941 assert(Opcode == TargetOpcode::G_AND && "Must be G_AND");
4942 // We cannot naturally negate an AND operation.
4943 CanNegate = false;
4944 MustBeFirst = MustBeFirstL || MustBeFirstR;
4945 }
4946 return true;
4947 }
4948 return false;
4949 }
4950
4951 MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
4952 Register LHS, Register RHS, CmpInst::Predicate CC,
4953 AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4954 MachineIRBuilder &MIB) const {
4955 // TODO: emit CMN as an optimization.
4956 auto &MRI = *MIB.getMRI();
4957 LLT OpTy = MRI.getType(LHS);
4958 assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
4959 unsigned CCmpOpc;
4960 std::optional<ValueAndVReg> C;
4961 if (CmpInst::isIntPredicate(CC)) {
4962 C = getIConstantVRegValWithLookThrough(RHS, MRI);
4963 if (C && C->Value.ult(32))
4964 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
4965 else
4966 CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4967 } else {
4968 switch (OpTy.getSizeInBits()) {
4969 case 16:
4970 CCmpOpc = AArch64::FCCMPHrr;
4971 break;
4972 case 32:
4973 CCmpOpc = AArch64::FCCMPSrr;
4974 break;
4975 case 64:
4976 CCmpOpc = AArch64::FCCMPDrr;
4977 break;
4978 default:
4979 return nullptr;
4980 }
4981 }
4982 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
4983 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4984 auto CCmp =
4985 MIB.buildInstr(CCmpOpc, {}, {LHS});
4986 if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
4987 CCmp.addImm(C->Value.getZExtValue());
4988 else
4989 CCmp.addReg(RHS);
4990 CCmp.addImm(NZCV).addImm(Predicate);
4991 constrainSelectedInstRegOperands(*CCmp, TII, TRI, RBI);
4992 return &*CCmp;
4993 }
4994
4995 MachineInstr *AArch64InstructionSelector::emitConjunctionRec(
4996 Register Val, AArch64CC::CondCode &OutCC, bool Negate, Register CCOp,
4997 AArch64CC::CondCode Predicate, MachineIRBuilder &MIB) const {
4998 // We're at a tree leaf, produce a conditional comparison operation.
4999 auto &MRI = *MIB.getMRI();
5000 MachineInstr *ValDef = MRI.getVRegDef(Val);
5001 unsigned Opcode = ValDef->getOpcode();
5002 if (auto *Cmp = dyn_cast<GAnyCmp>(ValDef)) {
5003 Register LHS = Cmp->getLHSReg();
5004 Register RHS = Cmp->getRHSReg();
5005 CmpInst::Predicate CC = Cmp->getCond();
5006 if (Negate)
5007 CC = CmpInst::getInversePredicate(CC);
5008 if (isa<GICmp>(Cmp)) {
5009 OutCC = changeICMPPredToAArch64CC(CC);
5010 } else {
5011 // Handle special FP cases.
5012 AArch64CC::CondCode ExtraCC;
5013 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
5014 // Some floating point conditions can't be tested with a single condition
5015 // code. Construct an additional comparison in this case.
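// For example, an ordered-and-not-equal (FCMP_ONE) or unordered-or-equal
// (FCMP_UEQ) test decomposes into two AArch64 conditions that must both hold,
// so an extra conditional compare is chained in front of the main one.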
5016 if (ExtraCC != AArch64CC::AL) { 5017 MachineInstr *ExtraCmp; 5018 if (!CCOp) 5019 ExtraCmp = emitFPCompare(LHS, RHS, MIB, CC); 5020 else 5021 ExtraCmp = 5022 emitConditionalComparison(LHS, RHS, CC, Predicate, ExtraCC, MIB); 5023 CCOp = ExtraCmp->getOperand(0).getReg(); 5024 Predicate = ExtraCC; 5025 } 5026 } 5027 5028 // Produce a normal comparison if we are first in the chain 5029 if (!CCOp) { 5030 auto Dst = MRI.cloneVirtualRegister(LHS); 5031 if (isa<GICmp>(Cmp)) 5032 return emitSUBS(Dst, Cmp->getOperand(2), Cmp->getOperand(3), MIB); 5033 return emitFPCompare(Cmp->getOperand(2).getReg(), 5034 Cmp->getOperand(3).getReg(), MIB); 5035 } 5036 // Otherwise produce a ccmp. 5037 return emitConditionalComparison(LHS, RHS, CC, Predicate, OutCC, MIB); 5038 } 5039 assert(MRI.hasOneNonDBGUse(Val) && "Valid conjunction/disjunction tree"); 5040 5041 bool IsOR = Opcode == TargetOpcode::G_OR; 5042 5043 Register LHS = ValDef->getOperand(1).getReg(); 5044 bool CanNegateL; 5045 bool MustBeFirstL; 5046 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR, MRI); 5047 assert(ValidL && "Valid conjunction/disjunction tree"); 5048 (void)ValidL; 5049 5050 Register RHS = ValDef->getOperand(2).getReg(); 5051 bool CanNegateR; 5052 bool MustBeFirstR; 5053 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR, MRI); 5054 assert(ValidR && "Valid conjunction/disjunction tree"); 5055 (void)ValidR; 5056 5057 // Swap sub-tree that must come first to the right side. 5058 if (MustBeFirstL) { 5059 assert(!MustBeFirstR && "Valid conjunction/disjunction tree"); 5060 std::swap(LHS, RHS); 5061 std::swap(CanNegateL, CanNegateR); 5062 std::swap(MustBeFirstL, MustBeFirstR); 5063 } 5064 5065 bool NegateR; 5066 bool NegateAfterR; 5067 bool NegateL; 5068 bool NegateAfterAll; 5069 if (Opcode == TargetOpcode::G_OR) { 5070 // Swap the sub-tree that we can negate naturally to the left. 5071 if (!CanNegateL) { 5072 assert(CanNegateR && "at least one side must be negatable"); 5073 assert(!MustBeFirstR && "invalid conjunction/disjunction tree"); 5074 assert(!Negate); 5075 std::swap(LHS, RHS); 5076 NegateR = false; 5077 NegateAfterR = true; 5078 } else { 5079 // Negate the left sub-tree if possible, otherwise negate the result. 5080 NegateR = CanNegateR; 5081 NegateAfterR = !CanNegateR; 5082 } 5083 NegateL = true; 5084 NegateAfterAll = !Negate; 5085 } else { 5086 assert(Opcode == TargetOpcode::G_AND && 5087 "Valid conjunction/disjunction tree"); 5088 assert(!Negate && "Valid conjunction/disjunction tree"); 5089 5090 NegateL = false; 5091 NegateR = false; 5092 NegateAfterR = false; 5093 NegateAfterAll = false; 5094 } 5095 5096 // Emit sub-trees. 
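// Roughly, for a tree like (and (icmp eq %a, %b), (icmp sgt %c, %d)) this
// emits (register names illustrative):
//
//   cmp  c, d              ; RHS leaf, ordinary compare
//   ccmp a, b, #0, gt      ; LHS leaf, only meaningful if "gt" held
//
// and the caller then tests NZCV with the LHS's condition code (eq).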
5097 AArch64CC::CondCode RHSCC; 5098 MachineInstr *CmpR = 5099 emitConjunctionRec(RHS, RHSCC, NegateR, CCOp, Predicate, MIB); 5100 if (NegateAfterR) 5101 RHSCC = AArch64CC::getInvertedCondCode(RHSCC); 5102 MachineInstr *CmpL = emitConjunctionRec( 5103 LHS, OutCC, NegateL, CmpR->getOperand(0).getReg(), RHSCC, MIB); 5104 if (NegateAfterAll) 5105 OutCC = AArch64CC::getInvertedCondCode(OutCC); 5106 return CmpL; 5107 } 5108 5109 MachineInstr *AArch64InstructionSelector::emitConjunction( 5110 Register Val, AArch64CC::CondCode &OutCC, MachineIRBuilder &MIB) const { 5111 bool DummyCanNegate; 5112 bool DummyMustBeFirst; 5113 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false, 5114 *MIB.getMRI())) 5115 return nullptr; 5116 return emitConjunctionRec(Val, OutCC, false, Register(), AArch64CC::AL, MIB); 5117 } 5118 5119 bool AArch64InstructionSelector::tryOptSelectConjunction(GSelect &SelI, 5120 MachineInstr &CondMI) { 5121 AArch64CC::CondCode AArch64CC; 5122 MachineInstr *ConjMI = emitConjunction(SelI.getCondReg(), AArch64CC, MIB); 5123 if (!ConjMI) 5124 return false; 5125 5126 emitSelect(SelI.getReg(0), SelI.getTrueReg(), SelI.getFalseReg(), AArch64CC, MIB); 5127 SelI.eraseFromParent(); 5128 return true; 5129 } 5130 5131 bool AArch64InstructionSelector::tryOptSelect(GSelect &I) { 5132 MachineRegisterInfo &MRI = *MIB.getMRI(); 5133 // We want to recognize this pattern: 5134 // 5135 // $z = G_FCMP pred, $x, $y 5136 // ... 5137 // $w = G_SELECT $z, $a, $b 5138 // 5139 // Where the value of $z is *only* ever used by the G_SELECT (possibly with 5140 // some copies/truncs in between.) 5141 // 5142 // If we see this, then we can emit something like this: 5143 // 5144 // fcmp $x, $y 5145 // fcsel $w, $a, $b, pred 5146 // 5147 // Rather than emitting both of the rather long sequences in the standard 5148 // G_FCMP/G_SELECT select methods. 5149 5150 // First, check if the condition is defined by a compare. 5151 MachineInstr *CondDef = MRI.getVRegDef(I.getOperand(1).getReg()); 5152 5153 // We can only fold if all of the defs have one use. 5154 Register CondDefReg = CondDef->getOperand(0).getReg(); 5155 if (!MRI.hasOneNonDBGUse(CondDefReg)) { 5156 // Unless it's another select. 5157 for (const MachineInstr &UI : MRI.use_nodbg_instructions(CondDefReg)) { 5158 if (CondDef == &UI) 5159 continue; 5160 if (UI.getOpcode() != TargetOpcode::G_SELECT) 5161 return false; 5162 } 5163 } 5164 5165 // Is the condition defined by a compare? 5166 unsigned CondOpc = CondDef->getOpcode(); 5167 if (CondOpc != TargetOpcode::G_ICMP && CondOpc != TargetOpcode::G_FCMP) { 5168 if (tryOptSelectConjunction(I, *CondDef)) 5169 return true; 5170 return false; 5171 } 5172 5173 AArch64CC::CondCode CondCode; 5174 if (CondOpc == TargetOpcode::G_ICMP) { 5175 auto Pred = 5176 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); 5177 CondCode = changeICMPPredToAArch64CC(Pred); 5178 emitIntegerCompare(CondDef->getOperand(2), CondDef->getOperand(3), 5179 CondDef->getOperand(1), MIB); 5180 } else { 5181 // Get the condition code for the select. 5182 auto Pred = 5183 static_cast<CmpInst::Predicate>(CondDef->getOperand(1).getPredicate()); 5184 AArch64CC::CondCode CondCode2; 5185 changeFCMPPredToAArch64CC(Pred, CondCode, CondCode2); 5186 5187 // changeFCMPPredToAArch64CC sets CondCode2 to AL when we require two 5188 // instructions to emit the comparison. 5189 // TODO: Handle FCMP_UEQ and FCMP_ONE. After that, this check will be 5190 // unnecessary. 
5191 if (CondCode2 != AArch64CC::AL) 5192 return false; 5193 5194 if (!emitFPCompare(CondDef->getOperand(2).getReg(), 5195 CondDef->getOperand(3).getReg(), MIB)) { 5196 LLVM_DEBUG(dbgs() << "Couldn't emit compare for select!\n"); 5197 return false; 5198 } 5199 } 5200 5201 // Emit the select. 5202 emitSelect(I.getOperand(0).getReg(), I.getOperand(2).getReg(), 5203 I.getOperand(3).getReg(), CondCode, MIB); 5204 I.eraseFromParent(); 5205 return true; 5206 } 5207 5208 MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare( 5209 MachineOperand &LHS, MachineOperand &RHS, MachineOperand &Predicate, 5210 MachineIRBuilder &MIRBuilder) const { 5211 assert(LHS.isReg() && RHS.isReg() && Predicate.isPredicate() && 5212 "Unexpected MachineOperand"); 5213 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 5214 // We want to find this sort of thing: 5215 // x = G_SUB 0, y 5216 // G_ICMP z, x 5217 // 5218 // In this case, we can fold the G_SUB into the G_ICMP using a CMN instead. 5219 // e.g: 5220 // 5221 // cmn z, y 5222 5223 // Check if the RHS or LHS of the G_ICMP is defined by a SUB 5224 MachineInstr *LHSDef = getDefIgnoringCopies(LHS.getReg(), MRI); 5225 MachineInstr *RHSDef = getDefIgnoringCopies(RHS.getReg(), MRI); 5226 auto P = static_cast<CmpInst::Predicate>(Predicate.getPredicate()); 5227 // Given this: 5228 // 5229 // x = G_SUB 0, y 5230 // G_ICMP x, z 5231 // 5232 // Produce this: 5233 // 5234 // cmn y, z 5235 if (isCMN(LHSDef, P, MRI)) 5236 return emitCMN(LHSDef->getOperand(2), RHS, MIRBuilder); 5237 5238 // Same idea here, but with the RHS of the compare instead: 5239 // 5240 // Given this: 5241 // 5242 // x = G_SUB 0, y 5243 // G_ICMP z, x 5244 // 5245 // Produce this: 5246 // 5247 // cmn z, y 5248 if (isCMN(RHSDef, P, MRI)) 5249 return emitCMN(LHS, RHSDef->getOperand(2), MIRBuilder); 5250 5251 // Given this: 5252 // 5253 // z = G_AND x, y 5254 // G_ICMP z, 0 5255 // 5256 // Produce this if the compare is signed: 5257 // 5258 // tst x, y 5259 if (!CmpInst::isUnsigned(P) && LHSDef && 5260 LHSDef->getOpcode() == TargetOpcode::G_AND) { 5261 // Make sure that the RHS is 0. 5262 auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI); 5263 if (!ValAndVReg || ValAndVReg->Value != 0) 5264 return nullptr; 5265 5266 return emitTST(LHSDef->getOperand(1), 5267 LHSDef->getOperand(2), MIRBuilder); 5268 } 5269 5270 return nullptr; 5271 } 5272 5273 bool AArch64InstructionSelector::selectShuffleVector( 5274 MachineInstr &I, MachineRegisterInfo &MRI) { 5275 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 5276 Register Src1Reg = I.getOperand(1).getReg(); 5277 const LLT Src1Ty = MRI.getType(Src1Reg); 5278 Register Src2Reg = I.getOperand(2).getReg(); 5279 const LLT Src2Ty = MRI.getType(Src2Reg); 5280 ArrayRef<int> Mask = I.getOperand(3).getShuffleMask(); 5281 5282 MachineBasicBlock &MBB = *I.getParent(); 5283 MachineFunction &MF = *MBB.getParent(); 5284 LLVMContext &Ctx = MF.getFunction().getContext(); 5285 5286 // G_SHUFFLE_VECTOR is weird in that the source operands can be scalars, if 5287 // it's originated from a <1 x T> type. Those should have been lowered into 5288 // G_BUILD_VECTOR earlier. 5289 if (!Src1Ty.isVector() || !Src2Ty.isVector()) { 5290 LLVM_DEBUG(dbgs() << "Could not select a \"scalar\" G_SHUFFLE_VECTOR\n"); 5291 return false; 5292 } 5293 5294 unsigned BytesPerElt = DstTy.getElementType().getSizeInBits() / 8; 5295 5296 SmallVector<Constant *, 64> CstIdxs; 5297 for (int Val : Mask) { 5298 // For now, any undef indexes we'll just assume to be 0. 
This should be 5299 // optimized in future, e.g. to select DUP etc. 5300 Val = Val < 0 ? 0 : Val; 5301 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) { 5302 unsigned Offset = Byte + Val * BytesPerElt; 5303 CstIdxs.emplace_back(ConstantInt::get(Type::getInt8Ty(Ctx), Offset)); 5304 } 5305 } 5306 5307 // Use a constant pool to load the index vector for TBL. 5308 Constant *CPVal = ConstantVector::get(CstIdxs); 5309 MachineInstr *IndexLoad = emitLoadFromConstantPool(CPVal, MIB); 5310 if (!IndexLoad) { 5311 LLVM_DEBUG(dbgs() << "Could not load from a constant pool"); 5312 return false; 5313 } 5314 5315 if (DstTy.getSizeInBits() != 128) { 5316 assert(DstTy.getSizeInBits() == 64 && "Unexpected shuffle result ty"); 5317 // This case can be done with TBL1. 5318 MachineInstr *Concat = 5319 emitVectorConcat(std::nullopt, Src1Reg, Src2Reg, MIB); 5320 if (!Concat) { 5321 LLVM_DEBUG(dbgs() << "Could not do vector concat for tbl1"); 5322 return false; 5323 } 5324 5325 // The constant pool load will be 64 bits, so need to convert to FPR128 reg. 5326 IndexLoad = emitScalarToVector(64, &AArch64::FPR128RegClass, 5327 IndexLoad->getOperand(0).getReg(), MIB); 5328 5329 auto TBL1 = MIB.buildInstr( 5330 AArch64::TBLv16i8One, {&AArch64::FPR128RegClass}, 5331 {Concat->getOperand(0).getReg(), IndexLoad->getOperand(0).getReg()}); 5332 constrainSelectedInstRegOperands(*TBL1, TII, TRI, RBI); 5333 5334 auto Copy = 5335 MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(0).getReg()}, {}) 5336 .addReg(TBL1.getReg(0), 0, AArch64::dsub); 5337 RBI.constrainGenericRegister(Copy.getReg(0), AArch64::FPR64RegClass, MRI); 5338 I.eraseFromParent(); 5339 return true; 5340 } 5341 5342 // For TBL2 we need to emit a REG_SEQUENCE to tie together two consecutive 5343 // Q registers for regalloc. 5344 SmallVector<Register, 2> Regs = {Src1Reg, Src2Reg}; 5345 auto RegSeq = createQTuple(Regs, MIB); 5346 auto TBL2 = MIB.buildInstr(AArch64::TBLv16i8Two, {I.getOperand(0)}, 5347 {RegSeq, IndexLoad->getOperand(0)}); 5348 constrainSelectedInstRegOperands(*TBL2, TII, TRI, RBI); 5349 I.eraseFromParent(); 5350 return true; 5351 } 5352 5353 MachineInstr *AArch64InstructionSelector::emitLaneInsert( 5354 std::optional<Register> DstReg, Register SrcReg, Register EltReg, 5355 unsigned LaneIdx, const RegisterBank &RB, 5356 MachineIRBuilder &MIRBuilder) const { 5357 MachineInstr *InsElt = nullptr; 5358 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; 5359 MachineRegisterInfo &MRI = *MIRBuilder.getMRI(); 5360 5361 // Create a register to define with the insert if one wasn't passed in. 
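// The opcode depends on where the element currently lives: a GPR element can
// be inserted directly (e.g. the INSvi*gpr forms), whereas an FPR element is
// first widened with scalar_to_vector so that the vector-to-vector INSvi*lane
// form can be used.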
5362 if (!DstReg) 5363 DstReg = MRI.createVirtualRegister(DstRC); 5364 5365 unsigned EltSize = MRI.getType(EltReg).getSizeInBits(); 5366 unsigned Opc = getInsertVecEltOpInfo(RB, EltSize).first; 5367 5368 if (RB.getID() == AArch64::FPRRegBankID) { 5369 auto InsSub = emitScalarToVector(EltSize, DstRC, EltReg, MIRBuilder); 5370 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) 5371 .addImm(LaneIdx) 5372 .addUse(InsSub->getOperand(0).getReg()) 5373 .addImm(0); 5374 } else { 5375 InsElt = MIRBuilder.buildInstr(Opc, {*DstReg}, {SrcReg}) 5376 .addImm(LaneIdx) 5377 .addUse(EltReg); 5378 } 5379 5380 constrainSelectedInstRegOperands(*InsElt, TII, TRI, RBI); 5381 return InsElt; 5382 } 5383 5384 bool AArch64InstructionSelector::selectUSMovFromExtend( 5385 MachineInstr &MI, MachineRegisterInfo &MRI) { 5386 if (MI.getOpcode() != TargetOpcode::G_SEXT && 5387 MI.getOpcode() != TargetOpcode::G_ZEXT && 5388 MI.getOpcode() != TargetOpcode::G_ANYEXT) 5389 return false; 5390 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SEXT; 5391 const Register DefReg = MI.getOperand(0).getReg(); 5392 const LLT DstTy = MRI.getType(DefReg); 5393 unsigned DstSize = DstTy.getSizeInBits(); 5394 5395 if (DstSize != 32 && DstSize != 64) 5396 return false; 5397 5398 MachineInstr *Extract = getOpcodeDef(TargetOpcode::G_EXTRACT_VECTOR_ELT, 5399 MI.getOperand(1).getReg(), MRI); 5400 int64_t Lane; 5401 if (!Extract || !mi_match(Extract->getOperand(2).getReg(), MRI, m_ICst(Lane))) 5402 return false; 5403 Register Src0 = Extract->getOperand(1).getReg(); 5404 5405 const LLT &VecTy = MRI.getType(Src0); 5406 5407 if (VecTy.getSizeInBits() != 128) { 5408 const MachineInstr *ScalarToVector = emitScalarToVector( 5409 VecTy.getSizeInBits(), &AArch64::FPR128RegClass, Src0, MIB); 5410 assert(ScalarToVector && "Didn't expect emitScalarToVector to fail!"); 5411 Src0 = ScalarToVector->getOperand(0).getReg(); 5412 } 5413 5414 unsigned Opcode; 5415 if (DstSize == 64 && VecTy.getScalarSizeInBits() == 32) 5416 Opcode = IsSigned ? AArch64::SMOVvi32to64 : AArch64::UMOVvi32; 5417 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 16) 5418 Opcode = IsSigned ? AArch64::SMOVvi16to64 : AArch64::UMOVvi16; 5419 else if (DstSize == 64 && VecTy.getScalarSizeInBits() == 8) 5420 Opcode = IsSigned ? AArch64::SMOVvi8to64 : AArch64::UMOVvi8; 5421 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 16) 5422 Opcode = IsSigned ? AArch64::SMOVvi16to32 : AArch64::UMOVvi16; 5423 else if (DstSize == 32 && VecTy.getScalarSizeInBits() == 8) 5424 Opcode = IsSigned ? 
AArch64::SMOVvi8to32 : AArch64::UMOVvi8; 5425 else 5426 llvm_unreachable("Unexpected type combo for S/UMov!"); 5427 5428 // We may need to generate one of these, depending on the type and sign of the 5429 // input: 5430 // DstReg = SMOV Src0, Lane; 5431 // NewReg = UMOV Src0, Lane; DstReg = SUBREG_TO_REG NewReg, sub_32; 5432 MachineInstr *ExtI = nullptr; 5433 if (DstSize == 64 && !IsSigned) { 5434 Register NewReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass); 5435 MIB.buildInstr(Opcode, {NewReg}, {Src0}).addImm(Lane); 5436 ExtI = MIB.buildInstr(AArch64::SUBREG_TO_REG, {DefReg}, {}) 5437 .addImm(0) 5438 .addUse(NewReg) 5439 .addImm(AArch64::sub_32); 5440 RBI.constrainGenericRegister(DefReg, AArch64::GPR64RegClass, MRI); 5441 } else 5442 ExtI = MIB.buildInstr(Opcode, {DefReg}, {Src0}).addImm(Lane); 5443 5444 constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI); 5445 MI.eraseFromParent(); 5446 return true; 5447 } 5448 5449 bool AArch64InstructionSelector::selectInsertElt(MachineInstr &I, 5450 MachineRegisterInfo &MRI) { 5451 assert(I.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT); 5452 5453 // Get information on the destination. 5454 Register DstReg = I.getOperand(0).getReg(); 5455 const LLT DstTy = MRI.getType(DstReg); 5456 unsigned VecSize = DstTy.getSizeInBits(); 5457 5458 // Get information on the element we want to insert into the destination. 5459 Register EltReg = I.getOperand(2).getReg(); 5460 const LLT EltTy = MRI.getType(EltReg); 5461 unsigned EltSize = EltTy.getSizeInBits(); 5462 if (EltSize < 8 || EltSize > 64) 5463 return false; 5464 5465 // Find the definition of the index. Bail out if it's not defined by a 5466 // G_CONSTANT. 5467 Register IdxReg = I.getOperand(3).getReg(); 5468 auto VRegAndVal = getIConstantVRegValWithLookThrough(IdxReg, MRI); 5469 if (!VRegAndVal) 5470 return false; 5471 unsigned LaneIdx = VRegAndVal->Value.getSExtValue(); 5472 5473 // Perform the lane insert. 5474 Register SrcReg = I.getOperand(1).getReg(); 5475 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); 5476 5477 if (VecSize < 128) { 5478 // If the vector we're inserting into is smaller than 128 bits, widen it 5479 // to 128 to do the insert. 5480 MachineInstr *ScalarToVec = 5481 emitScalarToVector(VecSize, &AArch64::FPR128RegClass, SrcReg, MIB); 5482 if (!ScalarToVec) 5483 return false; 5484 SrcReg = ScalarToVec->getOperand(0).getReg(); 5485 } 5486 5487 // Create an insert into a new FPR128 register. 5488 // Note that if our vector is already 128 bits, we end up emitting an extra 5489 // register. 5490 MachineInstr *InsMI = 5491 emitLaneInsert(std::nullopt, SrcReg, EltReg, LaneIdx, EltRB, MIB); 5492 5493 if (VecSize < 128) { 5494 // If we had to widen to perform the insert, then we have to demote back to 5495 // the original size to get the result we want. 5496 Register DemoteVec = InsMI->getOperand(0).getReg(); 5497 const TargetRegisterClass *RC = 5498 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DemoteVec, MRI, TRI)); 5499 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { 5500 LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); 5501 return false; 5502 } 5503 unsigned SubReg = 0; 5504 if (!getSubRegForClass(RC, TRI, SubReg)) 5505 return false; 5506 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { 5507 LLVM_DEBUG(dbgs() << "Unsupported destination size! 
(" << VecSize 5508 << "\n"); 5509 return false; 5510 } 5511 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}) 5512 .addReg(DemoteVec, 0, SubReg); 5513 RBI.constrainGenericRegister(DstReg, *RC, MRI); 5514 } else { 5515 // No widening needed. 5516 InsMI->getOperand(0).setReg(DstReg); 5517 constrainSelectedInstRegOperands(*InsMI, TII, TRI, RBI); 5518 } 5519 5520 I.eraseFromParent(); 5521 return true; 5522 } 5523 5524 MachineInstr * 5525 AArch64InstructionSelector::emitConstantVector(Register Dst, Constant *CV, 5526 MachineIRBuilder &MIRBuilder, 5527 MachineRegisterInfo &MRI) { 5528 LLT DstTy = MRI.getType(Dst); 5529 unsigned DstSize = DstTy.getSizeInBits(); 5530 if (CV->isNullValue()) { 5531 if (DstSize == 128) { 5532 auto Mov = 5533 MIRBuilder.buildInstr(AArch64::MOVIv2d_ns, {Dst}, {}).addImm(0); 5534 constrainSelectedInstRegOperands(*Mov, TII, TRI, RBI); 5535 return &*Mov; 5536 } 5537 5538 if (DstSize == 64) { 5539 auto Mov = 5540 MIRBuilder 5541 .buildInstr(AArch64::MOVIv2d_ns, {&AArch64::FPR128RegClass}, {}) 5542 .addImm(0); 5543 auto Copy = MIRBuilder.buildInstr(TargetOpcode::COPY, {Dst}, {}) 5544 .addReg(Mov.getReg(0), 0, AArch64::dsub); 5545 RBI.constrainGenericRegister(Dst, AArch64::FPR64RegClass, MRI); 5546 return &*Copy; 5547 } 5548 } 5549 5550 auto *CPLoad = emitLoadFromConstantPool(CV, MIRBuilder); 5551 if (!CPLoad) { 5552 LLVM_DEBUG(dbgs() << "Could not generate cp load for constant vector!"); 5553 return nullptr; 5554 } 5555 5556 auto Copy = MIRBuilder.buildCopy(Dst, CPLoad->getOperand(0)); 5557 RBI.constrainGenericRegister( 5558 Dst, *MRI.getRegClass(CPLoad->getOperand(0).getReg()), MRI); 5559 return &*Copy; 5560 } 5561 5562 bool AArch64InstructionSelector::tryOptConstantBuildVec( 5563 MachineInstr &I, LLT DstTy, MachineRegisterInfo &MRI) { 5564 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 5565 unsigned DstSize = DstTy.getSizeInBits(); 5566 assert(DstSize <= 128 && "Unexpected build_vec type!"); 5567 if (DstSize < 32) 5568 return false; 5569 // Check if we're building a constant vector, in which case we want to 5570 // generate a constant pool load instead of a vector insert sequence. 5571 SmallVector<Constant *, 16> Csts; 5572 for (unsigned Idx = 1; Idx < I.getNumOperands(); ++Idx) { 5573 // Try to find G_CONSTANT or G_FCONSTANT 5574 auto *OpMI = 5575 getOpcodeDef(TargetOpcode::G_CONSTANT, I.getOperand(Idx).getReg(), MRI); 5576 if (OpMI) 5577 Csts.emplace_back( 5578 const_cast<ConstantInt *>(OpMI->getOperand(1).getCImm())); 5579 else if ((OpMI = getOpcodeDef(TargetOpcode::G_FCONSTANT, 5580 I.getOperand(Idx).getReg(), MRI))) 5581 Csts.emplace_back( 5582 const_cast<ConstantFP *>(OpMI->getOperand(1).getFPImm())); 5583 else 5584 return false; 5585 } 5586 Constant *CV = ConstantVector::get(Csts); 5587 if (!emitConstantVector(I.getOperand(0).getReg(), CV, MIB, MRI)) 5588 return false; 5589 I.eraseFromParent(); 5590 return true; 5591 } 5592 5593 bool AArch64InstructionSelector::tryOptBuildVecToSubregToReg( 5594 MachineInstr &I, MachineRegisterInfo &MRI) { 5595 // Given: 5596 // %vec = G_BUILD_VECTOR %elt, %undef, %undef, ... %undef 5597 // 5598 // Select the G_BUILD_VECTOR as a SUBREG_TO_REG from %elt. 5599 Register Dst = I.getOperand(0).getReg(); 5600 Register EltReg = I.getOperand(1).getReg(); 5601 LLT EltTy = MRI.getType(EltReg); 5602 // If the index isn't on the same bank as its elements, then this can't be a 5603 // SUBREG_TO_REG. 
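// For example (types and registers illustrative):
//
//   %vec:fpr(<2 x s32>) = G_BUILD_VECTOR %elt:fpr(s32), %undef
//
// can be selected as a SUBREG_TO_REG placing %elt in the ssub sub-register of
// a 64-bit FPR; the undef lanes need no explicit writes.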
5604 const RegisterBank &EltRB = *RBI.getRegBank(EltReg, MRI, TRI); 5605 const RegisterBank &DstRB = *RBI.getRegBank(Dst, MRI, TRI); 5606 if (EltRB != DstRB) 5607 return false; 5608 if (any_of(make_range(I.operands_begin() + 2, I.operands_end()), 5609 [&MRI](const MachineOperand &Op) { 5610 return !getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, Op.getReg(), 5611 MRI); 5612 })) 5613 return false; 5614 unsigned SubReg; 5615 const TargetRegisterClass *EltRC = getRegClassForTypeOnBank(EltTy, EltRB); 5616 if (!EltRC) 5617 return false; 5618 const TargetRegisterClass *DstRC = 5619 getRegClassForTypeOnBank(MRI.getType(Dst), DstRB); 5620 if (!DstRC) 5621 return false; 5622 if (!getSubRegForClass(EltRC, TRI, SubReg)) 5623 return false; 5624 auto SubregToReg = MIB.buildInstr(AArch64::SUBREG_TO_REG, {Dst}, {}) 5625 .addImm(0) 5626 .addUse(EltReg) 5627 .addImm(SubReg); 5628 I.eraseFromParent(); 5629 constrainSelectedInstRegOperands(*SubregToReg, TII, TRI, RBI); 5630 return RBI.constrainGenericRegister(Dst, *DstRC, MRI); 5631 } 5632 5633 bool AArch64InstructionSelector::selectBuildVector(MachineInstr &I, 5634 MachineRegisterInfo &MRI) { 5635 assert(I.getOpcode() == TargetOpcode::G_BUILD_VECTOR); 5636 // Until we port more of the optimized selections, for now just use a vector 5637 // insert sequence. 5638 const LLT DstTy = MRI.getType(I.getOperand(0).getReg()); 5639 const LLT EltTy = MRI.getType(I.getOperand(1).getReg()); 5640 unsigned EltSize = EltTy.getSizeInBits(); 5641 5642 if (tryOptConstantBuildVec(I, DstTy, MRI)) 5643 return true; 5644 if (tryOptBuildVecToSubregToReg(I, MRI)) 5645 return true; 5646 5647 if (EltSize != 8 && EltSize != 16 && EltSize != 32 && EltSize != 64) 5648 return false; // Don't support all element types yet. 5649 const RegisterBank &RB = *RBI.getRegBank(I.getOperand(1).getReg(), MRI, TRI); 5650 5651 const TargetRegisterClass *DstRC = &AArch64::FPR128RegClass; 5652 MachineInstr *ScalarToVec = 5653 emitScalarToVector(DstTy.getElementType().getSizeInBits(), DstRC, 5654 I.getOperand(1).getReg(), MIB); 5655 if (!ScalarToVec) 5656 return false; 5657 5658 Register DstVec = ScalarToVec->getOperand(0).getReg(); 5659 unsigned DstSize = DstTy.getSizeInBits(); 5660 5661 // Keep track of the last MI we inserted. Later on, we might be able to save 5662 // a copy using it. 5663 MachineInstr *PrevMI = nullptr; 5664 for (unsigned i = 2, e = DstSize / EltSize + 1; i < e; ++i) { 5665 // Note that if we don't do a subregister copy, we can end up making an 5666 // extra register. 5667 PrevMI = &*emitLaneInsert(std::nullopt, DstVec, I.getOperand(i).getReg(), 5668 i - 1, RB, MIB); 5669 DstVec = PrevMI->getOperand(0).getReg(); 5670 } 5671 5672 // If DstTy's size in bits is less than 128, then emit a subregister copy 5673 // from DstVec to the last register we've defined. 5674 if (DstSize < 128) { 5675 // Force this to be FPR using the destination vector. 5676 const TargetRegisterClass *RC = 5677 getRegClassForTypeOnBank(DstTy, *RBI.getRegBank(DstVec, MRI, TRI)); 5678 if (!RC) 5679 return false; 5680 if (RC != &AArch64::FPR32RegClass && RC != &AArch64::FPR64RegClass) { 5681 LLVM_DEBUG(dbgs() << "Unsupported register class!\n"); 5682 return false; 5683 } 5684 5685 unsigned SubReg = 0; 5686 if (!getSubRegForClass(RC, TRI, SubReg)) 5687 return false; 5688 if (SubReg != AArch64::ssub && SubReg != AArch64::dsub) { 5689 LLVM_DEBUG(dbgs() << "Unsupported destination size! 
(" << DstSize 5690 << "\n"); 5691 return false; 5692 } 5693 5694 Register Reg = MRI.createVirtualRegister(RC); 5695 Register DstReg = I.getOperand(0).getReg(); 5696 5697 MIB.buildInstr(TargetOpcode::COPY, {DstReg}, {}).addReg(DstVec, 0, SubReg); 5698 MachineOperand &RegOp = I.getOperand(1); 5699 RegOp.setReg(Reg); 5700 RBI.constrainGenericRegister(DstReg, *RC, MRI); 5701 } else { 5702 // We don't need a subregister copy. Save a copy by re-using the 5703 // destination register on the final insert. 5704 assert(PrevMI && "PrevMI was null?"); 5705 PrevMI->getOperand(0).setReg(I.getOperand(0).getReg()); 5706 constrainSelectedInstRegOperands(*PrevMI, TII, TRI, RBI); 5707 } 5708 5709 I.eraseFromParent(); 5710 return true; 5711 } 5712 5713 bool AArch64InstructionSelector::selectVectorLoadIntrinsic(unsigned Opc, 5714 unsigned NumVecs, 5715 MachineInstr &I) { 5716 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); 5717 assert(Opc && "Expected an opcode?"); 5718 assert(NumVecs > 1 && NumVecs < 5 && "Only support 2, 3, or 4 vectors"); 5719 auto &MRI = *MIB.getMRI(); 5720 LLT Ty = MRI.getType(I.getOperand(0).getReg()); 5721 unsigned Size = Ty.getSizeInBits(); 5722 assert((Size == 64 || Size == 128) && 5723 "Destination must be 64 bits or 128 bits?"); 5724 unsigned SubReg = Size == 64 ? AArch64::dsub0 : AArch64::qsub0; 5725 auto Ptr = I.getOperand(I.getNumOperands() - 1).getReg(); 5726 assert(MRI.getType(Ptr).isPointer() && "Expected a pointer type?"); 5727 auto Load = MIB.buildInstr(Opc, {Ty}, {Ptr}); 5728 Load.cloneMemRefs(I); 5729 constrainSelectedInstRegOperands(*Load, TII, TRI, RBI); 5730 Register SelectedLoadDst = Load->getOperand(0).getReg(); 5731 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) { 5732 auto Vec = MIB.buildInstr(TargetOpcode::COPY, {I.getOperand(Idx)}, {}) 5733 .addReg(SelectedLoadDst, 0, SubReg + Idx); 5734 // Emit the subreg copies and immediately select them. 5735 // FIXME: We should refactor our copy code into an emitCopy helper and 5736 // clean up uses of this pattern elsewhere in the selector. 5737 selectCopy(*Vec, TII, MRI, TRI, RBI); 5738 } 5739 return true; 5740 } 5741 5742 bool AArch64InstructionSelector::selectIntrinsicWithSideEffects( 5743 MachineInstr &I, MachineRegisterInfo &MRI) { 5744 // Find the intrinsic ID. 5745 unsigned IntrinID = I.getIntrinsicID(); 5746 5747 const LLT S8 = LLT::scalar(8); 5748 const LLT S16 = LLT::scalar(16); 5749 const LLT S32 = LLT::scalar(32); 5750 const LLT S64 = LLT::scalar(64); 5751 const LLT P0 = LLT::pointer(0, 64); 5752 // Select the instruction. 5753 switch (IntrinID) { 5754 default: 5755 return false; 5756 case Intrinsic::aarch64_ldxp: 5757 case Intrinsic::aarch64_ldaxp: { 5758 auto NewI = MIB.buildInstr( 5759 IntrinID == Intrinsic::aarch64_ldxp ? 
AArch64::LDXPX : AArch64::LDAXPX, 5760 {I.getOperand(0).getReg(), I.getOperand(1).getReg()}, 5761 {I.getOperand(3)}); 5762 NewI.cloneMemRefs(I); 5763 constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI); 5764 break; 5765 } 5766 case Intrinsic::trap: 5767 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(1); 5768 break; 5769 case Intrinsic::debugtrap: 5770 MIB.buildInstr(AArch64::BRK, {}, {}).addImm(0xF000); 5771 break; 5772 case Intrinsic::ubsantrap: 5773 MIB.buildInstr(AArch64::BRK, {}, {}) 5774 .addImm(I.getOperand(1).getImm() | ('U' << 8)); 5775 break; 5776 case Intrinsic::aarch64_neon_ld2: { 5777 LLT Ty = MRI.getType(I.getOperand(0).getReg()); 5778 unsigned Opc = 0; 5779 if (Ty == LLT::fixed_vector(8, S8)) 5780 Opc = AArch64::LD2Twov8b; 5781 else if (Ty == LLT::fixed_vector(16, S8)) 5782 Opc = AArch64::LD2Twov16b; 5783 else if (Ty == LLT::fixed_vector(4, S16)) 5784 Opc = AArch64::LD2Twov4h; 5785 else if (Ty == LLT::fixed_vector(8, S16)) 5786 Opc = AArch64::LD2Twov8h; 5787 else if (Ty == LLT::fixed_vector(2, S32)) 5788 Opc = AArch64::LD2Twov2s; 5789 else if (Ty == LLT::fixed_vector(4, S32)) 5790 Opc = AArch64::LD2Twov4s; 5791 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) 5792 Opc = AArch64::LD2Twov2d; 5793 else if (Ty == S64 || Ty == P0) 5794 Opc = AArch64::LD1Twov1d; 5795 else 5796 llvm_unreachable("Unexpected type for ld2!"); 5797 selectVectorLoadIntrinsic(Opc, 2, I); 5798 break; 5799 } 5800 case Intrinsic::aarch64_neon_ld4: { 5801 LLT Ty = MRI.getType(I.getOperand(0).getReg()); 5802 unsigned Opc = 0; 5803 if (Ty == LLT::fixed_vector(8, S8)) 5804 Opc = AArch64::LD4Fourv8b; 5805 else if (Ty == LLT::fixed_vector(16, S8)) 5806 Opc = AArch64::LD4Fourv16b; 5807 else if (Ty == LLT::fixed_vector(4, S16)) 5808 Opc = AArch64::LD4Fourv4h; 5809 else if (Ty == LLT::fixed_vector(8, S16)) 5810 Opc = AArch64::LD4Fourv8h; 5811 else if (Ty == LLT::fixed_vector(2, S32)) 5812 Opc = AArch64::LD4Fourv2s; 5813 else if (Ty == LLT::fixed_vector(4, S32)) 5814 Opc = AArch64::LD4Fourv4s; 5815 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) 5816 Opc = AArch64::LD4Fourv2d; 5817 else if (Ty == S64 || Ty == P0) 5818 Opc = AArch64::LD1Fourv1d; 5819 else 5820 llvm_unreachable("Unexpected type for ld4!"); 5821 selectVectorLoadIntrinsic(Opc, 4, I); 5822 break; 5823 } 5824 case Intrinsic::aarch64_neon_st2: { 5825 Register Src1 = I.getOperand(1).getReg(); 5826 Register Src2 = I.getOperand(2).getReg(); 5827 Register Ptr = I.getOperand(3).getReg(); 5828 LLT Ty = MRI.getType(Src1); 5829 unsigned Opc; 5830 if (Ty == LLT::fixed_vector(8, S8)) 5831 Opc = AArch64::ST2Twov8b; 5832 else if (Ty == LLT::fixed_vector(16, S8)) 5833 Opc = AArch64::ST2Twov16b; 5834 else if (Ty == LLT::fixed_vector(4, S16)) 5835 Opc = AArch64::ST2Twov4h; 5836 else if (Ty == LLT::fixed_vector(8, S16)) 5837 Opc = AArch64::ST2Twov8h; 5838 else if (Ty == LLT::fixed_vector(2, S32)) 5839 Opc = AArch64::ST2Twov2s; 5840 else if (Ty == LLT::fixed_vector(4, S32)) 5841 Opc = AArch64::ST2Twov4s; 5842 else if (Ty == LLT::fixed_vector(2, S64) || Ty == LLT::fixed_vector(2, P0)) 5843 Opc = AArch64::ST2Twov2d; 5844 else if (Ty == S64 || Ty == P0) 5845 Opc = AArch64::ST1Twov1d; 5846 else 5847 llvm_unreachable("Unexpected type for st2!"); 5848 SmallVector<Register, 2> Regs = {Src1, Src2}; 5849 Register Tuple = Ty.getSizeInBits() == 128 ? 
createQTuple(Regs, MIB)
5850 : createDTuple(Regs, MIB);
5851 auto Store = MIB.buildInstr(Opc, {}, {Tuple, Ptr});
5852 Store.cloneMemRefs(I);
5853 constrainSelectedInstRegOperands(*Store, TII, TRI, RBI);
5854 break;
5855 }
5856 case Intrinsic::aarch64_mops_memset_tag: {
5857 // Transform
5858 // %dst:gpr(p0) = \
5859 // G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.aarch64.mops.memset.tag),
5860 // \ %dst:gpr(p0), %val:gpr(s64), %n:gpr(s64)
5861 // where %dst is updated, into
5862 // (%Rd:GPR64common, %Rn:GPR64) = \
5863 // MOPSMemorySetTaggingPseudo \
5864 // %Rd:GPR64common, %Rn:GPR64, %Rm:GPR64
5865 // where Rd and Rn are tied.
5866 // It is expected that %val has been extended to s64 in legalization.
5867 // Note that the order of the size/value operands is swapped.
5868
5869 Register DstDef = I.getOperand(0).getReg();
5870 // I.getOperand(1) is the intrinsic function
5871 Register DstUse = I.getOperand(2).getReg();
5872 Register ValUse = I.getOperand(3).getReg();
5873 Register SizeUse = I.getOperand(4).getReg();
5874
5875 // MOPSMemorySetTaggingPseudo has two defs; the intrinsic call has only one.
5876 // Therefore an additional virtual register is required for the updated size
5877 // operand. This value is not accessible via the semantics of the intrinsic.
5878 Register SizeDef = MRI.createGenericVirtualRegister(LLT::scalar(64));
5879
5880 auto Memset = MIB.buildInstr(AArch64::MOPSMemorySetTaggingPseudo,
5881 {DstDef, SizeDef}, {DstUse, SizeUse, ValUse});
5882 Memset.cloneMemRefs(I);
5883 constrainSelectedInstRegOperands(*Memset, TII, TRI, RBI);
5884 break;
5885 }
5886 }
5887
5888 I.eraseFromParent();
5889 return true;
5890 }
5891
5892 bool AArch64InstructionSelector::selectIntrinsic(MachineInstr &I,
5893 MachineRegisterInfo &MRI) {
5894 unsigned IntrinID = I.getIntrinsicID();
5895
5896 switch (IntrinID) {
5897 default:
5898 break;
5899 case Intrinsic::aarch64_crypto_sha1h: {
5900 Register DstReg = I.getOperand(0).getReg();
5901 Register SrcReg = I.getOperand(2).getReg();
5902
5903 // FIXME: Should this be an assert?
5904 if (MRI.getType(DstReg).getSizeInBits() != 32 ||
5905 MRI.getType(SrcReg).getSizeInBits() != 32)
5906 return false;
5907
5908 // The operation has to happen on FPRs. Set up some new FPR registers for
5909 // the source and destination if they are on GPRs.
5910 if (RBI.getRegBank(SrcReg, MRI, TRI)->getID() != AArch64::FPRRegBankID) {
5911 SrcReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5912 MIB.buildCopy({SrcReg}, {I.getOperand(2)});
5913
5914 // Make sure the copy ends up getting constrained properly.
5915 RBI.constrainGenericRegister(I.getOperand(2).getReg(),
5916 AArch64::GPR32RegClass, MRI);
5917 }
5918
5919 if (RBI.getRegBank(DstReg, MRI, TRI)->getID() != AArch64::FPRRegBankID)
5920 DstReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
5921
5922 // Actually insert the instruction.
5923 auto SHA1Inst = MIB.buildInstr(AArch64::SHA1Hrr, {DstReg}, {SrcReg});
5924 constrainSelectedInstRegOperands(*SHA1Inst, TII, TRI, RBI);
5925
5926 // Did we create a new register for the destination?
5927 if (DstReg != I.getOperand(0).getReg()) {
5928 // Yep. Copy the result of the instruction back into the original
5929 // destination.
5930 MIB.buildCopy({I.getOperand(0)}, {DstReg}); 5931 RBI.constrainGenericRegister(I.getOperand(0).getReg(), 5932 AArch64::GPR32RegClass, MRI); 5933 } 5934 5935 I.eraseFromParent(); 5936 return true; 5937 } 5938 case Intrinsic::ptrauth_sign: { 5939 Register DstReg = I.getOperand(0).getReg(); 5940 Register ValReg = I.getOperand(2).getReg(); 5941 uint64_t Key = I.getOperand(3).getImm(); 5942 Register DiscReg = I.getOperand(4).getReg(); 5943 auto DiscVal = getIConstantVRegVal(DiscReg, MRI); 5944 bool IsDiscZero = DiscVal && DiscVal->isZero(); 5945 5946 if (Key > AArch64PACKey::LAST) 5947 return false; 5948 5949 unsigned Opcodes[][4] = { 5950 {AArch64::PACIA, AArch64::PACIB, AArch64::PACDA, AArch64::PACDB}, 5951 {AArch64::PACIZA, AArch64::PACIZB, AArch64::PACDZA, AArch64::PACDZB}}; 5952 unsigned Opcode = Opcodes[IsDiscZero][Key]; 5953 5954 auto PAC = MIB.buildInstr(Opcode, {DstReg}, {ValReg}); 5955 5956 if (!IsDiscZero) { 5957 PAC.addUse(DiscReg); 5958 RBI.constrainGenericRegister(DiscReg, AArch64::GPR64spRegClass, MRI); 5959 } 5960 5961 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); 5962 I.eraseFromParent(); 5963 return true; 5964 } 5965 case Intrinsic::ptrauth_strip: { 5966 Register DstReg = I.getOperand(0).getReg(); 5967 Register ValReg = I.getOperand(2).getReg(); 5968 uint64_t Key = I.getOperand(3).getImm(); 5969 5970 if (Key > AArch64PACKey::LAST) 5971 return false; 5972 unsigned Opcode = getXPACOpcodeForKey((AArch64PACKey::ID)Key); 5973 5974 MIB.buildInstr(Opcode, {DstReg}, {ValReg}); 5975 5976 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); 5977 RBI.constrainGenericRegister(ValReg, AArch64::GPR64RegClass, MRI); 5978 I.eraseFromParent(); 5979 return true; 5980 } 5981 case Intrinsic::ptrauth_blend: { 5982 MachineFunction &MF = *I.getParent()->getParent(); 5983 auto RHS = getIConstantVRegVal(I.getOperand(3).getReg(), MRI); 5984 if (RHS && (RHS->getZExtValue() <= 0xffff)) { 5985 I.setDesc(TII.get(AArch64::MOVKXi)); 5986 I.removeOperand(3); 5987 I.removeOperand(1); 5988 MachineInstrBuilder(MF, I) 5989 .addImm(RHS->getZExtValue() & 0xffff) 5990 .addImm(48) 5991 .constrainAllUses(TII, TRI, RBI); 5992 } else { 5993 I.setDesc(TII.get(AArch64::BFMXri)); 5994 I.removeOperand(1); 5995 MachineInstrBuilder(MF, I).addImm(16).addImm(15).constrainAllUses( 5996 TII, TRI, RBI); 5997 } 5998 return true; 5999 } 6000 case Intrinsic::frameaddress: 6001 case Intrinsic::returnaddress: { 6002 MachineFunction &MF = *I.getParent()->getParent(); 6003 MachineFrameInfo &MFI = MF.getFrameInfo(); 6004 6005 unsigned Depth = I.getOperand(2).getImm(); 6006 Register DstReg = I.getOperand(0).getReg(); 6007 RBI.constrainGenericRegister(DstReg, AArch64::GPR64RegClass, MRI); 6008 6009 if (Depth == 0 && IntrinID == Intrinsic::returnaddress) { 6010 if (!MFReturnAddr) { 6011 // Insert the copy from LR/X30 into the entry block, before it can be 6012 // clobbered by anything. 
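// (The live-in copy of LR is cached in MFReturnAddr, so repeated
// llvm.returnaddress(0) calls within one function reuse a single copy.)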
6013 MFI.setReturnAddressIsTaken(true); 6014 MFReturnAddr = getFunctionLiveInPhysReg( 6015 MF, TII, AArch64::LR, AArch64::GPR64RegClass, I.getDebugLoc()); 6016 } 6017 6018 if (STI.hasPAuth()) { 6019 MIB.buildInstr(AArch64::XPACI, {DstReg}, {MFReturnAddr}); 6020 } else { 6021 MIB.buildCopy({Register(AArch64::LR)}, {MFReturnAddr}); 6022 MIB.buildInstr(AArch64::XPACLRI); 6023 MIB.buildCopy({DstReg}, {Register(AArch64::LR)}); 6024 } 6025 6026 I.eraseFromParent(); 6027 return true; 6028 } 6029 6030 MFI.setFrameAddressIsTaken(true); 6031 Register FrameAddr(AArch64::FP); 6032 while (Depth--) { 6033 Register NextFrame = MRI.createVirtualRegister(&AArch64::GPR64spRegClass); 6034 auto Ldr = 6035 MIB.buildInstr(AArch64::LDRXui, {NextFrame}, {FrameAddr}).addImm(0); 6036 constrainSelectedInstRegOperands(*Ldr, TII, TRI, RBI); 6037 FrameAddr = NextFrame; 6038 } 6039 6040 if (IntrinID == Intrinsic::frameaddress) 6041 MIB.buildCopy({DstReg}, {FrameAddr}); 6042 else { 6043 MFI.setReturnAddressIsTaken(true); 6044 6045 if (STI.hasPAuth()) { 6046 Register TmpReg = MRI.createVirtualRegister(&AArch64::GPR64RegClass); 6047 MIB.buildInstr(AArch64::LDRXui, {TmpReg}, {FrameAddr}).addImm(1); 6048 MIB.buildInstr(AArch64::XPACI, {DstReg}, {TmpReg}); 6049 } else { 6050 MIB.buildInstr(AArch64::LDRXui, {Register(AArch64::LR)}, {FrameAddr}) 6051 .addImm(1); 6052 MIB.buildInstr(AArch64::XPACLRI); 6053 MIB.buildCopy({DstReg}, {Register(AArch64::LR)}); 6054 } 6055 } 6056 6057 I.eraseFromParent(); 6058 return true; 6059 } 6060 case Intrinsic::swift_async_context_addr: 6061 auto Sub = MIB.buildInstr(AArch64::SUBXri, {I.getOperand(0).getReg()}, 6062 {Register(AArch64::FP)}) 6063 .addImm(8) 6064 .addImm(0); 6065 constrainSelectedInstRegOperands(*Sub, TII, TRI, RBI); 6066 6067 MF->getFrameInfo().setFrameAddressIsTaken(true); 6068 MF->getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); 6069 I.eraseFromParent(); 6070 return true; 6071 } 6072 return false; 6073 } 6074 6075 InstructionSelector::ComplexRendererFns 6076 AArch64InstructionSelector::selectShiftA_32(const MachineOperand &Root) const { 6077 auto MaybeImmed = getImmedFromMO(Root); 6078 if (MaybeImmed == std::nullopt || *MaybeImmed > 31) 6079 return std::nullopt; 6080 uint64_t Enc = (32 - *MaybeImmed) & 0x1f; 6081 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 6082 } 6083 6084 InstructionSelector::ComplexRendererFns 6085 AArch64InstructionSelector::selectShiftB_32(const MachineOperand &Root) const { 6086 auto MaybeImmed = getImmedFromMO(Root); 6087 if (MaybeImmed == std::nullopt || *MaybeImmed > 31) 6088 return std::nullopt; 6089 uint64_t Enc = 31 - *MaybeImmed; 6090 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 6091 } 6092 6093 InstructionSelector::ComplexRendererFns 6094 AArch64InstructionSelector::selectShiftA_64(const MachineOperand &Root) const { 6095 auto MaybeImmed = getImmedFromMO(Root); 6096 if (MaybeImmed == std::nullopt || *MaybeImmed > 63) 6097 return std::nullopt; 6098 uint64_t Enc = (64 - *MaybeImmed) & 0x3f; 6099 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 6100 } 6101 6102 InstructionSelector::ComplexRendererFns 6103 AArch64InstructionSelector::selectShiftB_64(const MachineOperand &Root) const { 6104 auto MaybeImmed = getImmedFromMO(Root); 6105 if (MaybeImmed == std::nullopt || *MaybeImmed > 63) 6106 return std::nullopt; 6107 uint64_t Enc = 63 - *MaybeImmed; 6108 return {{[=](MachineInstrBuilder &MIB) { MIB.addImm(Enc); }}}; 6109 } 6110 6111 /// Helper to select an immediate value that can be 
represented as a 12-bit
6112 /// value shifted left by either 0 or 12. If it is possible to do so, return
6113 /// the immediate and shift value. If not, return std::nullopt.
6114 ///
6115 /// Used by selectArithImmed and selectNegArithImmed.
6116 InstructionSelector::ComplexRendererFns
6117 AArch64InstructionSelector::select12BitValueWithLeftShift(
6118 uint64_t Immed) const {
6119 unsigned ShiftAmt;
6120 if (Immed >> 12 == 0) {
6121 ShiftAmt = 0;
6122 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
6123 ShiftAmt = 12;
6124 Immed = Immed >> 12;
6125 } else
6126 return std::nullopt;
6127
6128 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
6129 return {{
6130 [=](MachineInstrBuilder &MIB) { MIB.addImm(Immed); },
6131 [=](MachineInstrBuilder &MIB) { MIB.addImm(ShVal); },
6132 }};
6133 }
6134
6135 /// SelectArithImmed - Select an immediate value that can be represented as
6136 /// a 12-bit value shifted left by either 0 or 12. If so, return true with
6137 /// Val set to the 12-bit value and Shift set to the shifter operand.
6138 InstructionSelector::ComplexRendererFns
6139 AArch64InstructionSelector::selectArithImmed(MachineOperand &Root) const {
6140 // This function is called from the addsub_shifted_imm ComplexPattern,
6141 // which lists [imm] as the list of opcodes it's interested in; however,
6142 // we still need to check whether the operand is actually an immediate
6143 // here because the ComplexPattern opcode list is only used in
6144 // root-level opcode matching.
6145 auto MaybeImmed = getImmedFromMO(Root);
6146 if (MaybeImmed == std::nullopt)
6147 return std::nullopt;
6148 return select12BitValueWithLeftShift(*MaybeImmed);
6149 }
6150
6151 /// SelectNegArithImmed - As above, but negates the value before trying to
6152 /// select it.
6153 InstructionSelector::ComplexRendererFns
6154 AArch64InstructionSelector::selectNegArithImmed(MachineOperand &Root) const {
6155 // We need a register here, because we need to know if we have a 64 or 32
6156 // bit immediate.
6157 if (!Root.isReg())
6158 return std::nullopt;
6159 auto MaybeImmed = getImmedFromMO(Root);
6160 if (MaybeImmed == std::nullopt)
6161 return std::nullopt;
6162 uint64_t Immed = *MaybeImmed;
6163
6164 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
6165 // have the opposite effect on the C flag, so this pattern mustn't match under
6166 // those circumstances.
6167 if (Immed == 0)
6168 return std::nullopt;
6169
6170 // Check whether we're dealing with a 32-bit type or a 64-bit type on
6171 // the root.
6172 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
6173 if (MRI.getType(Root.getReg()).getSizeInBits() == 32)
6174 Immed = ~((uint32_t)Immed) + 1;
6175 else
6176 Immed = ~Immed + 1ULL;
6177
6178 if (Immed & 0xFFFFFFFFFF000000ULL)
6179 return std::nullopt;
6180
6181 Immed &= 0xFFFFFFULL;
6182 return select12BitValueWithLeftShift(Immed);
6183 }
6184
6185 /// Return true if it is worth folding MI into an extended register. That is,
6186 /// if it's safe to pull it into the addressing mode of a load or store as a
6187 /// shift.
6188 bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
6189 MachineInstr &MI, const MachineRegisterInfo &MRI) const {
6190 // Always fold if there is one use, or if we're optimizing for size.
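// For example, folding "%off = G_SHL %idx, 3" into "ldr x0, [x1, x2, lsl #3]"
// (registers illustrative) is a clear win when the shift has a single user.
// With multiple users we only fold if the subtarget shifts cheaply in
// addressing modes (hasLSLFast) and every user is a load/store, since
// otherwise the shifted value gets computed both folded and standalone.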
6191 Register DefReg = MI.getOperand(0).getReg(); 6192 if (MRI.hasOneNonDBGUse(DefReg) || 6193 MI.getParent()->getParent()->getFunction().hasOptSize()) 6194 return true; 6195 6196 // It's better to avoid folding and recomputing shifts when we don't have a 6197 // fastpath. 6198 if (!STI.hasLSLFast()) 6199 return false; 6200 6201 // We have a fastpath, so folding a shift in and potentially computing it 6202 // many times may be beneficial. Check if this is only used in memory ops. 6203 // If it is, then we should fold. 6204 return all_of(MRI.use_nodbg_instructions(DefReg), 6205 [](MachineInstr &Use) { return Use.mayLoadOrStore(); }); 6206 } 6207 6208 static bool isSignExtendShiftType(AArch64_AM::ShiftExtendType Type) { 6209 switch (Type) { 6210 case AArch64_AM::SXTB: 6211 case AArch64_AM::SXTH: 6212 case AArch64_AM::SXTW: 6213 return true; 6214 default: 6215 return false; 6216 } 6217 } 6218 6219 InstructionSelector::ComplexRendererFns 6220 AArch64InstructionSelector::selectExtendedSHL( 6221 MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset, 6222 unsigned SizeInBytes, bool WantsExt) const { 6223 assert(Base.isReg() && "Expected base to be a register operand"); 6224 assert(Offset.isReg() && "Expected offset to be a register operand"); 6225 6226 MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo(); 6227 MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg()); 6228 6229 unsigned OffsetOpc = OffsetInst->getOpcode(); 6230 bool LookedThroughZExt = false; 6231 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) { 6232 // Try to look through a ZEXT. 6233 if (OffsetOpc != TargetOpcode::G_ZEXT || !WantsExt) 6234 return std::nullopt; 6235 6236 OffsetInst = MRI.getVRegDef(OffsetInst->getOperand(1).getReg()); 6237 OffsetOpc = OffsetInst->getOpcode(); 6238 LookedThroughZExt = true; 6239 6240 if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL) 6241 return std::nullopt; 6242 } 6243 // Make sure that the memory op is a valid size. 6244 int64_t LegalShiftVal = Log2_32(SizeInBytes); 6245 if (LegalShiftVal == 0) 6246 return std::nullopt; 6247 if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI)) 6248 return std::nullopt; 6249 6250 // Now, try to find the specific G_CONSTANT. Start by assuming that the 6251 // register we will offset is the LHS, and the register containing the 6252 // constant is the RHS. 6253 Register OffsetReg = OffsetInst->getOperand(1).getReg(); 6254 Register ConstantReg = OffsetInst->getOperand(2).getReg(); 6255 auto ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI); 6256 if (!ValAndVReg) { 6257 // We didn't get a constant on the RHS. If the opcode is a shift, then 6258 // we're done. 6259 if (OffsetOpc == TargetOpcode::G_SHL) 6260 return std::nullopt; 6261 6262 // If we have a G_MUL, we can use either register. Try looking at the RHS. 6263 std::swap(OffsetReg, ConstantReg); 6264 ValAndVReg = getIConstantVRegValWithLookThrough(ConstantReg, MRI); 6265 if (!ValAndVReg) 6266 return std::nullopt; 6267 } 6268 6269 // The value must fit into 3 bits, and must be positive. Make sure that is 6270 // true. 6271 int64_t ImmVal = ValAndVReg->Value.getSExtValue(); 6272 6273 // Since we're going to pull this into a shift, the constant value must be 6274 // a power of 2. If we got a multiply, then we need to check this. 6275 if (OffsetOpc == TargetOpcode::G_MUL) { 6276 if (!llvm::has_single_bit<uint32_t>(ImmVal)) 6277 return std::nullopt; 6278 6279 // Got a power of 2. 
    ImmVal = Log2_32(ImmVal);
  }

  if ((ImmVal & 0x7) != ImmVal)
    return std::nullopt;

  // We are only allowed to shift by LegalShiftVal. This shift value is built
  // into the instruction, so we can't just use whatever we want.
  if (ImmVal != LegalShiftVal)
    return std::nullopt;

  unsigned SignExtend = 0;
  if (WantsExt) {
    // Check if the offset is defined by an extend, unless we looked through a
    // G_ZEXT earlier.
    if (!LookedThroughZExt) {
      MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
      auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
      if (Ext == AArch64_AM::InvalidShiftExtend)
        return std::nullopt;

      SignExtend = isSignExtendShiftType(Ext) ? 1 : 0;
      // We only support SXTW for signed extension here.
      if (SignExtend && Ext != AArch64_AM::SXTW)
        return std::nullopt;
      OffsetReg = ExtInst->getOperand(1).getReg();
    }

    // Need a 32-bit wide register here.
    MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
    OffsetReg = moveScalarRegClass(OffsetReg, AArch64::GPR32RegClass, MIB);
  }

  // We can use the LHS of the GEP as the base, and the LHS of the shift as an
  // offset. Signify that we are shifting by setting the shift flag to 1.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are
             // both added to the instruction.
             MIB.addImm(SignExtend);
             MIB.addImm(1);
           }}};
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3, lsl #3]
///
/// Where x2 is the base register, and x3 is an offset register. The shift-left
/// is a constant value specific to this load instruction. That is, we'll never
/// see anything other than a 3 here (which corresponds to the size of the
/// element being loaded).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
    MachineOperand &Root, unsigned SizeInBytes) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We want to find something like this:
  //
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // And fold it into this addressing mode:
  //
  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]

  // Check if we can find the G_PTR_ADD.
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return std::nullopt;

  // Now, try to match an opcode which will match our specific offset.
  // We want a G_SHL or a G_MUL.
  MachineInstr *OffsetInst =
      getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
  return selectExtendedSHL(Root, PtrAdd->getOperand(1),
                           OffsetInst->getOperand(0), SizeInBytes,
                           /*WantsExt=*/false);
}

/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3]
///
/// Where x2 is the base register, and x3 is an offset register.
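/// Unlike selectAddrModeShiftedExtendXReg above, the offset register is used
/// as-is, i.e. with an "lsl #0".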
///
/// When possible (or profitable) to fold a G_PTR_ADD into the address
/// calculation, this will do so. Otherwise, it will return std::nullopt.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeRegisterOffset(
    MachineOperand &Root) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  // We need a GEP.
  MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
  if (Gep->getOpcode() != TargetOpcode::G_PTR_ADD)
    return std::nullopt;

  // If this is used more than once, let's not bother folding.
  // TODO: Check if they are memory ops. If they are, then we can still fold
  // without having to recompute anything.
  if (!MRI.hasOneNonDBGUse(Gep->getOperand(0).getReg()))
    return std::nullopt;

  // Base is the GEP's LHS, offset is its RHS.
  return {{[=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(1).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             MIB.addUse(Gep->getOperand(2).getReg());
           },
           [=](MachineInstrBuilder &MIB) {
             // Need to add both immediates here to make sure that they are
             // both added to the instruction.
             MIB.addImm(0);
             MIB.addImm(0);
           }}};
}

/// This is intended to be equivalent to selectAddrModeXRO in
/// AArch64ISelDAGToDAG. It's used for selecting X register offset loads.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
  if (!Root.isReg())
    return std::nullopt;
  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd)
    return std::nullopt;

  // Check for immediates which cannot be encoded in the [base + imm]
  // addressing mode, and can't be encoded in an add/sub. If this happens,
  // we'll end up with code like:
  //
  // mov x0, wide
  // add x1, base, x0
  // ldr x2, [x1, x0]
  //
  // In this situation, we can use the [base, xreg] addressing mode to save an
  // add/sub:
  //
  // mov x0, wide
  // ldr x2, [base, x0]
  auto ValAndVReg =
      getIConstantVRegValWithLookThrough(PtrAdd->getOperand(2).getReg(), MRI);
  if (ValAndVReg) {
    unsigned Scale = Log2_32(SizeInBytes);
    int64_t ImmOff = ValAndVReg->Value.getSExtValue();

    // Skip immediates that can be selected in the load/store addressing
    // mode.
    if (ImmOff % SizeInBytes == 0 && ImmOff >= 0 &&
        ImmOff < (0x1000 << Scale))
      return std::nullopt;

    // Helper lambda to decide whether or not it is preferable to emit an add.
    auto isPreferredADD = [](int64_t ImmOff) {
      // Constants in [0x0, 0xfff] can be encoded in an add.
      if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
        return true;

      // Can it be encoded in an add lsl #12?
      if ((ImmOff & 0xffffffffff000fffLL) != 0x0LL)
        return false;

      // It can be encoded in an add lsl #12, but we may not want to. If it is
      // possible to select this as a single movz, then prefer that. A single
      // movz is faster than an add with a shift.
      return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
             (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
    };

    // If the immediate can be encoded in a single add/sub, then bail out.
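    // (Illustrative: an offset such as 0x2a fits a plain "add", so the
    // [base, xreg] form would not save anything there.)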
    if (isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return std::nullopt;
  }

  // Try to fold shifts into the addressing mode.
  auto AddrModeFns = selectAddrModeShiftedExtendXReg(Root, SizeInBytes);
  if (AddrModeFns)
    return AddrModeFns;

  // If that doesn't work, see if it's possible to fold in registers from
  // a GEP.
  return selectAddrModeRegisterOffset(Root);
}

/// This is used for computing addresses like this:
///
/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
///
/// Where we have a 64-bit base register, a 32-bit offset register, and an
/// extend (which may or may not be signed).
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
                                              unsigned SizeInBytes) const {
  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();

  MachineInstr *PtrAdd =
      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
    return std::nullopt;

  MachineOperand &LHS = PtrAdd->getOperand(1);
  MachineOperand &RHS = PtrAdd->getOperand(2);
  MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);

  // The first case is the same as selectAddrModeXRO, except we need an extend.
  // In this case, we try to find a shift and extend, and fold them into the
  // addressing mode.
  //
  // E.g.
  //
  // off_reg = G_Z/S/ANYEXT ext_reg
  // val = G_CONSTANT LegalShiftVal
  // shift = G_SHL off_reg val
  // ptr = G_PTR_ADD base_reg shift
  // x = G_LOAD ptr
  //
  // In this case we can get a load like this:
  //
  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
                                       SizeInBytes, /*WantsExt=*/true);
  if (ExtendedShl)
    return ExtendedShl;

  // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
  //
  // e.g.
  // ldr something, [base_reg, ext_reg, sxtw]
  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
    return std::nullopt;

  // Check if this is an extend. We'll get an extend type if it is.
  AArch64_AM::ShiftExtendType Ext =
      getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
  if (Ext == AArch64_AM::InvalidShiftExtend)
    return std::nullopt;

  // Need a 32-bit wide register.
  MachineIRBuilder MIB(*PtrAdd);
  Register ExtReg = moveScalarRegClass(OffsetInst->getOperand(1).getReg(),
                                       AArch64::GPR32RegClass, MIB);
  unsigned SignExtend = Ext == AArch64_AM::SXTW;

  // Base is LHS, offset is ExtReg.
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
           [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(SignExtend);
             MIB.addImm(0);
           }}};
}

/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
/// memory reference, which is needed here to know what is valid for a scaled
/// immediate.
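///
/// For example (illustrative), an offset of 3 on a 4-byte load is not a
/// multiple of the access size, so it cannot use the scaled form, but it does
/// fit in the signed 9-bit range [-256, 255] accepted by the unscaled (LDUR)
/// form.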
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeUnscaled(MachineOperand &Root,
                                                   unsigned Size) const {
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  if (!Root.isReg())
    return std::nullopt;

  if (!isBaseWithConstantOffset(Root, MRI))
    return std::nullopt;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());

  MachineOperand &OffImm = RootDef->getOperand(2);
  if (!OffImm.isReg())
    return std::nullopt;
  MachineInstr *RHS = MRI.getVRegDef(OffImm.getReg());
  if (RHS->getOpcode() != TargetOpcode::G_CONSTANT)
    return std::nullopt;
  int64_t RHSC;
  MachineOperand &RHSOp1 = RHS->getOperand(1);
  if (!RHSOp1.isCImm() || RHSOp1.getCImm()->getBitWidth() > 64)
    return std::nullopt;
  RHSC = RHSOp1.getCImm()->getSExtValue();

  // If the offset is valid as a scaled immediate, don't match here.
  if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
      RHSC < (0x1000 << Log2_32(Size)))
    return std::nullopt;
  if (RHSC >= -256 && RHSC < 256) {
    MachineOperand &Base = RootDef->getOperand(1);
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(Base); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC); },
    }};
  }
  return std::nullopt;
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::tryFoldAddLowIntoImm(MachineInstr &RootDef,
                                                 unsigned Size,
                                                 MachineRegisterInfo &MRI) const {
  if (RootDef.getOpcode() != AArch64::G_ADD_LOW)
    return std::nullopt;
  MachineInstr &Adrp = *MRI.getVRegDef(RootDef.getOperand(1).getReg());
  if (Adrp.getOpcode() != AArch64::ADRP)
    return std::nullopt;

  // TODO: add heuristics like isWorthFoldingADDlow() from SelectionDAG.
  auto Offset = Adrp.getOperand(1).getOffset();
  if (Offset % Size != 0)
    return std::nullopt;

  auto GV = Adrp.getOperand(1).getGlobal();
  if (GV->isThreadLocal())
    return std::nullopt;

  auto &MF = *RootDef.getParent()->getParent();
  if (GV->getPointerAlignment(MF.getDataLayout()) < Size)
    return std::nullopt;

  unsigned OpFlags = STI.ClassifyGlobalReference(GV, MF.getTarget());
  MachineIRBuilder MIRBuilder(RootDef);
  Register AdrpReg = Adrp.getOperand(0).getReg();
  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(AdrpReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addGlobalAddress(GV, Offset,
                                  OpFlags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
           }}};
}

/// Select a "register plus scaled unsigned 12-bit immediate" address. The
/// "Size" argument is the size in bytes of the memory reference, which
/// determines the scale.
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectAddrModeIndexed(MachineOperand &Root,
                                                  unsigned Size) const {
  MachineFunction &MF = *Root.getParent()->getParent()->getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  if (!Root.isReg())
    return std::nullopt;

  MachineInstr *RootDef = MRI.getVRegDef(Root.getReg());
  if (RootDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    return {{
        [=](MachineInstrBuilder &MIB) { MIB.add(RootDef->getOperand(1)); },
        [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
    }};
  }

  CodeModel::Model CM = MF.getTarget().getCodeModel();
  // Check if we can fold in the ADD of a small code model ADRP + ADD address
  // computation.
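  // Roughly, this turns "adrp x8, sym; add x8, x8, :lo12:sym; ldr x0, [x8]"
  // into "adrp x8, sym; ldr x0, [x8, :lo12:sym]" (illustrative sketch).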
  if (CM == CodeModel::Small) {
    auto OpFns = tryFoldAddLowIntoImm(*RootDef, Size, MRI);
    if (OpFns)
      return OpFns;
  }

  if (isBaseWithConstantOffset(Root, MRI)) {
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineOperand &RHS = RootDef->getOperand(2);
    MachineInstr *LHSDef = MRI.getVRegDef(LHS.getReg());
    MachineInstr *RHSDef = MRI.getVRegDef(RHS.getReg());

    int64_t RHSC = (int64_t)RHSDef->getOperand(1).getCImm()->getZExtValue();
    unsigned Scale = Log2_32(Size);
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
      if (LHSDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
        return {{
            [=](MachineInstrBuilder &MIB) { MIB.add(LHSDef->getOperand(1)); },
            [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
        }};

      return {{
          [=](MachineInstrBuilder &MIB) { MIB.add(LHS); },
          [=](MachineInstrBuilder &MIB) { MIB.addImm(RHSC >> Scale); },
      }};
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (selectAddrModeUnscaled(Root, Size))
    return std::nullopt;

  return {{
      [=](MachineInstrBuilder &MIB) { MIB.add(Root); },
      [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
  }};
}

/// Given a shift instruction, return the correct shift type for that
/// instruction.
static AArch64_AM::ShiftExtendType getShiftTypeForInst(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case TargetOpcode::G_SHL:
    return AArch64_AM::LSL;
  case TargetOpcode::G_LSHR:
    return AArch64_AM::LSR;
  case TargetOpcode::G_ASHR:
    return AArch64_AM::ASR;
  case TargetOpcode::G_ROTR:
    return AArch64_AM::ROR;
  }
}

/// Select a "shifted register" operand. If the value is not shifted, set the
/// shift operand to a default value of "lsl 0".
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root,
                                                  bool AllowROR) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  // Check if the operand is defined by an instruction which corresponds to
  // a ShiftExtendType. E.g. a G_SHL, G_LSHR, etc.
  MachineInstr *ShiftInst = MRI.getVRegDef(Root.getReg());
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForInst(*ShiftInst);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return std::nullopt;
  if (ShType == AArch64_AM::ROR && !AllowROR)
    return std::nullopt;
  if (!isWorthFoldingIntoExtendedReg(*ShiftInst, MRI))
    return std::nullopt;

  // Need an immediate on the RHS.
  MachineOperand &ShiftRHS = ShiftInst->getOperand(2);
  auto Immed = getImmedFromMO(ShiftRHS);
  if (!Immed)
    return std::nullopt;

  // We have something that we can fold. Fold in the shift's LHS and RHS into
  // the instruction.
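  // e.g. an add fed by (%x << 2) can become "add ..., ..., <Xn>, lsl #2"
  // (illustrative).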
  MachineOperand &ShiftLHS = ShiftInst->getOperand(1);
  Register ShiftReg = ShiftLHS.getReg();

  unsigned NumBits = MRI.getType(ShiftReg).getSizeInBits();
  unsigned Val = *Immed & (NumBits - 1);
  unsigned ShiftVal = AArch64_AM::getShifterImm(ShType, Val);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ShiftReg); },
           [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
}

AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
  unsigned Opc = MI.getOpcode();

  // Handle explicit extend instructions first.
  if (Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG) {
    unsigned Size;
    if (Opc == TargetOpcode::G_SEXT)
      Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    else
      Size = MI.getOperand(2).getImm();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::SXTH;
    case 32:
      return AArch64_AM::SXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  if (Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_ANYEXT) {
    unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    assert(Size != 64 && "Extend from 64 bits?");
    switch (Size) {
    case 8:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTB;
    case 16:
      return IsLoadStore ? AArch64_AM::InvalidShiftExtend : AArch64_AM::UXTH;
    case 32:
      return AArch64_AM::UXTW;
    default:
      return AArch64_AM::InvalidShiftExtend;
    }
  }

  // Don't have an explicit extend. Try to handle a G_AND with a constant mask
  // on the RHS.
  if (Opc != TargetOpcode::G_AND)
    return AArch64_AM::InvalidShiftExtend;

  std::optional<uint64_t> MaybeAndMask = getImmedFromMO(MI.getOperand(2));
  if (!MaybeAndMask)
    return AArch64_AM::InvalidShiftExtend;
  uint64_t AndMask = *MaybeAndMask;
  switch (AndMask) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case 0xFF:
    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
  case 0xFFFF:
    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
  case 0xFFFFFFFF:
    return AArch64_AM::UXTW;
  }
}

Register AArch64InstructionSelector::moveScalarRegClass(
    Register Reg, const TargetRegisterClass &RC, MachineIRBuilder &MIB) const {
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Ty = MRI.getType(Reg);
  assert(!Ty.isVector() && "Expected scalars only!");
  if (Ty.getSizeInBits() == TRI.getRegSizeInBits(RC))
    return Reg;

  // Create a copy and immediately select it.
  // FIXME: We should have an emitCopy function?
  auto Copy = MIB.buildCopy({&RC}, {Reg});
  selectCopy(*Copy, TII, MRI, TRI, RBI);
  return Copy.getReg(0);
}

/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
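///
/// For example (illustrative), (zext i8 %w) << 2 can be folded into an ADD
/// operand as "uxtb #2"; shift amounts greater than 4 are rejected below.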
InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectArithExtendedRegister(
    MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  uint64_t ShiftVal = 0;
  Register ExtReg;
  AArch64_AM::ShiftExtendType Ext;
  MachineInstr *RootDef = getDefIgnoringCopies(Root.getReg(), MRI);
  if (!RootDef)
    return std::nullopt;

  if (!isWorthFoldingIntoExtendedReg(*RootDef, MRI))
    return std::nullopt;

  // Check if we can fold a shift and an extend.
  if (RootDef->getOpcode() == TargetOpcode::G_SHL) {
    // Look for a constant on the RHS of the shift.
    MachineOperand &RHS = RootDef->getOperand(2);
    std::optional<uint64_t> MaybeShiftVal = getImmedFromMO(RHS);
    if (!MaybeShiftVal)
      return std::nullopt;
    ShiftVal = *MaybeShiftVal;
    if (ShiftVal > 4)
      return std::nullopt;
    // Look for a valid extend instruction on the LHS of the shift.
    MachineOperand &LHS = RootDef->getOperand(1);
    MachineInstr *ExtDef = getDefIgnoringCopies(LHS.getReg(), MRI);
    if (!ExtDef)
      return std::nullopt;
    Ext = getExtendTypeForInst(*ExtDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return std::nullopt;
    ExtReg = ExtDef->getOperand(1).getReg();
  } else {
    // Didn't get a shift. Try just folding an extend.
    Ext = getExtendTypeForInst(*RootDef, MRI);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return std::nullopt;
    ExtReg = RootDef->getOperand(1).getReg();

    // If we have a 32 bit instruction which zeroes out the high half of a
    // register, we get an implicit zero extend for free. Check if we have one.
    // FIXME: We actually emit the extend right now even though we don't have
    // to.
    if (Ext == AArch64_AM::UXTW && MRI.getType(ExtReg).getSizeInBits() == 32) {
      MachineInstr *ExtInst = MRI.getVRegDef(ExtReg);
      if (isDef32(*ExtInst))
        return std::nullopt;
    }
  }

  // We require a GPR32 here. Narrow the ExtReg if needed using a subregister
  // copy.
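  // (moveScalarRegClass is a no-op when the value is already 32 bits wide;
  // otherwise it emits and selects a COPY into the requested class.)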
  MachineIRBuilder MIB(*RootDef);
  ExtReg = moveScalarRegClass(ExtReg, AArch64::GPR32RegClass, MIB);

  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
           [=](MachineInstrBuilder &MIB) {
             MIB.addImm(getArithExtendImm(Ext, ShiftVal));
           }}};
}

InstructionSelector::ComplexRendererFns
AArch64InstructionSelector::selectExtractHigh(MachineOperand &Root) const {
  if (!Root.isReg())
    return std::nullopt;
  MachineRegisterInfo &MRI =
      Root.getParent()->getParent()->getParent()->getRegInfo();

  MachineInstr *Extract = getDefIgnoringCopies(Root.getReg(), MRI);
  if (Extract && Extract->getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
      Root.getReg() == Extract->getOperand(1).getReg()) {
    Register ExtReg = Extract->getOperand(2).getReg();
    return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); }}};
  }

  return std::nullopt;
}

void AArch64InstructionSelector::renderTruncImm(MachineInstrBuilder &MIB,
                                                const MachineInstr &MI,
                                                int OpIdx) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  std::optional<int64_t> CstVal =
      getIConstantVRegSExtVal(MI.getOperand(0).getReg(), MRI);
  assert(CstVal && "Expected constant value");
  MIB.addImm(*CstVal);
}

void AArch64InstructionSelector::renderLogicalImm32(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 32);
  MIB.addImm(Enc);
}

void AArch64InstructionSelector::renderLogicalImm64(
    MachineInstrBuilder &MIB, const MachineInstr &I, int OpIdx) const {
  assert(I.getOpcode() == TargetOpcode::G_CONSTANT && OpIdx == -1 &&
         "Expected G_CONSTANT");
  uint64_t CstVal = I.getOperand(1).getCImm()->getZExtValue();
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(CstVal, 64);
  MIB.addImm(Enc);
}

void AArch64InstructionSelector::renderFPImm16(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP16Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm32(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP32Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm64(MachineInstrBuilder &MIB,
                                               const MachineInstr &MI,
                                               int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
  MIB.addImm(
      AArch64_AM::getFP64Imm(MI.getOperand(1).getFPImm()->getValueAPF()));
}

void AArch64InstructionSelector::renderFPImm32SIMDModImmType4(
    MachineInstrBuilder &MIB, const MachineInstr &MI, int OpIdx) const {
  assert(MI.getOpcode() == TargetOpcode::G_FCONSTANT && OpIdx == -1 &&
         "Expected G_FCONSTANT");
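  // encodeAdvSIMDModImmType4 expects the raw 32-bit pattern of the constant,
  // hence the bitcast of the APFloat to an APInt below.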
  MIB.addImm(AArch64_AM::encodeAdvSIMDModImmType4(MI.getOperand(1)
                                                      .getFPImm()
                                                      ->getValueAPF()
                                                      .bitcastToAPInt()
                                                      .getZExtValue()));
}

bool AArch64InstructionSelector::isLoadStoreOfNumBytes(
    const MachineInstr &MI, unsigned NumBytes) const {
  if (!MI.mayLoadOrStore())
    return false;
  assert(MI.hasOneMemOperand() &&
         "Expected load/store to have only one mem op!");
  return (*MI.memoperands_begin())->getSize() == NumBytes;
}

bool AArch64InstructionSelector::isDef32(const MachineInstr &MI) const {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() != 32)
    return false;

  // Only return true if we know the operation will zero out the high half of
  // the 64-bit register. Truncates can be subregister copies, which don't
  // zero out the high bits. Copies and other copy-like instructions can be
  // fed by truncates, or could be lowered as subregister copies.
  switch (MI.getOpcode()) {
  default:
    return true;
  case TargetOpcode::COPY:
  case TargetOpcode::G_BITCAST:
  case TargetOpcode::G_TRUNC:
  case TargetOpcode::G_PHI:
    return false;
  }
}

// Perform fixups on the given PHI instruction's operands to force them all
// to be the same as the destination regbank.
static void fixupPHIOpBanks(MachineInstr &MI, MachineRegisterInfo &MRI,
                            const AArch64RegisterBankInfo &RBI) {
  assert(MI.getOpcode() == TargetOpcode::G_PHI && "Expected a G_PHI");
  Register DstReg = MI.getOperand(0).getReg();
  const RegisterBank *DstRB = MRI.getRegBankOrNull(DstReg);
  assert(DstRB && "Expected PHI dst to have regbank assigned");
  MachineIRBuilder MIB(MI);

  // Go through each operand and ensure it has the same regbank.
  for (MachineOperand &MO : llvm::drop_begin(MI.operands())) {
    if (!MO.isReg())
      continue;
    Register OpReg = MO.getReg();
    const RegisterBank *RB = MRI.getRegBankOrNull(OpReg);
    if (RB != DstRB) {
      // Insert a cross-bank copy.
      auto *OpDef = MRI.getVRegDef(OpReg);
      const LLT &Ty = MRI.getType(OpReg);
      MachineBasicBlock &OpDefBB = *OpDef->getParent();

      // Any instruction we insert must appear after all PHIs in the block
      // for the block to be valid MIR.
      MachineBasicBlock::iterator InsertPt = std::next(OpDef->getIterator());
      if (InsertPt != OpDefBB.end() && InsertPt->isPHI())
        InsertPt = OpDefBB.getFirstNonPHI();
      MIB.setInsertPt(*OpDef->getParent(), InsertPt);
      auto Copy = MIB.buildCopy(Ty, OpReg);
      MRI.setRegBank(Copy.getReg(0), *DstRB);
      MO.setReg(Copy.getReg(0));
    }
  }
}

void AArch64InstructionSelector::processPHIs(MachineFunction &MF) {
  // We're looking for PHIs; build a list so we don't invalidate iterators.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SmallVector<MachineInstr *, 32> Phis;
  for (auto &BB : MF) {
    for (auto &MI : BB) {
      if (MI.getOpcode() == TargetOpcode::G_PHI)
        Phis.emplace_back(&MI);
    }
  }

  for (auto *MI : Phis) {
    // We need to do some work here if the operand types are < 16 bit and they
    // are split across fpr/gpr banks. Since all types <32b on gpr
    // end up being assigned gpr32 regclasses, we can end up with PHIs here
    // which try to select between a gpr32 and an fpr16.
    // Ideally RBS shouldn't be selecting heterogeneous regbanks for operands
    // in the first place, but we still need to be able to deal with it here.
    //
    // To fix this, if we have a gpr-bank operand < 32b in size and at least
    // one other operand is on the fpr bank, then we add cross-bank copies
    // to homogenize the operand banks. For simplicity the bank that we choose
    // to settle on is whatever bank the def operand has. For example:
    //
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2:fpr(s16), %bb2
    // =>
    // %bb2:
    //   ...
    //   %in2_copy:gpr(s16) = COPY %in2:fpr(s16)
    //   ...
    // %endbb:
    //   %dst:gpr(s16) = G_PHI %in1:gpr(s16), %bb1, %in2_copy:gpr(s16), %bb2
    bool HasGPROp = false, HasFPROp = false;
    for (const MachineOperand &MO : llvm::drop_begin(MI->operands())) {
      if (!MO.isReg())
        continue;
      const LLT &Ty = MRI.getType(MO.getReg());
      if (!Ty.isValid() || !Ty.isScalar())
        break;
      if (Ty.getSizeInBits() >= 32)
        break;
      const RegisterBank *RB = MRI.getRegBankOrNull(MO.getReg());
      // If for some reason we don't have a regbank yet, don't try anything.
      if (!RB)
        break;

      if (RB->getID() == AArch64::GPRRegBankID)
        HasGPROp = true;
      else
        HasFPROp = true;
    }
    // We have heterogeneous regbanks, so we need to fix them up.
    if (HasGPROp && HasFPROp)
      fixupPHIOpBanks(*MI, MRI, RBI);
  }
}

namespace llvm {
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &TM,
                                 AArch64Subtarget &Subtarget,
                                 AArch64RegisterBankInfo &RBI) {
  return new AArch64InstructionSelector(TM, Subtarget, RBI);
}
} // namespace llvm