1 //===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Interface definition of the TargetLowering class that is common 11 /// to all AMD GPUs. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H 16 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H 17 18 #include "llvm/CodeGen/CallingConvLower.h" 19 #include "llvm/CodeGen/TargetLowering.h" 20 21 namespace llvm { 22 23 class AMDGPUMachineFunction; 24 class AMDGPUSubtarget; 25 struct ArgDescriptor; 26 27 class AMDGPUTargetLowering : public TargetLowering { 28 private: 29 const AMDGPUSubtarget *Subtarget; 30 31 /// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been 32 /// legalized from a smaller type VT. Need to match pre-legalized type because 33 /// the generic legalization inserts the add/sub between the select and 34 /// compare. 35 SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL, unsigned Opc) const; 36 37 public: 38 /// \returns The minimum number of bits needed to store the value of \Op as an 39 /// unsigned integer. Truncating to this size and then zero-extending to the 40 /// original size will not change the value. 41 static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG); 42 43 /// \returns The minimum number of bits needed to store the value of \Op as a 44 /// signed integer. Truncating to this size and then sign-extending to the 45 /// original size will not change the value. 46 static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG); 47 48 protected: 49 SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; 50 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; 51 /// Split a vector store into multiple scalar stores. 52 /// \returns The resulting chain. 53 54 SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const; 55 SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const; 56 SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const; 57 SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const; 58 SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const; 59 60 SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const; 61 SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const; 62 SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; 63 64 SDValue getIsLtSmallestNormal(SelectionDAG &DAG, SDValue Op, 65 SDNodeFlags Flags) const; 66 SDValue getIsFinite(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const; 67 std::pair<SDValue, SDValue> getScaledLogInput(SelectionDAG &DAG, 68 const SDLoc SL, SDValue Op, 69 SDNodeFlags Flags) const; 70 71 SDValue LowerFLOG2(SDValue Op, SelectionDAG &DAG) const; 72 SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const; 73 SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const; 74 SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, 75 bool IsLog10, SDNodeFlags Flags) const; 76 SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const; 77 78 SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG, 79 SDNodeFlags Flags) const; 80 SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const; 81 82 SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const; 83 84 SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const; 85 SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const; 86 SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 87 SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; 88 89 SDValue LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG, bool Signed) const; 90 SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const; 91 SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const; 92 93 SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; 94 95 protected: 96 bool shouldCombineMemoryType(EVT VT) const; 97 SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const; 98 SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const; 99 SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const; 100 SDValue performIntrinsicWOChainCombine(SDNode *N, DAGCombinerInfo &DCI) const; 101 102 SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL, 103 unsigned Opc, SDValue LHS, 104 uint32_t ValLo, uint32_t ValHi) const; 105 SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const; 106 SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const; 107 SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const; 108 SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const; 109 SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const; 110 SDValue performMulLoHiCombine(SDNode *N, DAGCombinerInfo &DCI) const; 111 SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const; 112 SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const; 113 SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS, 114 SDValue RHS, DAGCombinerInfo &DCI) const; 115 116 SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI, 117 SDValue N) const; 118 SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const; 119 120 TargetLowering::NegatibleCost 121 getConstantNegateCost(const ConstantFPSDNode *C) const; 122 123 bool isConstantCostlierToNegate(SDValue N) const; 124 bool isConstantCheaperToNegate(SDValue N) const; 125 SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const; 126 SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const; 127 SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const; 128 129 static EVT getEquivalentMemType(LLVMContext &Context, EVT VT); 130 131 virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, 132 SelectionDAG &DAG) const; 133 134 /// Return 64-bit value Op as two 32-bit integers. 135 std::pair<SDValue, SDValue> split64BitValue(SDValue Op, 136 SelectionDAG &DAG) const; 137 SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const; 138 SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const; 139 140 /// Split a vector type into two parts. The first part is a power of two 141 /// vector. The second part is whatever is left over, and is a scalar if it 142 /// would otherwise be a 1-vector. 143 std::pair<EVT, EVT> getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const; 144 145 /// Split a vector value into two parts of types LoVT and HiVT. HiVT could be 146 /// scalar. 147 std::pair<SDValue, SDValue> splitVector(const SDValue &N, const SDLoc &DL, 148 const EVT &LoVT, const EVT &HighVT, 149 SelectionDAG &DAG) const; 150 151 /// Split a vector load into 2 loads of half the vector. 152 SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const; 153 154 /// Widen a suitably aligned v3 load. For all other cases, split the input 155 /// vector load. 156 SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const; 157 158 /// Split a vector store into 2 stores of half the vector. 159 SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const; 160 161 SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; 162 SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; 163 SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; 164 SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const; 165 void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, 166 SmallVectorImpl<SDValue> &Results) const; 167 168 void analyzeFormalArgumentsCompute( 169 CCState &State, 170 const SmallVectorImpl<ISD::InputArg> &Ins) const; 171 172 public: 173 AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI); 174 175 bool mayIgnoreSignedZero(SDValue Op) const; 176 177 static inline SDValue stripBitcast(SDValue Val) { 178 return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val; 179 } 180 181 static bool shouldFoldFNegIntoSrc(SDNode *FNeg, SDValue FNegSrc); 182 static bool allUsesHaveSourceMods(const SDNode *N, 183 unsigned CostThreshold = 4); 184 bool isFAbsFree(EVT VT) const override; 185 bool isFNegFree(EVT VT) const override; 186 bool isTruncateFree(EVT Src, EVT Dest) const override; 187 bool isTruncateFree(Type *Src, Type *Dest) const override; 188 189 bool isZExtFree(Type *Src, Type *Dest) const override; 190 bool isZExtFree(EVT Src, EVT Dest) const override; 191 192 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, 193 bool LegalOperations, bool ForCodeSize, 194 NegatibleCost &Cost, 195 unsigned Depth) const override; 196 197 bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override; 198 199 bool isDesirableToCommuteWithShift(const SDNode *N, 200 CombineLevel Level) const override; 201 202 EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, 203 ISD::NodeType ExtendKind) const override; 204 205 MVT getVectorIdxTy(const DataLayout &) const override; 206 bool isSelectSupported(SelectSupportKind) const override; 207 208 bool isFPImmLegal(const APFloat &Imm, EVT VT, 209 bool ForCodeSize) const override; 210 bool ShouldShrinkFPConstant(EVT VT) const override; 211 bool shouldReduceLoadWidth(SDNode *Load, 212 ISD::LoadExtType ExtType, 213 EVT ExtVT) const override; 214 215 bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG, 216 const MachineMemOperand &MMO) const final; 217 218 bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, 219 unsigned NumElem, 220 unsigned AS) const override; 221 bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override; 222 bool isCheapToSpeculateCttz(Type *Ty) const override; 223 bool isCheapToSpeculateCtlz(Type *Ty) const override; 224 225 bool isSDNodeAlwaysUniform(const SDNode *N) const override; 226 static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg); 227 static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg); 228 229 SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, 230 const SmallVectorImpl<ISD::OutputArg> &Outs, 231 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL, 232 SelectionDAG &DAG) const override; 233 234 SDValue addTokenForArgument(SDValue Chain, 235 SelectionDAG &DAG, 236 MachineFrameInfo &MFI, 237 int ClobberedFI) const; 238 239 SDValue lowerUnhandledCall(CallLoweringInfo &CLI, 240 SmallVectorImpl<SDValue> &InVals, 241 StringRef Reason) const; 242 SDValue LowerCall(CallLoweringInfo &CLI, 243 SmallVectorImpl<SDValue> &InVals) const override; 244 245 SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, 246 SelectionDAG &DAG) const; 247 248 SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; 249 SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; 250 void ReplaceNodeResults(SDNode * N, 251 SmallVectorImpl<SDValue> &Results, 252 SelectionDAG &DAG) const override; 253 254 SDValue combineFMinMaxLegacyImpl(const SDLoc &DL, EVT VT, SDValue LHS, 255 SDValue RHS, SDValue True, SDValue False, 256 SDValue CC, DAGCombinerInfo &DCI) const; 257 258 SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, 259 SDValue RHS, SDValue True, SDValue False, 260 SDValue CC, DAGCombinerInfo &DCI) const; 261 262 const char* getTargetNodeName(unsigned Opcode) const override; 263 264 // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection for 265 // AMDGPU. Commit r319036, 266 // (https://github.com/llvm/llvm-project/commit/db77e57ea86d941a4262ef60261692f4cb6893e6) 267 // turned on MergeConsecutiveStores() before Instruction Selection for all 268 // targets. Enough AMDGPU compiles go into an infinite loop ( 269 // MergeConsecutiveStores() merges two stores; LegalizeStoreOps() un-merges; 270 // MergeConsecutiveStores() re-merges, etc. ) to warrant turning it off for 271 // now. 272 bool mergeStoresAfterLegalization(EVT) const override { return false; } 273 274 bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override { 275 return true; 276 } 277 SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, 278 int &RefinementSteps, bool &UseOneConstNR, 279 bool Reciprocal) const override; 280 SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, 281 int &RefinementSteps) const override; 282 283 virtual SDNode *PostISelFolding(MachineSDNode *N, 284 SelectionDAG &DAG) const = 0; 285 286 /// Determine which of the bits specified in \p Mask are known to be 287 /// either zero or one and return them in the \p KnownZero and \p KnownOne 288 /// bitsets. 289 void computeKnownBitsForTargetNode(const SDValue Op, 290 KnownBits &Known, 291 const APInt &DemandedElts, 292 const SelectionDAG &DAG, 293 unsigned Depth = 0) const override; 294 295 unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, 296 const SelectionDAG &DAG, 297 unsigned Depth = 0) const override; 298 299 unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, 300 Register R, 301 const APInt &DemandedElts, 302 const MachineRegisterInfo &MRI, 303 unsigned Depth = 0) const override; 304 305 bool isKnownNeverNaNForTargetNode(SDValue Op, 306 const SelectionDAG &DAG, 307 bool SNaN = false, 308 unsigned Depth = 0) const override; 309 310 bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0, 311 Register N1) const override; 312 313 /// Helper function that adds Reg to the LiveIn list of the DAG's 314 /// MachineFunction. 315 /// 316 /// \returns a RegisterSDNode representing Reg if \p RawReg is true, otherwise 317 /// a copy from the register. 318 SDValue CreateLiveInRegister(SelectionDAG &DAG, 319 const TargetRegisterClass *RC, 320 Register Reg, EVT VT, 321 const SDLoc &SL, 322 bool RawReg = false) const; 323 SDValue CreateLiveInRegister(SelectionDAG &DAG, 324 const TargetRegisterClass *RC, 325 Register Reg, EVT VT) const { 326 return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode())); 327 } 328 329 // Returns the raw live in register rather than a copy from it. 330 SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, 331 const TargetRegisterClass *RC, 332 Register Reg, EVT VT) const { 333 return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()), true); 334 } 335 336 /// Similar to CreateLiveInRegister, except value maybe loaded from a stack 337 /// slot rather than passed in a register. 338 SDValue loadStackInputValue(SelectionDAG &DAG, 339 EVT VT, 340 const SDLoc &SL, 341 int64_t Offset) const; 342 343 SDValue storeStackInputValue(SelectionDAG &DAG, 344 const SDLoc &SL, 345 SDValue Chain, 346 SDValue ArgVal, 347 int64_t Offset) const; 348 349 SDValue loadInputValue(SelectionDAG &DAG, 350 const TargetRegisterClass *RC, 351 EVT VT, const SDLoc &SL, 352 const ArgDescriptor &Arg) const; 353 354 enum ImplicitParameter { 355 FIRST_IMPLICIT, 356 PRIVATE_BASE, 357 SHARED_BASE, 358 QUEUE_PTR, 359 }; 360 361 /// Helper function that returns the byte offset of the given 362 /// type of implicit parameter. 363 uint32_t getImplicitParameterOffset(const MachineFunction &MF, 364 const ImplicitParameter Param) const; 365 uint32_t getImplicitParameterOffset(const uint64_t ExplicitKernArgSize, 366 const ImplicitParameter Param) const; 367 368 MVT getFenceOperandTy(const DataLayout &DL) const override { 369 return MVT::i32; 370 } 371 372 AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override; 373 374 bool isConstantUnsignedBitfieldExtractLegal(unsigned Opc, LLT Ty1, 375 LLT Ty2) const override; 376 377 bool shouldSinkOperands(Instruction *I, 378 SmallVectorImpl<Use *> &Ops) const override; 379 }; 380 381 namespace AMDGPUISD { 382 383 enum NodeType : unsigned { 384 // AMDIL ISD Opcodes 385 FIRST_NUMBER = ISD::BUILTIN_OP_END, 386 UMUL, // 32bit unsigned multiplication 387 BRANCH_COND, 388 // End AMDIL ISD Opcodes 389 390 // Function call. 391 CALL, 392 TC_RETURN, 393 TC_RETURN_GFX, 394 TRAP, 395 396 // Masked control flow nodes. 397 IF, 398 ELSE, 399 LOOP, 400 401 // A uniform kernel return that terminates the wavefront. 402 ENDPGM, 403 404 // s_endpgm, but we may want to insert it in the middle of the block. 405 ENDPGM_TRAP, 406 407 // Return to a shader part's epilog code. 408 RETURN_TO_EPILOG, 409 410 // Return with values from a non-entry function. 411 RET_GLUE, 412 413 DWORDADDR, 414 FRACT, 415 416 /// CLAMP value between 0.0 and 1.0. NaN clamped to 0, following clamp output 417 /// modifier behavior with dx10_enable. 418 CLAMP, 419 420 // This is SETCC with the full mask result which is used for a compare with a 421 // result bit per item in the wavefront. 422 SETCC, 423 SETREG, 424 425 DENORM_MODE, 426 427 // FP ops with input and output chain. 428 FMA_W_CHAIN, 429 FMUL_W_CHAIN, 430 431 // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi. 432 // Denormals handled on some parts. 433 COS_HW, 434 SIN_HW, 435 FMAX_LEGACY, 436 FMIN_LEGACY, 437 438 FMAX3, 439 SMAX3, 440 UMAX3, 441 FMIN3, 442 SMIN3, 443 UMIN3, 444 FMED3, 445 SMED3, 446 UMED3, 447 FDOT2, 448 URECIP, 449 DIV_SCALE, 450 DIV_FMAS, 451 DIV_FIXUP, 452 // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is 453 // treated as an illegal operation. 454 FMAD_FTZ, 455 456 // RCP, RSQ - For f32, 1 ULP max error, no denormal handling. 457 // For f64, max error 2^29 ULP, handles denormals. 458 RCP, 459 RSQ, 460 RCP_LEGACY, 461 RCP_IFLAG, 462 463 // log2, no denormal handling for f32. 464 LOG, 465 466 // exp2, no denormal handling for f32. 467 EXP, 468 469 FMUL_LEGACY, 470 RSQ_CLAMP, 471 FP_CLASS, 472 DOT4, 473 CARRY, 474 BORROW, 475 BFE_U32, // Extract range of bits with zero extension to 32-bits. 476 BFE_I32, // Extract range of bits with sign extension to 32-bits. 477 BFI, // (src0 & src1) | (~src0 & src2) 478 BFM, // Insert a range of bits into a 32-bit word. 479 FFBH_U32, // ctlz with -1 if input is zero. 480 FFBH_I32, 481 FFBL_B32, // cttz with -1 if input is zero. 482 MUL_U24, 483 MUL_I24, 484 MULHI_U24, 485 MULHI_I24, 486 MAD_U24, 487 MAD_I24, 488 MAD_U64_U32, 489 MAD_I64_I32, 490 PERM, 491 TEXTURE_FETCH, 492 R600_EXPORT, 493 CONST_ADDRESS, 494 REGISTER_LOAD, 495 REGISTER_STORE, 496 SAMPLE, 497 SAMPLEB, 498 SAMPLED, 499 SAMPLEL, 500 501 // These cvt_f32_ubyte* nodes need to remain consecutive and in order. 502 CVT_F32_UBYTE0, 503 CVT_F32_UBYTE1, 504 CVT_F32_UBYTE2, 505 CVT_F32_UBYTE3, 506 507 // Convert two float 32 numbers into a single register holding two packed f16 508 // with round to zero. 509 CVT_PKRTZ_F16_F32, 510 CVT_PKNORM_I16_F32, 511 CVT_PKNORM_U16_F32, 512 CVT_PK_I16_I32, 513 CVT_PK_U16_U32, 514 515 // Same as the standard node, except the high bits of the resulting integer 516 // are known 0. 517 FP_TO_FP16, 518 519 /// This node is for VLIW targets and it is used to represent a vector 520 /// that is stored in consecutive registers with the same channel. 521 /// For example: 522 /// |X |Y|Z|W| 523 /// T0|v.x| | | | 524 /// T1|v.y| | | | 525 /// T2|v.z| | | | 526 /// T3|v.w| | | | 527 BUILD_VERTICAL_VECTOR, 528 /// Pointer to the start of the shader's constant data. 529 CONST_DATA_PTR, 530 PC_ADD_REL_OFFSET, 531 LDS, 532 FPTRUNC_ROUND_UPWARD, 533 FPTRUNC_ROUND_DOWNWARD, 534 535 DUMMY_CHAIN, 536 FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, 537 LOAD_D16_HI, 538 LOAD_D16_LO, 539 LOAD_D16_HI_I8, 540 LOAD_D16_HI_U8, 541 LOAD_D16_LO_I8, 542 LOAD_D16_LO_U8, 543 544 STORE_MSKOR, 545 LOAD_CONSTANT, 546 TBUFFER_STORE_FORMAT, 547 TBUFFER_STORE_FORMAT_D16, 548 TBUFFER_LOAD_FORMAT, 549 TBUFFER_LOAD_FORMAT_D16, 550 DS_ORDERED_COUNT, 551 ATOMIC_CMP_SWAP, 552 ATOMIC_LOAD_FMIN, 553 ATOMIC_LOAD_FMAX, 554 BUFFER_LOAD, 555 BUFFER_LOAD_UBYTE, 556 BUFFER_LOAD_USHORT, 557 BUFFER_LOAD_BYTE, 558 BUFFER_LOAD_SHORT, 559 BUFFER_LOAD_FORMAT, 560 BUFFER_LOAD_FORMAT_TFE, 561 BUFFER_LOAD_FORMAT_D16, 562 SBUFFER_LOAD, 563 BUFFER_STORE, 564 BUFFER_STORE_BYTE, 565 BUFFER_STORE_SHORT, 566 BUFFER_STORE_FORMAT, 567 BUFFER_STORE_FORMAT_D16, 568 BUFFER_ATOMIC_SWAP, 569 BUFFER_ATOMIC_ADD, 570 BUFFER_ATOMIC_SUB, 571 BUFFER_ATOMIC_SMIN, 572 BUFFER_ATOMIC_UMIN, 573 BUFFER_ATOMIC_SMAX, 574 BUFFER_ATOMIC_UMAX, 575 BUFFER_ATOMIC_AND, 576 BUFFER_ATOMIC_OR, 577 BUFFER_ATOMIC_XOR, 578 BUFFER_ATOMIC_INC, 579 BUFFER_ATOMIC_DEC, 580 BUFFER_ATOMIC_CMPSWAP, 581 BUFFER_ATOMIC_CSUB, 582 BUFFER_ATOMIC_FADD, 583 BUFFER_ATOMIC_FMIN, 584 BUFFER_ATOMIC_FMAX, 585 586 LAST_AMDGPU_ISD_NUMBER 587 }; 588 589 } // End namespace AMDGPUISD 590 591 } // End namespace llvm 592 593 #endif 594