//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition of the TargetLowering class that is common
/// to all AMD GPUs.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {

class AMDGPUMachineFunction;
class AMDGPUSubtarget;
struct ArgDescriptor;

/// TargetLowering subclass shared by the AMD GPU targets. It declares the
/// custom lowering helpers, the DAG-combine helpers and the TargetLowering
/// hooks that are overridden for AMDGPU. PostISelFolding is pure virtual, so
/// this class is abstract.
class AMDGPUTargetLowering : public TargetLowering {
private:
  const AMDGPUSubtarget *Subtarget;

  /// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been
  /// legalized from a smaller type VT. Need to match pre-legalized type because
  /// the generic legalization inserts the add/sub between the select and
  /// compare.
  SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL,
                      unsigned Opc) const;

public:
  /// \returns The minimum number of bits needed to store the value of \p Op as
  /// an unsigned integer. Truncating to this size and then zero-extending to
  /// the original size will not change the value.
  static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG);

  /// \returns The minimum number of bits needed to store the value of \p Op as
  /// a signed integer. Truncating to this size and then sign-extending to the
  /// original size will not change the value.
  static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);

protected:
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  // NOTE(review): the following text was a Doxygen comment with no declaration
  // after it in this header. It is kept as a plain comment so that it is not
  // attached to LowerFREM by mistake:
  //   Split a vector store into multiple scalar stores.
  //   \returns The resulting chain.

  SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;

  static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags);
  static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src,
                                     SDNodeFlags Flags);
  SDValue getIsLtSmallestNormal(SelectionDAG &DAG, SDValue Op,
                                SDNodeFlags Flags) const;
  SDValue getIsFinite(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const;
  // NOTE(review): SL is taken by value here while the sibling helpers take
  // `const SDLoc &`; changing it would also require changing the definition.
  std::pair<SDValue, SDValue> getScaledLogInput(SelectionDAG &DAG,
                                                const SDLoc SL, SDValue Op,
                                                SDNodeFlags Flags) const;

  SDValue LowerFLOG2(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                          bool IsLog10, SDNodeFlags Flags) const;
  SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                          SDNodeFlags Flags) const;
  SDValue lowerFEXP10Unsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                            SDNodeFlags Flags) const;
  SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerCTLZResults(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;

  // The access level is already protected at this point; the specifier is
  // repeated and only separates the helpers below from the ones above.
protected:
  bool shouldCombineMemoryType(EVT VT) const;
  SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performIntrinsicWOChainCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
                                       unsigned Opc, SDValue LHS,
                                       uint32_t ValLo, uint32_t ValHi) const;
  SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulLoHiCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
                                  SDValue RHS, DAGCombinerInfo &DCI) const;

  SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
                               SDValue N) const;
  SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  TargetLowering::NegatibleCost
  getConstantNegateCost(const ConstantFPSDNode *C) const;

  bool isConstantCostlierToNegate(SDValue N) const;
  bool isConstantCheaperToNegate(SDValue N) const;
  SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);

  virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
                                     SelectionDAG &DAG) const;

  /// Return 64-bit value Op as two 32-bit integers.
  std::pair<SDValue, SDValue> split64BitValue(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const;
  SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const;

  /// Split a vector type into two parts. The first part is a power of two
  /// vector. The second part is whatever is left over, and is a scalar if it
  /// would otherwise be a 1-vector.
  std::pair<EVT, EVT> getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const;

  /// Split a vector value into two parts of types LoVT and HiVT. HiVT could be
  /// scalar.
  // NOTE(review): the comment says HiVT but the parameter is spelled HighVT.
  std::pair<SDValue, SDValue> splitVector(const SDValue &N, const SDLoc &DL,
                                          const EVT &LoVT, const EVT &HighVT,
                                          SelectionDAG &DAG) const;

  /// Split a vector load into 2 loads of half the vector.
  SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;

  /// Widen a suitably aligned v3 load. For all other cases, split the input
  /// vector load.
  SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;

  /// Split a vector store into 2 stores of half the vector.
  SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
  void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
                      SmallVectorImpl<SDValue> &Results) const;

  void analyzeFormalArgumentsCompute(
      CCState &State,
      const SmallVectorImpl<ISD::InputArg> &Ins) const;

public:
  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);

  bool mayIgnoreSignedZero(SDValue Op) const;

  /// \returns operand 0 of \p Val if \p Val is an ISD::BITCAST, otherwise
  /// \p Val itself. Only a single bitcast is looked through.
  static inline SDValue stripBitcast(SDValue Val) {
    return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
  }

  static bool shouldFoldFNegIntoSrc(SDNode *FNeg, SDValue FNegSrc);
  static bool allUsesHaveSourceMods(const SDNode *N,
                                    unsigned CostThreshold = 4);
  bool isFAbsFree(EVT VT) const override;
  bool isFNegFree(EVT VT) const override;
  bool isTruncateFree(EVT Src, EVT Dest) const override;
  bool isTruncateFree(Type *Src, Type *Dest) const override;

  bool isZExtFree(Type *Src, Type *Dest) const override;
  bool isZExtFree(EVT Src, EVT Dest) const override;

  SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                               bool LegalOperations, bool ForCodeSize,
                               NegatibleCost &Cost,
                               unsigned Depth) const override;

  bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;

  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                          ISD::NodeType ExtendKind) const override;

  MVT getVectorIdxTy(const DataLayout &) const override;
  bool isSelectSupported(SelectSupportKind) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;
  bool ShouldShrinkFPConstant(EVT VT) const override;
  bool shouldReduceLoadWidth(SDNode *Load,
                             ISD::LoadExtType ExtType,
                             EVT ExtVT) const override;

  // Declared `final`, unlike the neighbouring hooks, so subclasses cannot
  // override it.
  bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG,
                               const MachineMemOperand &MMO) const final;

  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
                                    unsigned NumElem,
                                    unsigned AS) const override;
  bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override;
  bool isCheapToSpeculateCttz(Type *Ty) const override;
  bool isCheapToSpeculateCtlz(Type *Ty) const override;

  bool isSDNodeAlwaysUniform(const SDNode *N) const override;

  // FIXME: This hook should not exist
  // The three hooks below unconditionally return AtomicExpansionKind::None.
  AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
    return AtomicExpansionKind::None;
  }

  AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override {
    return AtomicExpansionKind::None;
  }

  AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *) const override {
    return AtomicExpansionKind::None;
  }

  static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
  static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue addTokenForArgument(SDValue Chain,
                              SelectionDAG &DAG,
                              MachineFrameInfo &MFI,
                              int ClobberedFI) const;

  SDValue lowerUnhandledCall(CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals,
                             StringRef Reason) const;
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
  void ReplaceNodeResults(SDNode * N,
                          SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue combineFMinMaxLegacyImpl(const SDLoc &DL, EVT VT, SDValue LHS,
                                   SDValue RHS, SDValue True, SDValue False,
                                   SDValue CC, DAGCombinerInfo &DCI) const;

  SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
                               SDValue RHS, SDValue True, SDValue False,
                               SDValue CC, DAGCombinerInfo &DCI) const;

  const char* getTargetNodeName(unsigned Opcode) const override;

  // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection for
  // AMDGPU.  Commit r319036,
  // (https://github.com/llvm/llvm-project/commit/db77e57ea86d941a4262ef60261692f4cb6893e6)
  // turned on MergeConsecutiveStores() before Instruction Selection for all
  // targets.  Enough AMDGPU compiles go into an infinite loop (
  // MergeConsecutiveStores() merges two stores; LegalizeStoreOps() un-merges;
  // MergeConsecutiveStores() re-merges, etc. ) to warrant turning it off for
  // now.
  bool mergeStoresAfterLegalization(EVT) const override { return false; }

  // Unconditionally reports fsqrt as cheap; neither argument is inspected.
  bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
    return true;
  }
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &RefinementSteps, bool &UseOneConstNR,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &RefinementSteps) const override;

  // Pure virtual: every concrete subclass has to provide this.
  virtual SDNode *PostISelFolding(MachineSDNode *N,
                                  SelectionDAG &DAG) const = 0;

  /// Determine which bits of \p Op are known to be either zero or one and
  /// return them in \p Known.
  // NOTE(review): the previous wording referred to \p Mask, \p KnownZero and
  // \p KnownOne, none of which are parameters of this function. Presumably
  // \p DemandedElts selects the vector elements of interest -- confirm.
  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth = 0) const override;

  unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis,
                                            Register R,
                                            const APInt &DemandedElts,
                                            const MachineRegisterInfo &MRI,
                                            unsigned Depth = 0) const override;

  bool isKnownNeverNaNForTargetNode(SDValue Op,
                                    const SelectionDAG &DAG,
                                    bool SNaN = false,
                                    unsigned Depth = 0) const override;

  bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
                           Register N1) const override;

  /// Helper function that adds Reg to the LiveIn list of the DAG's
  /// MachineFunction.
  ///
  /// \returns a RegisterSDNode representing Reg if \p RawReg is true, otherwise
  /// a copy from the register.
  SDValue CreateLiveInRegister(SelectionDAG &DAG,
                               const TargetRegisterClass *RC,
                               Register Reg, EVT VT,
                               const SDLoc &SL,
                               bool RawReg = false) const;
  // Convenience overload: forwards to the overload above with an SDLoc built
  // from the DAG's entry node and the default RawReg (false).
  SDValue CreateLiveInRegister(SelectionDAG &DAG,
                               const TargetRegisterClass *RC,
                               Register Reg, EVT VT) const {
    return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()));
  }

  // Returns the raw live in register rather than a copy from it.
  // Forwards to CreateLiveInRegister with RawReg set to true.
  SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG,
                                  const TargetRegisterClass *RC,
                                  Register Reg, EVT VT) const {
    return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()), true);
  }

  /// Similar to CreateLiveInRegister, except value may be loaded from a stack
  /// slot rather than passed in a register.
  SDValue loadStackInputValue(SelectionDAG &DAG,
                              EVT VT,
                              const SDLoc &SL,
                              int64_t Offset) const;

  SDValue storeStackInputValue(SelectionDAG &DAG,
                               const SDLoc &SL,
                               SDValue Chain,
                               SDValue ArgVal,
                               int64_t Offset) const;

  SDValue loadInputValue(SelectionDAG &DAG,
                         const TargetRegisterClass *RC,
                         EVT VT, const SDLoc &SL,
                         const ArgDescriptor &Arg) const;

  // Selector passed to getImplicitParameterOffset below. The enumerators take
  // consecutive values starting at 0 (FIRST_IMPLICIT).
  enum ImplicitParameter {
    FIRST_IMPLICIT,
    PRIVATE_BASE,
    SHARED_BASE,
    QUEUE_PTR,
  };

  /// Helper function that returns the byte offset of the given
  /// type of implicit parameter.
  uint32_t getImplicitParameterOffset(const MachineFunction &MF,
                                      const ImplicitParameter Param) const;
  uint32_t getImplicitParameterOffset(const uint64_t ExplicitKernArgSize,
                                      const ImplicitParameter Param) const;

  // Fence operands always use MVT::i32; the data layout is not consulted.
  MVT getFenceOperandTy(const DataLayout &DL) const override {
    return MVT::i32;
  }

  AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;

  bool shouldSinkOperands(Instruction *I,
                          SmallVectorImpl<Use *> &Ops) const override;
};

namespace AMDGPUISD {

// AMDGPU-specific node opcodes. Numbering starts at ISD::BUILTIN_OP_END and
// restarts at ISD::FIRST_TARGET_MEMORY_OPCODE for the entries that follow
// FIRST_MEM_OPCODE_NUMBER. Enumerator values are assigned by position, so
// inserting or reordering entries renumbers everything after them.
enum NodeType : unsigned {
  // AMDIL ISD Opcodes
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  UMUL, // 32bit unsigned multiplication
  BRANCH_COND,
  // End AMDIL ISD Opcodes

  // Function call.
  CALL,
  TC_RETURN,
  TC_RETURN_GFX,
  TC_RETURN_CHAIN,
  TRAP,

  // Masked control flow nodes.
  IF,
  ELSE,
  LOOP,

  // A uniform kernel return that terminates the wavefront.
  ENDPGM,

  // s_endpgm, but we may want to insert it in the middle of the block.
  ENDPGM_TRAP,

  // "s_trap 2" equivalent on hardware that does not support it.
  SIMULATED_TRAP,

  // Return to a shader part's epilog code.
  RETURN_TO_EPILOG,

  // Return with values from a non-entry function.
  RET_GLUE,

  // Convert an unswizzled wave uniform stack address to an address compatible
  // with a vector offset for use in stack access.
  WAVE_ADDRESS,

  DWORDADDR,
  FRACT,

  /// CLAMP value between 0.0 and 1.0. NaN clamped to 0, following clamp output
  /// modifier behavior with dx10_enable.
  CLAMP,

  // This is SETCC with the full mask result which is used for a compare with a
  // result bit per item in the wavefront.
  SETCC,
  SETREG,

  DENORM_MODE,

  // FP ops with input and output chain.
  FMA_W_CHAIN,
  FMUL_W_CHAIN,

  // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
  // Denormals handled on some parts.
  COS_HW,
  SIN_HW,
  FMAX_LEGACY,
  FMIN_LEGACY,

  FMAX3,
  SMAX3,
  UMAX3,
  FMIN3,
  SMIN3,
  UMIN3,
  FMED3,
  SMED3,
  UMED3,
  FMAXIMUM3,
  FMINIMUM3,
  FDOT2,
  URECIP,
  DIV_SCALE,
  DIV_FMAS,
  DIV_FIXUP,
  // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is
  // treated as an illegal operation.
  FMAD_FTZ,

  // RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
  //            For f64, max error 2^29 ULP, handles denormals.
  RCP,
  RSQ,
  RCP_LEGACY,
  RCP_IFLAG,

  // log2, no denormal handling for f32.
  LOG,

  // exp2, no denormal handling for f32.
  EXP,

  FMUL_LEGACY,
  RSQ_CLAMP,
  FP_CLASS,
  DOT4,
  CARRY,
  BORROW,
  BFE_U32,  // Extract range of bits with zero extension to 32-bits.
  BFE_I32,  // Extract range of bits with sign extension to 32-bits.
  BFI,      // (src0 & src1) | (~src0 & src2)
  BFM,      // Insert a range of bits into a 32-bit word.
  FFBH_U32, // ctlz with -1 if input is zero.
  FFBH_I32,
  FFBL_B32, // cttz with -1 if input is zero.
  MUL_U24,
  MUL_I24,
  MULHI_U24,
  MULHI_I24,
  MAD_U24,
  MAD_I24,
  MAD_U64_U32,
  MAD_I64_I32,
  PERM,
  TEXTURE_FETCH,
  R600_EXPORT,
  CONST_ADDRESS,
  REGISTER_LOAD,
  REGISTER_STORE,
  SAMPLE,
  SAMPLEB,
  SAMPLED,
  SAMPLEL,

  // These cvt_f32_ubyte* nodes need to remain consecutive and in order.
  CVT_F32_UBYTE0,
  CVT_F32_UBYTE1,
  CVT_F32_UBYTE2,
  CVT_F32_UBYTE3,

  // Convert two float 32 numbers into a single register holding two packed f16
  // with round to zero.
  CVT_PKRTZ_F16_F32,
  CVT_PKNORM_I16_F32,
  CVT_PKNORM_U16_F32,
  CVT_PK_I16_I32,
  CVT_PK_U16_U32,

  // Same as the standard node, except the high bits of the resulting integer
  // are known 0.
  FP_TO_FP16,

  /// This node is for VLIW targets and it is used to represent a vector
  /// that is stored in consecutive registers with the same channel.
  /// For example:
  ///   |X  |Y|Z|W|
  /// T0|v.x| | | |
  /// T1|v.y| | | |
  /// T2|v.z| | | |
  /// T3|v.w| | | |
  BUILD_VERTICAL_VECTOR,
  /// Pointer to the start of the shader's constant data.
  CONST_DATA_PTR,
  PC_ADD_REL_OFFSET,
  LDS,
  FPTRUNC_ROUND_UPWARD,
  FPTRUNC_ROUND_DOWNWARD,

  DUMMY_CHAIN,
  // Explicitly re-anchored: the entries below continue counting from
  // ISD::FIRST_TARGET_MEMORY_OPCODE rather than from DUMMY_CHAIN.
  FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
  LOAD_D16_HI,
  LOAD_D16_LO,
  LOAD_D16_HI_I8,
  LOAD_D16_HI_U8,
  LOAD_D16_LO_I8,
  LOAD_D16_LO_U8,

  STORE_MSKOR,
  LOAD_CONSTANT,
  TBUFFER_STORE_FORMAT,
  TBUFFER_STORE_FORMAT_D16,
  TBUFFER_LOAD_FORMAT,
  TBUFFER_LOAD_FORMAT_D16,
  DS_ORDERED_COUNT,
  ATOMIC_CMP_SWAP,
  BUFFER_LOAD,
  BUFFER_LOAD_UBYTE,
  BUFFER_LOAD_USHORT,
  BUFFER_LOAD_BYTE,
  BUFFER_LOAD_SHORT,
  BUFFER_LOAD_TFE,
  BUFFER_LOAD_UBYTE_TFE,
  BUFFER_LOAD_USHORT_TFE,
  BUFFER_LOAD_BYTE_TFE,
  BUFFER_LOAD_SHORT_TFE,
  BUFFER_LOAD_FORMAT,
  BUFFER_LOAD_FORMAT_TFE,
  BUFFER_LOAD_FORMAT_D16,
  SBUFFER_LOAD,
  SBUFFER_LOAD_BYTE,
  SBUFFER_LOAD_UBYTE,
  SBUFFER_LOAD_SHORT,
  SBUFFER_LOAD_USHORT,
  BUFFER_STORE,
  BUFFER_STORE_BYTE,
  BUFFER_STORE_SHORT,
  BUFFER_STORE_FORMAT,
  BUFFER_STORE_FORMAT_D16,
  BUFFER_ATOMIC_SWAP,
  BUFFER_ATOMIC_ADD,
  BUFFER_ATOMIC_SUB,
  BUFFER_ATOMIC_SMIN,
  BUFFER_ATOMIC_UMIN,
  BUFFER_ATOMIC_SMAX,
  BUFFER_ATOMIC_UMAX,
  BUFFER_ATOMIC_AND,
  BUFFER_ATOMIC_OR,
  BUFFER_ATOMIC_XOR,
  BUFFER_ATOMIC_INC,
  BUFFER_ATOMIC_DEC,
  BUFFER_ATOMIC_CMPSWAP,
  BUFFER_ATOMIC_CSUB,
  BUFFER_ATOMIC_FADD,
  BUFFER_ATOMIC_FMIN,
  BUFFER_ATOMIC_FMAX,
  BUFFER_ATOMIC_COND_SUB_U32,

  // Sentinel: one past the last opcode declared above.
  LAST_AMDGPU_ISD_NUMBER
};

} // End namespace AMDGPUISD

} // End namespace llvm

#endif