//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Interface definition of the TargetLowering class that is common
/// to all AMD GPUs.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H

#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {

class AMDGPUMachineFunction;
class AMDGPUSubtarget;
struct ArgDescriptor;

/// TargetLowering subclass that serves as the lowering interface common to all
/// AMD GPUs (see the file comment). The class is abstract: PostISelFolding is
/// declared pure virtual below. Apart from a handful of trivial inline bodies,
/// every member is only declared here; the definitions are not visible in this
/// header.
class AMDGPUTargetLowering : public TargetLowering {
private:
  const AMDGPUSubtarget *Subtarget;

  /// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been
  /// legalized from a smaller type VT. Need to match pre-legalized type because
  /// the generic legalization inserts the add/sub between the select and
  /// compare.
  SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL,
                      unsigned Opc) const;

public:
  /// \returns The minimum number of bits needed to store the value of \p Op as
  /// an unsigned integer. Truncating to this size and then zero-extending to
  /// the original size will not change the value.
  static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG);

  /// \returns The minimum number of bits needed to store the value of \p Op as
  /// a signed integer. Truncating to this size and then sign-extending to the
  /// original size will not change the value.
  static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);

protected:
  SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
  // NOTE(review): the two comment lines quoted below were a Doxygen comment
  // with no declaration following it (presumably left over from a removed
  // helper -- confirm against history). They are kept as a plain comment so
  // that Doxygen does not attach them to LowerFREM:
  //   Split a vector store into multiple scalar stores.
  //   \returns The resulting chain.

  SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFROUNDEVEN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;

  static bool allowApproxFunc(const SelectionDAG &DAG, SDNodeFlags Flags);
  static bool needsDenormHandlingF32(const SelectionDAG &DAG, SDValue Src,
                                     SDNodeFlags Flags);
  SDValue getIsLtSmallestNormal(SelectionDAG &DAG, SDValue Op,
                                SDNodeFlags Flags) const;
  SDValue getIsFinite(SelectionDAG &DAG, SDValue Op, SDNodeFlags Flags) const;
  // NOTE(review): SL is taken by value here, unlike the `const SDLoc &`
  // parameters used elsewhere in this class. Changing it would require
  // updating the out-of-line definition as well.
  std::pair<SDValue, SDValue> getScaledLogInput(SelectionDAG &DAG,
                                                const SDLoc SL, SDValue Op,
                                                SDNodeFlags Flags) const;

  SDValue LowerFLOG2(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOGCommon(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOG10(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFLOGUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                          bool IsLog10, SDNodeFlags Flags) const;
  SDValue lowerFEXP2(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerFEXPUnsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                          SDNodeFlags Flags) const;
  SDValue lowerFEXP10Unsafe(SDValue Op, const SDLoc &SL, SelectionDAG &DAG,
                            SDNodeFlags Flags) const;
  SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const;

  SDValue lowerCTLZResults(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
  SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerF64ToF16Safe(SDValue Src, const SDLoc &DL,
                            SelectionDAG &DAG) const;

  SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;

  // Note: this access specifier is redundant; the preceding section is already
  // protected.
protected:
  bool shouldCombineMemoryType(EVT VT) const;
  SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performIntrinsicWOChainCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
                                       unsigned Opc, SDValue LHS,
                                       uint32_t ValLo, uint32_t ValHi) const;
  SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulLoHiCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
                                  SDValue RHS, DAGCombinerInfo &DCI) const;

  SDValue foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI,
                               SDValue N) const;
  SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  TargetLowering::NegatibleCost
  getConstantNegateCost(const ConstantFPSDNode *C) const;

  bool isConstantCostlierToNegate(SDValue N) const;
  bool isConstantCheaperToNegate(SDValue N) const;
  SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
  SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);

  // Virtual (but not pure), so derived classes may override it.
  virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
                                     SelectionDAG &DAG) const;

  /// Return 64-bit value Op as two 32-bit integers.
  std::pair<SDValue, SDValue> split64BitValue(SDValue Op,
                                              SelectionDAG &DAG) const;
  SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const;
  SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const;

  /// Split a vector type into two parts. The first part is a power of two
  /// vector. The second part is whatever is left over, and is a scalar if it
  /// would otherwise be a 1-vector.
  std::pair<EVT, EVT> getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const;

  /// Split a vector value into two parts of types \p LoVT and \p HighVT.
  /// \p HighVT could be scalar.
  std::pair<SDValue, SDValue> splitVector(const SDValue &N, const SDLoc &DL,
                                          const EVT &LoVT, const EVT &HighVT,
                                          SelectionDAG &DAG) const;

  /// Split a vector load into 2 loads of half the vector.
  SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;

  /// Widen a suitably aligned v3 load. For all other cases, split the input
  /// vector load.
  SDValue WidenOrSplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;

  /// Split a vector store into 2 stores of half the vector.
  SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;

  SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
  // Unlike the other Lower* helpers this returns void; results are delivered
  // through the \p Results out-parameter.
  void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
                      SmallVectorImpl<SDValue> &Results) const;

  void analyzeFormalArgumentsCompute(
    CCState &State,
    const SmallVectorImpl<ISD::InputArg> &Ins) const;

public:
  AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);

  bool mayIgnoreSignedZero(SDValue Op) const;

  /// \returns the operand of \p Val if \p Val is an ISD::BITCAST node,
  /// otherwise \p Val itself. Only a single level of bitcast is looked
  /// through.
  static inline SDValue stripBitcast(SDValue Val) {
    return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
  }

  static bool shouldFoldFNegIntoSrc(SDNode *FNeg, SDValue FNegSrc);
  static bool allUsesHaveSourceMods(const SDNode *N,
                                    unsigned CostThreshold = 4);
  bool isFAbsFree(EVT VT) const override;
  bool isFNegFree(EVT VT) const override;
  bool isTruncateFree(EVT Src, EVT Dest) const override;
  bool isTruncateFree(Type *Src, Type *Dest) const override;

  bool isZExtFree(Type *Src, Type *Dest) const override;
  bool isZExtFree(EVT Src, EVT Dest) const override;

  SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                               bool LegalOperations, bool ForCodeSize,
                               NegatibleCost &Cost,
                               unsigned Depth) const override;

  bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override;

  bool isDesirableToCommuteWithShift(const SDNode *N,
                                     CombineLevel Level) const override;

  EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                          ISD::NodeType ExtendKind) const override;

  unsigned getVectorIdxWidth(const DataLayout &) const override;
  bool isSelectSupported(SelectSupportKind) const override;

  bool isFPImmLegal(const APFloat &Imm, EVT VT,
                    bool ForCodeSize) const override;
  bool ShouldShrinkFPConstant(EVT VT) const override;
  bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtType, EVT ExtVT,
                             std::optional<unsigned> ByteOffset) const override;

  // Declared `final`: classes deriving from this one cannot override it.
  bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG,
                               const MachineMemOperand &MMO) const final;

  bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT,
                                    unsigned NumElem,
                                    unsigned AS) const override;
  bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override;
  bool isCheapToSpeculateCttz(Type *Ty) const override;
  bool isCheapToSpeculateCtlz(Type *Ty) const override;

  bool isSDNodeAlwaysUniform(const SDNode *N) const override;

  // The three shouldCastAtomic*InIR overrides below unconditionally return
  // AtomicExpansionKind::None.
  // FIXME: This hook should not exist
  AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override {
    return AtomicExpansionKind::None;
  }

  AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override {
    return AtomicExpansionKind::None;
  }

  AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *) const override {
    return AtomicExpansionKind::None;
  }

  static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
  static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);

  SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                      const SmallVectorImpl<ISD::OutputArg> &Outs,
                      const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
                      SelectionDAG &DAG) const override;

  SDValue addTokenForArgument(SDValue Chain,
                              SelectionDAG &DAG,
                              MachineFrameInfo &MFI,
                              int ClobberedFI) const;

  SDValue lowerUnhandledCall(CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals,
                             StringRef Reason) const;
  SDValue LowerCall(CallLoweringInfo &CLI,
                    SmallVectorImpl<SDValue> &InVals) const override;

  SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
  SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
  void ReplaceNodeResults(SDNode * N,
                          SmallVectorImpl<SDValue> &Results,
                          SelectionDAG &DAG) const override;

  SDValue combineFMinMaxLegacyImpl(const SDLoc &DL, EVT VT, SDValue LHS,
                                   SDValue RHS, SDValue True, SDValue False,
                                   SDValue CC, DAGCombinerInfo &DCI) const;

  SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
                               SDValue RHS, SDValue True, SDValue False,
                               SDValue CC, DAGCombinerInfo &DCI) const;

  const char* getTargetNodeName(unsigned Opcode) const override;

  // FIXME: Turn off MergeConsecutiveStores() before Instruction Selection for
  // AMDGPU.  Commit r319036,
  // (https://github.com/llvm/llvm-project/commit/db77e57ea86d941a4262ef60261692f4cb6893e6)
  // turned on MergeConsecutiveStores() before Instruction Selection for all
  // targets.  Enough AMDGPU compiles go into an infinite loop (
  // MergeConsecutiveStores() merges two stores; LegalizeStoreOps() un-merges;
  // MergeConsecutiveStores() re-merges, etc. ) to warrant turning it off for
  // now.
  bool mergeStoresAfterLegalization(EVT) const override { return false; }

  // Unconditionally reports fsqrt as cheap; both arguments are ignored.
  bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
    return true;
  }
  SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                          int &RefinementSteps, bool &UseOneConstNR,
                          bool Reciprocal) const override;
  SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
                           int &RefinementSteps) const override;

  // Pure virtual: every concrete subclass must provide an implementation.
  virtual SDNode *PostISelFolding(MachineSDNode *N,
                                  SelectionDAG &DAG) const = 0;

  /// Determine which bits of \p Op are known to be either zero or one and
  /// return them in \p Known.
  void computeKnownBitsForTargetNode(const SDValue Op,
                                     KnownBits &Known,
                                     const APInt &DemandedElts,
                                     const SelectionDAG &DAG,
                                     unsigned Depth = 0) const override;

  unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedElts,
                                           const SelectionDAG &DAG,
                                           unsigned Depth = 0) const override;

  unsigned computeNumSignBitsForTargetInstr(GISelValueTracking &Analysis,
                                            Register R,
                                            const APInt &DemandedElts,
                                            const MachineRegisterInfo &MRI,
                                            unsigned Depth = 0) const override;

  bool isKnownNeverNaNForTargetNode(SDValue Op, const APInt &DemandedElts,
                                    const SelectionDAG &DAG, bool SNaN = false,
                                    unsigned Depth = 0) const override;

  bool isReassocProfitable(MachineRegisterInfo &MRI, Register N0,
                           Register N1) const override;

  /// Helper function that adds Reg to the LiveIn list of the DAG's
  /// MachineFunction.
  ///
  /// \returns a RegisterSDNode representing Reg if \p RawReg is true, otherwise
  /// a copy from the register.
  SDValue CreateLiveInRegister(SelectionDAG &DAG,
                               const TargetRegisterClass *RC,
                               Register Reg, EVT VT,
                               const SDLoc &SL,
                               bool RawReg = false) const;
  /// Convenience overload: forwards to the overload above with an SDLoc built
  /// from the DAG's entry node, leaving RawReg at its default (false).
  SDValue CreateLiveInRegister(SelectionDAG &DAG,
                               const TargetRegisterClass *RC,
                               Register Reg, EVT VT) const {
    return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()));
  }

  // Returns the raw live in register rather than a copy from it.
  // (Forwards to CreateLiveInRegister with RawReg = true.)
  SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG,
                                  const TargetRegisterClass *RC,
                                  Register Reg, EVT VT) const {
    return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()),
                                true);
  }

  /// Similar to CreateLiveInRegister, except value may be loaded from a stack
  /// slot rather than passed in a register.
  SDValue loadStackInputValue(SelectionDAG &DAG,
                              EVT VT,
                              const SDLoc &SL,
                              int64_t Offset) const;

  SDValue storeStackInputValue(SelectionDAG &DAG,
                               const SDLoc &SL,
                               SDValue Chain,
                               SDValue ArgVal,
                               int64_t Offset) const;

  SDValue loadInputValue(SelectionDAG &DAG,
                         const TargetRegisterClass *RC,
                         EVT VT, const SDLoc &SL,
                         const ArgDescriptor &Arg) const;

  // Selector passed to getImplicitParameterOffset to pick which implicit
  // parameter's offset is requested.
  enum ImplicitParameter {
    FIRST_IMPLICIT,
    PRIVATE_BASE,
    SHARED_BASE,
    QUEUE_PTR,
  };

  /// Helper function that returns the byte offset of the given
  /// type of implicit parameter.
  uint32_t getImplicitParameterOffset(const MachineFunction &MF,
                                      const ImplicitParameter Param) const;
  /// Overload of the helper above that takes \p ExplicitKernArgSize directly
  /// instead of a MachineFunction.
  uint32_t getImplicitParameterOffset(const uint64_t ExplicitKernArgSize,
                                      const ImplicitParameter Param) const;

  // The fence operand type is always MVT::i32; the data layout is ignored.
  MVT getFenceOperandTy(const DataLayout &DL) const override {
    return MVT::i32;
  }
};

namespace AMDGPUISD {

// Target-specific SelectionDAG node opcodes. Numbering starts at
// ISD::BUILTIN_OP_END, and every enumerator without an explicit initializer
// takes the previous value plus one, so the declaration order below determines
// the numeric opcode values -- do not reorder entries casually.
enum NodeType : unsigned {
  // AMDIL ISD Opcodes
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  BRANCH_COND,
  // End AMDIL ISD Opcodes

  // Function call.
  CALL,
  TC_RETURN,
  TC_RETURN_GFX,
  TC_RETURN_CHAIN,
  TC_RETURN_CHAIN_DVGPR,
  TRAP,

  // Masked control flow nodes.
  IF,
  ELSE,
  LOOP,

  // A uniform kernel return that terminates the wavefront.
  ENDPGM,

  // s_endpgm, but we may want to insert it in the middle of the block.
  ENDPGM_TRAP,

  // "s_trap 2" equivalent on hardware that does not support it.
  SIMULATED_TRAP,

  // Return to a shader part's epilog code.
  RETURN_TO_EPILOG,

  // Return with values from a non-entry function.
  RET_GLUE,

  // Convert an unswizzled wave uniform stack address to an address compatible
  // with a vector offset for use in stack access.
  WAVE_ADDRESS,

  DWORDADDR,
  FRACT,

  /// CLAMP value between 0.0 and 1.0. NaN clamped to 0, following clamp output
  /// modifier behavior with dx10_enable.
  CLAMP,

  // This is SETCC with the full mask result which is used for a compare with a
  // result bit per item in the wavefront.
  SETCC,

  DENORM_MODE,

  // FP ops with input and output chain.
  FMA_W_CHAIN,
  FMUL_W_CHAIN,

  // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
  // Denormals handled on some parts.
  COS_HW,
  SIN_HW,
  FMAX_LEGACY,
  FMIN_LEGACY,

  FMAX3,
  SMAX3,
  UMAX3,
  FMIN3,
  SMIN3,
  UMIN3,
  FMED3,
  SMED3,
  UMED3,
  FMAXIMUM3,
  FMINIMUM3,
  FDOT2,
  URECIP,
  DIV_SCALE,
  DIV_FMAS,
  DIV_FIXUP,
  // For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is
  // treated as an illegal operation.
  FMAD_FTZ,

  // RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
  //            For f64, max error 2^29 ULP, handles denormals.
  RCP,
  RSQ,
  RCP_LEGACY,
  RCP_IFLAG,

  // log2, no denormal handling for f32.
  LOG,

  // exp2, no denormal handling for f32.
  EXP,

  FMUL_LEGACY,
  RSQ_CLAMP,
  FP_CLASS,
  DOT4,
  CARRY,
  BORROW,
  BFE_U32,  // Extract range of bits with zero extension to 32-bits.
  BFE_I32,  // Extract range of bits with sign extension to 32-bits.
  BFI,      // (src0 & src1) | (~src0 & src2)
  BFM,      // Insert a range of bits into a 32-bit word.
  FFBH_U32, // ctlz with -1 if input is zero.
  FFBH_I32,
  FFBL_B32, // cttz with -1 if input is zero.
  MUL_U24,
  MUL_I24,
  MULHI_U24,
  MULHI_I24,
  MAD_U24,
  MAD_I24,
  MAD_U64_U32,
  MAD_I64_I32,
  PERM,
  TEXTURE_FETCH,
  R600_EXPORT,
  CONST_ADDRESS,
  REGISTER_LOAD,
  REGISTER_STORE,

  // These cvt_f32_ubyte* nodes need to remain consecutive and in order.
  CVT_F32_UBYTE0,
  CVT_F32_UBYTE1,
  CVT_F32_UBYTE2,
  CVT_F32_UBYTE3,

  // Convert two float 32 numbers into a single register holding two packed f16
  // with round to zero.
  CVT_PKRTZ_F16_F32,
  CVT_PKNORM_I16_F32,
  CVT_PKNORM_U16_F32,
  CVT_PK_I16_I32,
  CVT_PK_U16_U32,

  // Same as the standard node, except the high bits of the resulting integer
  // are known 0.
  FP_TO_FP16,

  /// This node is for VLIW targets and it is used to represent a vector
  /// that is stored in consecutive registers with the same channel.
  /// For example:
  ///   |X  |Y|Z|W|
  /// T0|v.x| | | |
  /// T1|v.y| | | |
  /// T2|v.z| | | |
  /// T3|v.w| | | |
  BUILD_VERTICAL_VECTOR,
  /// Pointer to the start of the shader's constant data.
  CONST_DATA_PTR,
  PC_ADD_REL_OFFSET,
  LDS,

  DUMMY_CHAIN,

  // FIRST_MEMORY_OPCODE and LAST_MEMORY_OPCODE are range markers rather than
  // distinct opcodes: LOAD_D16_HI is assigned the same value as
  // FIRST_MEMORY_OPCODE, and LAST_MEMORY_OPCODE aliases the final entry,
  // BUFFER_ATOMIC_COND_SUB_U32. Everything declared in between therefore falls
  // in the contiguous range [FIRST_MEMORY_OPCODE, LAST_MEMORY_OPCODE]; a new
  // entry appended after BUFFER_ATOMIC_COND_SUB_U32 requires updating
  // LAST_MEMORY_OPCODE.
  FIRST_MEMORY_OPCODE,
  LOAD_D16_HI = FIRST_MEMORY_OPCODE,
  LOAD_D16_LO,
  LOAD_D16_HI_I8,
  LOAD_D16_HI_U8,
  LOAD_D16_LO_I8,
  LOAD_D16_LO_U8,

  STORE_MSKOR,
  TBUFFER_STORE_FORMAT,
  TBUFFER_STORE_FORMAT_D16,
  TBUFFER_LOAD_FORMAT,
  TBUFFER_LOAD_FORMAT_D16,
  DS_ORDERED_COUNT,
  ATOMIC_CMP_SWAP,
  BUFFER_LOAD,
  BUFFER_LOAD_UBYTE,
  BUFFER_LOAD_USHORT,
  BUFFER_LOAD_BYTE,
  BUFFER_LOAD_SHORT,
  BUFFER_LOAD_TFE,
  BUFFER_LOAD_UBYTE_TFE,
  BUFFER_LOAD_USHORT_TFE,
  BUFFER_LOAD_BYTE_TFE,
  BUFFER_LOAD_SHORT_TFE,
  BUFFER_LOAD_FORMAT,
  BUFFER_LOAD_FORMAT_TFE,
  BUFFER_LOAD_FORMAT_D16,
  SBUFFER_LOAD,
  SBUFFER_LOAD_BYTE,
  SBUFFER_LOAD_UBYTE,
  SBUFFER_LOAD_SHORT,
  SBUFFER_LOAD_USHORT,
  SBUFFER_PREFETCH_DATA,
  BUFFER_STORE,
  BUFFER_STORE_BYTE,
  BUFFER_STORE_SHORT,
  BUFFER_STORE_FORMAT,
  BUFFER_STORE_FORMAT_D16,
  BUFFER_ATOMIC_SWAP,
  BUFFER_ATOMIC_ADD,
  BUFFER_ATOMIC_SUB,
  BUFFER_ATOMIC_SMIN,
  BUFFER_ATOMIC_UMIN,
  BUFFER_ATOMIC_SMAX,
  BUFFER_ATOMIC_UMAX,
  BUFFER_ATOMIC_AND,
  BUFFER_ATOMIC_OR,
  BUFFER_ATOMIC_XOR,
  BUFFER_ATOMIC_INC,
  BUFFER_ATOMIC_DEC,
  BUFFER_ATOMIC_CMPSWAP,
  BUFFER_ATOMIC_CSUB,
  BUFFER_ATOMIC_FADD,
  BUFFER_ATOMIC_FMIN,
  BUFFER_ATOMIC_FMAX,
  BUFFER_ATOMIC_COND_SUB_U32,
  LAST_MEMORY_OPCODE = BUFFER_ATOMIC_COND_SUB_U32,
};

} // End namespace AMDGPUISD

} // End namespace llvm

#endif