//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains DAG node definitions for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AMDGPU DAG Profiles
//===----------------------------------------------------------------------===//

// Integer ternary operation: 1 result, 3 operands. Values 1 and 2 share the
// (integer) type of value 0; value 3 is only required to be an integer.
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
  SDTCisSameAs<0, 1>,
  SDTCisSameAs<0, 2>,
  SDTCisInt<0>,
  SDTCisInt<3>
]>;

// FP classification: integer result, FP value 1, integer value 2.
def AMDGPUFPClassOp : SDTypeProfile<1, 2, [
  SDTCisInt<0>,
  SDTCisFP<1>,
  SDTCisInt<2>
]>;

// Pack of two FP values of identical type; the result type is unconstrained.
def AMDGPUFPPackOp : SDTypeProfile<1, 2, [
  SDTCisFP<1>,
  SDTCisSameAs<1, 2>
]>;

// Pack of two integer values of identical type; the result type is
// unconstrained.
def AMDGPUIntPackOp : SDTypeProfile<1, 2, [
  SDTCisInt<1>,
  SDTCisSameAs<1, 2>
]>;

// Division scale: 2 results (FP value 0, integer value 1) and 3 operands that
// all share the type of value 0.
def AMDGPUDivScaleOp : SDTypeProfile<2, 3, [
  SDTCisFP<0>,
  SDTCisInt<1>,
  SDTCisSameAs<0, 2>,
  SDTCisSameAs<0, 3>,
  SDTCisSameAs<0, 4>
]>;

// float, float, float, vcc
def AMDGPUFmasOp : SDTypeProfile<1, 4, [
  SDTCisFP<0>,
  SDTCisSameAs<0, 1>,
  SDTCisSameAs<0, 2>,
  SDTCisSameAs<0, 3>,
  SDTCisInt<4>
]>;

// No result, a single integer operand.
def ImmOp : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;

// Control-flow profiles. IF and ELSE have identical shapes: an i1 result, an
// i1 operand and an OtherVT operand.
def AMDGPUIfOp : SDTypeProfile<1, 2, [
  SDTCisVT<0, i1>,
  SDTCisVT<1, i1>,
  SDTCisVT<2, OtherVT>
]>;

def AMDGPUElseOp : SDTypeProfile<1, 2, [
  SDTCisVT<0, i1>,
  SDTCisVT<1, i1>,
  SDTCisVT<2, OtherVT>
]>;

// No result; an i1 operand and an OtherVT operand.
def AMDGPULoopOp : SDTypeProfile<0, 2, [
  SDTCisVT<0, i1>,
  SDTCisVT<1, OtherVT>
]>;

// i1 result computed from two i1 operands.
def AMDGPUIfBreakOp : SDTypeProfile<1, 2, [
  SDTCisVT<0, i1>,
  SDTCisVT<1, i1>,
  SDTCisVT<2, i1>
]>;

//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//

// Control-flow nodes; each one carries a chain.
def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>;
def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>;
def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>;

// Call sequence markers with both operands fixed to i32.
def callseq_start : SDNode<"ISD::CALLSEQ_START",
  SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>,
  [SDNPHasChain, SDNPOutGlue]
>;

def callseq_end : SDNode<"ISD::CALLSEQ_END",
  SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

// Variadic call: no results, first operand is a pointer.
def AMDGPUcall : SDNode<"AMDGPUISD::CALL",
  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]
>;

// Shared profile of the plain tail-call return nodes: three declared
// operands, the first of which is a pointer.
def AMDGPUTCReturnTP : SDTypeProfile<0, 3, [
  SDTCisPtrTy<0>
]>;

def AMDGPUtc_return : SDNode<"AMDGPUISD::TC_RETURN", AMDGPUTCReturnTP,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

def AMDGPUtc_return_gfx : SDNode<"AMDGPUISD::TC_RETURN_GFX", AMDGPUTCReturnTP,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

def AMDGPUtc_return_chain : SDNode<"AMDGPUISD::TC_RETURN_CHAIN",
  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

// With dynamic VGPRs.
def AMDGPUtc_return_chain_dvgpr : SDNode<"AMDGPUISD::TC_RETURN_CHAIN_DVGPR",
  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

// Trap with a single i16 operand.
def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP",
  SDTypeProfile<0, 1, [SDTCisVT<0, i16>]>,
  [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPOptInGlue]
>;

// Pointer-typed result computed from a pointer-typed operand. The profile
// previously listed SDTCisVT<0, iPTR> twice, which left the operand (value 1)
// without any constraint; the second entry now constrains the operand.
def AMDGPUconstdata_ptr : SDNode<"AMDGPUISD::CONST_DATA_PTR",
  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>
>;

// The argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;

def AMDGPUcos_impl : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
def AMDGPUsin_impl : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;

// out = a - floor(a)
def AMDGPUfract_impl : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;

// out = 1.0 / a
def AMDGPUrcp_impl : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;

// v_log_f32, which is log2
def AMDGPUlog_impl : SDNode<"AMDGPUISD::LOG", SDTFPUnaryOp>;

// v_exp_f32, which is exp2
def AMDGPUexp_impl : SDNode<"AMDGPUISD::EXP", SDTFPUnaryOp>;

// out = 1.0 / sqrt(a)
def AMDGPUrsq_impl : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;

def AMDGPUrcp_legacy_impl : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>;

def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>;

// out = 1.0 / sqrt(a), with the result clamped to +/- max_float.
def AMDGPUrsq_clamp_impl : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;

// Packed conversions.
def AMDGPUpkrtz_f16_f32_impl
    : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_i16_f32_impl
    : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_u16_f32_impl
    : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>;
def AMDGPUpk_i16_i32_impl
    : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>;
def AMDGPUpk_u16_u32_impl
    : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>;
def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16", SDTFPToIntOp>;


def AMDGPUfp_class_impl : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;

// out = max(a, b), where a and b are floats and a comparison against nan
// fails. The node is not commutative because a failed comparison yields the
// second operand:
//   x < nan ? x : nan -> nan
//   nan < x ? nan : x -> x
def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp,
  []
>;

def AMDGPUfmul_legacy_impl : SDNode<"AMDGPUISD::FMUL_LEGACY", SDTFPBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = min(a, b), where a and b are floats and a comparison against nan
// fails.
def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
  []
>;

// FIXME: TableGen doesn't like commutative instructions with more
// than 2 operands. The properties of the three-input nodes below are
// therefore left commented out.

// out = max(a, b, c) a, b and c are floats
def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = max(a, b, c) a, b and c are floats. Operation is IEEE2019 compliant.
def AMDGPUfmaximum3 : SDNode<"AMDGPUISD::FMAXIMUM3", SDTFPTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = max(a, b, c) a, b and c are signed ints
def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = max(a, b, c) a, b and c are unsigned ints
def AMDGPUumax3 : SDNode<"AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c) a, b and c are floats
def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c) a, b and c are floats. Operation is IEEE2019 compliant.
def AMDGPUfminimum3 : SDNode<"AMDGPUISD::FMINIMUM3", SDTFPTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c) a, b and c are signed ints
def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c) a, b and c are unsigned ints
def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = (src0 + src1 > 0xFFFFFFFF) ? 1 : 0
def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>;

// out = (src1 > src0) ? 1 : 0
def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;

// setcc: integer result, two operands of identical type, and an OtherVT
// operand.
def AMDGPUSetCCOp : SDTypeProfile<1, 3, [
  SDTCisInt<0>,
  SDTCisSameAs<1, 2>,
  SDTCisVT<3, OtherVT>
]>;

def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;

// FMA and FMUL variants that carry a chain and glue.
def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

def AMDGPUmul : SDNode<"AMDGPUISD::FMUL_W_CHAIN", SDTFPBinOp,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

def AMDGPUcvt_f32_ubyte0
    : SDNode<"AMDGPUISD::CVT_F32_UBYTE0", SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte1
    : SDNode<"AMDGPUISD::CVT_F32_UBYTE1", SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte2
    : SDNode<"AMDGPUISD::CVT_F32_UBYTE2", SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte3
    : SDNode<"AMDGPUISD::CVT_F32_UBYTE3", SDTIntToFPOp, []>;

def AMDGPUcvt_pk_i16_i32
    : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp, []>;

// urecip - This operation is a helper for integer division; it returns the
// result of 1 / a as a fractional unsigned integer.
// out = (2^32 / a) + e
// e is rounding error
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;

// Special case divide preop and flags.
def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;

// Special case divide FMA with scale and flags (src0 = Quotient,
// src1 = Denominator, src2 = Numerator).
def AMDGPUdiv_fmas_impl : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
  [SDNPOptInGlue]
>;

// Single or double precision division fixup.
// Special case divide fixup and flags (src0 = Quotient, src1 =
// Denominator, src2 = Numerator).
def AMDGPUdiv_fixup_impl : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;

def AMDGPUfmad_ftz_impl : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;

// Chained register load: one result; value 1 is a pointer and value 2 an
// integer.
def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
  SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
  [SDNPHasChain, SDNPMayLoad]
>;

// Chained register store: no result, three operands; value 1 is a pointer and
// value 2 an integer.
def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
  SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
  [SDNPHasChain, SDNPMayStore]
>;

// MSKOR instructions are atomic memory instructions used mainly for storing
// 8-bit and 16-bit values.  The definition is:
//
// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)
//
// src0: vec4(src, 0, 0, mask)
// src1: dst - rat offset (aka pointer) in dwords
def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
  SDTypeProfile<0, 2, []>,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand]
>;

// Compare-and-swap: one result; value 1 is a pointer and value 2 a vector.
def AMDGPUatomic_cmp_swap : SDNode<"AMDGPUISD::ATOMIC_CMP_SWAP",
  SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisVec<2>]>,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]
>;

// Bitfield nodes.
def AMDGPUbfe_u32_impl : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfe_i32_impl : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;

def AMDGPUffbh_u32_impl
    : SDNode<"AMDGPUISD::FFBH_U32", SDTIntBitCountUnaryOp>;
def AMDGPUffbh_i32_impl
    : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>;

def AMDGPUffbl_b32_impl
    : SDNode<"AMDGPUISD::FFBL_B32", SDTIntBitCountUnaryOp>;

// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored
// when performing the multiply. The result is a 32 or 64 bit value.
// Integer result and two integer operands of identical type; the result type
// is not tied to the operand type.
def AMDGPUMul24Op : SDTypeProfile<1, 2, [
  SDTCisInt<0>,
  SDTCisInt<1>,
  SDTCisSameAs<1, 2>
]>;

def AMDGPUmul_u24_impl : SDNode<"AMDGPUISD::MUL_U24", AMDGPUMul24Op,
  [SDNPCommutative, SDNPAssociative]
>;
def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", AMDGPUMul24Op,
  [SDNPCommutative, SDNPAssociative]
>;

// mulhi24 yields the high-order 16 bits of the 48-bit result. Here's an example
// that shows mulhi24 is not associative:
//
// Given a = 0x10002, b = c = 0xffffff:
// mulhi24(mulhi24(a, b), c) = mulhi24(0x100, 0xffffff) = 0
// Which is not equal to:
// mulhi24(a, mulhi24(b, c)) = mulhi24(0x10002, 0xffff) = 1
def AMDGPUmulhi_u24_impl : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp,
  [SDNPCommutative]
>;
def AMDGPUmulhi_i24_impl : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp,
  [SDNPCommutative]
>;

def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp,
  []
>;
def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp,
  []
>;

def AMDGPUsmed3 : SDNode<"AMDGPUISD::SMED3", AMDGPUDTIntTernaryOp,
  []
>;

def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp,
  []
>;

def AMDGPUfmed3_impl : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;

// FP result whose type matches value 3; values 1 and 2 are vectors of
// identical type; value 4 is an integer.
def AMDGPUfdot2_impl : SDNode<"AMDGPUISD::FDOT2",
  SDTypeProfile<1, 4, [
    SDTCisSameAs<0, 3>,
    SDTCisSameAs<1, 2>,
    SDTCisFP<0>,
    SDTCisVec<1>,
    SDTCisInt<4>
  ]>,
  []
>;

def AMDGPUperm_impl : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;

// SI+ export
//
// Eight operands, numbered 0-7. The final constraint used to read
// SDTCisInt<1>, repeating the "en" constraint and leaving the "vm" operand
// (index 7) unconstrained; it now refers to operand 7.
def AMDGPUExportOp : SDTypeProfile<0, 8, [
  SDTCisInt<0>,       // i8 tgt
  SDTCisInt<1>,       // i8 en
                      // i32 or f32 src0
  SDTCisSameAs<3, 2>, // f32 src1
  SDTCisSameAs<4, 2>, // f32 src2
  SDTCisSameAs<5, 2>, // f32 src3
  SDTCisInt<6>,       // i1 compr
  // skip done
  SDTCisInt<7>        // i1 vm

]>;


//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
// Branch instruction where second and third are basic blocks
def SDTIL_BRCond : SDTypeProfile<0, 2, [
  SDTCisVT<0, OtherVT>
]>;

//===----------------------------------------------------------------------===//
// Flow Control DAG Nodes
//===----------------------------------------------------------------------===//
def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;

//===----------------------------------------------------------------------===//
// Call/Return DAG Nodes
//===----------------------------------------------------------------------===//
def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
  [SDNPHasChain, SDNPOptInGlue]
>;
def AMDGPUendpgm_trap : SDNode<"AMDGPUISD::ENDPGM_TRAP", SDTNone,
  [SDNPHasChain]
>;
def AMDGPUsimulated_trap : SDNode<"AMDGPUISD::SIMULATED_TRAP", SDTNone,
  [SDNPHasChain]
>;

def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

def AMDGPUret_glue : SDNode<"AMDGPUISD::RET_GLUE", SDTNone,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;


//===----------------------------------------------------------------------===//
// Intrinsic/Custom node compatibility PatFrags
//===----------------------------------------------------------------------===//

// Each fragment below lists two alternative patterns over the same operands,
// so a pattern written against the fragment covers both forms.

def AMDGPUrcp : PatFrags<(ops node:$src), [
  (int_amdgcn_rcp node:$src),
  (AMDGPUrcp_impl node:$src)
]>;

def AMDGPUrcp_legacy : PatFrags<(ops node:$src), [
  (int_amdgcn_rcp_legacy node:$src),
  (AMDGPUrcp_legacy_impl node:$src)
]>;

def AMDGPUrsq : PatFrags<(ops node:$src), [
  (int_amdgcn_rsq node:$src),
  (AMDGPUrsq_impl node:$src)
]>;

def AMDGPUrsq_clamp : PatFrags<(ops node:$src), [
  (int_amdgcn_rsq_clamp node:$src),
  (AMDGPUrsq_clamp_impl node:$src)
]>;

def AMDGPUsin : PatFrags<(ops node:$src), [
  (int_amdgcn_sin node:$src),
  (AMDGPUsin_impl node:$src)
]>;

def AMDGPUcos : PatFrags<(ops node:$src), [
  (int_amdgcn_cos node:$src),
  (AMDGPUcos_impl node:$src)
]>;

def AMDGPUfract : PatFrags<(ops node:$src), [
  (int_amdgcn_fract node:$src),
  (AMDGPUfract_impl node:$src)
]>;

def AMDGPUlog : PatFrags<(ops node:$src), [
  (int_amdgcn_log node:$src),
  (AMDGPUlog_impl node:$src)
]>;

// f16 flavour: pairs the intrinsic with the generic flog2 node.
def AMDGPUlogf16 : PatFrags<(ops node:$src), [
  (int_amdgcn_log node:$src),
  (flog2 node:$src)
]>;

def AMDGPUexp : PatFrags<(ops node:$src), [
  (int_amdgcn_exp2 node:$src),
  (AMDGPUexp_impl node:$src)
]>;

// f16 flavour: pairs the intrinsic with the generic fexp2 node.
def AMDGPUexpf16 : PatFrags<(ops node:$src), [
  (int_amdgcn_exp2 node:$src),
  (fexp2 node:$src)
]>;

def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_class node:$src0, node:$src1),
  (AMDGPUfp_class_impl node:$src0, node:$src1)
]>;

def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2), [
  (int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2),
  (AMDGPUfmed3_impl node:$src0, node:$src1, node:$src2)
]>;

def AMDGPUdiv_fixup : PatFrags<(ops node:$src0, node:$src1, node:$src2), [
  (int_amdgcn_div_fixup node:$src0, node:$src1, node:$src2),
  (AMDGPUdiv_fixup_impl node:$src0, node:$src1, node:$src2)
]>;

def AMDGPUffbh_i32 : PatFrags<(ops node:$src), [
  (int_amdgcn_sffbh node:$src),
  (AMDGPUffbh_i32_impl node:$src)
]>;

// Pairs the generic ctlz_zero_undef node with the custom node.
def AMDGPUffbh_u32 : PatFrags<(ops node:$src), [
  (ctlz_zero_undef node:$src),
  (AMDGPUffbh_u32_impl node:$src)
]>;

// Pairs the generic cttz_zero_undef node with the custom node.
def AMDGPUffbl_b32 : PatFrags<(ops node:$src), [
  (cttz_zero_undef node:$src),
  (AMDGPUffbl_b32_impl node:$src)
]>;

def AMDGPUpkrtz_f16_f32 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_cvt_pkrtz node:$src0, node:$src1),
  (AMDGPUpkrtz_f16_f32_impl node:$src0, node:$src1)
]>;

def AMDGPUpknorm_i16_f32 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_cvt_pknorm_i16 node:$src0, node:$src1),
  (AMDGPUpknorm_i16_f32_impl node:$src0, node:$src1)
]>;

def AMDGPUpknorm_u16_f32 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_cvt_pknorm_u16 node:$src0, node:$src1),
  (AMDGPUpknorm_u16_f32_impl node:$src0, node:$src1)
]>;

def AMDGPUpk_i16_i32 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_cvt_pk_i16 node:$src0, node:$src1),
  (AMDGPUpk_i16_i32_impl node:$src0, node:$src1)
]>;

def AMDGPUpk_u16_u32 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_cvt_pk_u16 node:$src0, node:$src1),
  (AMDGPUpk_u16_u32_impl node:$src0, node:$src1)
]>;

def AMDGPUfmad_ftz : PatFrags<(ops node:$src0, node:$src1, node:$src2), [
  (int_amdgcn_fmad_ftz node:$src0, node:$src1, node:$src2),
  (AMDGPUfmad_ftz_impl node:$src0, node:$src1, node:$src2)
]>;

def AMDGPUmul_u24 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_mul_u24 node:$src0, node:$src1),
  (AMDGPUmul_u24_impl node:$src0, node:$src1)
]>;

def AMDGPUmul_i24 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_mul_i24 node:$src0, node:$src1),
  (AMDGPUmul_i24_impl node:$src0, node:$src1)
]>;

def AMDGPUmulhi_u24 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_mulhi_u24 node:$src0, node:$src1),
  (AMDGPUmulhi_u24_impl node:$src0, node:$src1)
]>;

def AMDGPUmulhi_i24 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_mulhi_i24 node:$src0, node:$src1),
  (AMDGPUmulhi_i24_impl node:$src0, node:$src1)
]>;

def AMDGPUbfe_i32 : PatFrags<(ops node:$src0, node:$src1, node:$src2), [
  (int_amdgcn_sbfe node:$src0, node:$src1, node:$src2),
  (AMDGPUbfe_i32_impl node:$src0, node:$src1, node:$src2)
]>;

def AMDGPUbfe_u32 : PatFrags<(ops node:$src0, node:$src1, node:$src2), [
  (int_amdgcn_ubfe node:$src0, node:$src1, node:$src2),
  (AMDGPUbfe_u32_impl node:$src0, node:$src1, node:$src2)
]>;

def AMDGPUfmul_legacy : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_fmul_legacy node:$src0, node:$src1),
  (AMDGPUfmul_legacy_impl node:$src0, node:$src1)
]>;

def AMDGPUfdot2
    : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$clamp), [
  (int_amdgcn_fdot2 node:$src0, node:$src1, node:$src2, node:$clamp),
  (AMDGPUfdot2_impl node:$src0, node:$src1, node:$src2, node:$clamp)
]>;

def AMDGPUdiv_fmas
    : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$vcc), [
  (int_amdgcn_div_fmas node:$src0, node:$src1, node:$src2, node:$vcc),
  (AMDGPUdiv_fmas_impl node:$src0, node:$src1, node:$src2, node:$vcc)
]>;

def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2), [
  (int_amdgcn_perm node:$src0, node:$src1, node:$src2),
  (AMDGPUperm_impl node:$src0, node:$src1, node:$src2)
]>;
