//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains DAG node definitions for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// AMDGPU DAG Profiles
//===----------------------------------------------------------------------===//

// 1 result, 3 operands. Values 0, 1 and 2 share a single integer type; value 3
// is constrained only to be an integer.
def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
  SDTCisSameAs<0, 1>,
  SDTCisSameAs<0, 2>,
  SDTCisInt<0>,
  SDTCisInt<3>
]>;

// 1 integer result, a floating-point operand and an integer operand.
def AMDGPUFPClassOp : SDTypeProfile<1, 2, [
  SDTCisInt<0>,
  SDTCisFP<1>,
  SDTCisInt<2>
]>;

// 1 result, 2 operands of the same floating-point type. The result type is not
// constrained by this profile.
def AMDGPUFPPackOp : SDTypeProfile<1, 2, [
  SDTCisFP<1>,
  SDTCisSameAs<1, 2>
]>;

// 1 result, 2 operands of the same integer type. The result type is not
// constrained by this profile.
def AMDGPUIntPackOp : SDTypeProfile<1, 2, [
  SDTCisInt<1>,
  SDTCisSameAs<1, 2>
]>;

// 2 results, 3 operands. Result 0 is floating point, result 1 is an integer,
// and all three operands (values 2-4) have the type of result 0.
def AMDGPUDivScaleOp : SDTypeProfile<2, 3, [
  SDTCisFP<0>,
  SDTCisInt<1>,
  SDTCisSameAs<0, 2>,
  SDTCisSameAs<0, 3>,
  SDTCisSameAs<0, 4>
]>;

// float, float, float, vcc
def AMDGPUFmasOp : SDTypeProfile<1, 4, [
  SDTCisFP<0>,
  SDTCisSameAs<0, 1>,
  SDTCisSameAs<0, 2>,
  SDTCisSameAs<0, 3>,
  SDTCisInt<4>
]>;

// No results, a single integer operand.
def ImmOp         : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;

// Structured control-flow profiles. The if/else profiles produce an i1 and
// take an i1 plus an OtherVT operand.
def AMDGPUIfOp : SDTypeProfile<1, 2, [
  SDTCisVT<0, i1>,
  SDTCisVT<1, i1>,
  SDTCisVT<2, OtherVT>
]>;

def AMDGPUElseOp : SDTypeProfile<1, 2, [
  SDTCisVT<0, i1>,
  SDTCisVT<1, i1>,
  SDTCisVT<2, OtherVT>
]>;

// No results; an i1 operand followed by an OtherVT operand.
def AMDGPULoopOp : SDTypeProfile<0, 2, [
  SDTCisVT<0, i1>,
  SDTCisVT<1, OtherVT>
]>;

// i1 result computed from two i1 operands.
def AMDGPUIfBreakOp : SDTypeProfile<1, 2, [
  SDTCisVT<0, i1>,
  SDTCisVT<1, i1>,
  SDTCisVT<2, i1>
]>;

//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//===----------------------------------------------------------------------===//

// Structured control-flow nodes; all of them carry a chain.
def AMDGPUif   : SDNode<"AMDGPUISD::IF",   AMDGPUIfOp,   [SDNPHasChain]>;
def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>;
def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>;

// Call sequence markers, with both operands fixed to i32.
def callseq_start : SDNode<"ISD::CALLSEQ_START",
  SDCallSeqStart<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>,
  [SDNPHasChain, SDNPOutGlue]
>;

def callseq_end : SDNode<"ISD::CALLSEQ_END",
  SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

// Variadic call node; operand 0 is a pointer.
def AMDGPUcall : SDNode<"AMDGPUISD::CALL",
  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]
>;

// Tail-call return profile: no results, three operands, operand 0 a pointer.
def AMDGPUTCReturnTP : SDTypeProfile<0, 3, [
  SDTCisPtrTy<0>
]>;

def AMDGPUtc_return : SDNode<"AMDGPUISD::TC_RETURN", AMDGPUTCReturnTP,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

def AMDGPUtc_return_gfx : SDNode<"AMDGPUISD::TC_RETURN_GFX", AMDGPUTCReturnTP,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

def AMDGPUtc_return_chain : SDNode<"AMDGPUISD::TC_RETURN_CHAIN",
  SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

// Variadic trap node; operand 0 is an i16.
def AMDGPUtrap : SDNode<"AMDGPUISD::TRAP",
  SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>,
  [SDNPHasChain, SDNPVariadic, SDNPSideEffect, SDNPInGlue]
>;

// Pointer-typed result computed from a pointer-typed operand. The profile
// previously repeated the constraint on value 0 and left the operand (value 1)
// unconstrained.
def AMDGPUconstdata_ptr : SDNode<"AMDGPUISD::CONST_DATA_PTR",
  SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>
>;

// This argument to this node is a dword address.
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;

def AMDGPUcos_impl : SDNode<"AMDGPUISD::COS_HW", SDTFPUnaryOp>;
def AMDGPUsin_impl : SDNode<"AMDGPUISD::SIN_HW", SDTFPUnaryOp>;

// out = a - floor(a)
def AMDGPUfract_impl : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;

// out = 1.0 / a
def AMDGPUrcp_impl : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;

// v_log_f32, which is log2
def AMDGPUlog_impl : SDNode<"AMDGPUISD::LOG", SDTFPUnaryOp>;

// v_exp_f32, which is exp2
def AMDGPUexp_impl : SDNode<"AMDGPUISD::EXP", SDTFPUnaryOp>;

// out = 1.0 / sqrt(a)
def AMDGPUrsq_impl : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;

def AMDGPUrcp_legacy_impl : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>;

def AMDGPUrcp_iflag : SDNode<"AMDGPUISD::RCP_IFLAG", SDTFPUnaryOp>;

// out = 1.0 / sqrt(a) result clamped to +/- max_float.
def AMDGPUrsq_clamp_impl : SDNode<"AMDGPUISD::RSQ_CLAMP", SDTFPUnaryOp>;

// Pack/convert nodes: two same-typed inputs, one result.
def AMDGPUpkrtz_f16_f32_impl
    : SDNode<"AMDGPUISD::CVT_PKRTZ_F16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_i16_f32_impl
    : SDNode<"AMDGPUISD::CVT_PKNORM_I16_F32", AMDGPUFPPackOp>;
def AMDGPUpknorm_u16_f32_impl
    : SDNode<"AMDGPUISD::CVT_PKNORM_U16_F32", AMDGPUFPPackOp>;
def AMDGPUpk_i16_i32_impl
    : SDNode<"AMDGPUISD::CVT_PK_I16_I32", AMDGPUIntPackOp>;
def AMDGPUpk_u16_u32_impl
    : SDNode<"AMDGPUISD::CVT_PK_U16_U32", AMDGPUIntPackOp>;
def AMDGPUfp_to_f16 : SDNode<"AMDGPUISD::FP_TO_FP16", SDTFPToIntOp>;

def AMDGPUfp_class_impl : SDNode<"AMDGPUISD::FP_CLASS", AMDGPUFPClassOp>;

// out = max(a, b) a and b are floats, where a nan comparison fails.
// This is not commutative because this gives the second operand:
//   x < nan ? x : nan -> nan
//   nan < x ? nan : x -> x
def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp, []>;

def AMDGPUfmul_legacy_impl : SDNode<"AMDGPUISD::FMUL_LEGACY", SDTFPBinOp,
  [SDNPCommutative, SDNPAssociative]
>;

// out = min(a, b) a and b are floats, where a nan comparison fails.
def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp, []>;

// FIXME: TableGen doesn't like commutative instructions with more
// than 2 operands.

// out = max(a, b, c) a, b and c are floats
def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = max(a, b, c) a, b and c are floats. Operation is IEEE2019 compliant.
def AMDGPUfmaximum3 : SDNode<"AMDGPUISD::FMAXIMUM3", SDTFPTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = max(a, b, c) a, b, and c are signed ints
def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = max(a, b, c) a, b and c are unsigned ints
def AMDGPUumax3 : SDNode<"AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c) a, b and c are floats
def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c) a, b and c are floats. Operation is IEEE2019 compliant.
def AMDGPUfminimum3 : SDNode<"AMDGPUISD::FMINIMUM3", SDTFPTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c) a, b and c are signed ints
def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = min(a, b, c) a, b and c are unsigned ints
def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
  [/*SDNPCommutative, SDNPAssociative*/]
>;

// out = (src0 + src1 > 0xFFFFFFFF) ? 1 : 0
def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>;

// out = (src1 > src0) ? 1 : 0
def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;

// setcc: integer result, two same-typed operands, and an OtherVT operand.
def AMDGPUSetCCOp : SDTypeProfile<1, 3, [
  SDTCisInt<0>,
  SDTCisSameAs<1, 2>,
  SDTCisVT<3, OtherVT>
]>;

def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;

// Chained (and optionally glued) variants of fma / fmul.
def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

def AMDGPUmul : SDNode<"AMDGPUISD::FMUL_W_CHAIN", SDTFPBinOp,
  [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]
>;

def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0", SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1", SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte2 : SDNode<"AMDGPUISD::CVT_F32_UBYTE2", SDTIntToFPOp, []>;
def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3", SDTIntToFPOp, []>;

def AMDGPUcvt_pk_i16_i32 : SDNode<"AMDGPUISD::CVT_PK_I16_I32",
  AMDGPUIntPackOp, []
>;

// urecip - This operation is a helper for integer division, it returns the
// result of 1 / a as a fractional unsigned integer.
// out = (2^32 / a) + e
// e is rounding error
def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;

// Special case divide preop and flags.
def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>;

// Special case divide FMA with scale and flags (src0 = Quotient,
// src1 = Denominator, src2 = Numerator).
def AMDGPUdiv_fmas_impl : SDNode<"AMDGPUISD::DIV_FMAS", AMDGPUFmasOp,
  [SDNPOptInGlue]
>;

// Single or double precision division fixup.
// Special case divide fixup and flags(src0 = Quotient, src1 =
// Denominator, src2 = Numerator).
def AMDGPUdiv_fixup_impl : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>;

def AMDGPUfmad_ftz_impl : SDNode<"AMDGPUISD::FMAD_FTZ", SDTFPTernaryOp>;

// Register load: one result; operand 1 is a pointer, operand 2 an integer.
def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD",
  SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
  [SDNPHasChain, SDNPMayLoad]
>;

// Register store: no results, three operands.
def AMDGPUregister_store : SDNode<"AMDGPUISD::REGISTER_STORE",
  SDTypeProfile<0, 3, [SDTCisPtrTy<1>, SDTCisInt<2>]>,
  [SDNPHasChain, SDNPMayStore]
>;

// MSKOR instructions are atomic memory instructions used mainly for storing
// 8-bit and 16-bit values.  The definition is:
//
// MSKOR(dst, mask, src) MEM[dst] = ((MEM[dst] & ~mask) | src)
//
// src0: vec4(src, 0, 0, mask)
// src1: dst - rat offset (aka pointer) in dwords
def AMDGPUstore_mskor : SDNode<"AMDGPUISD::STORE_MSKOR",
  SDTypeProfile<0, 2, []>,
  [SDNPHasChain, SDNPMayStore, SDNPMemOperand]
>;

// Compare-and-swap: operand 1 is a pointer, operand 2 is a vector.
def AMDGPUatomic_cmp_swap : SDNode<"AMDGPUISD::ATOMIC_CMP_SWAP",
  SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisVec<2>]>,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]
>;

// Bitfield nodes.
def AMDGPUbfe_u32_impl : SDNode<"AMDGPUISD::BFE_U32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfe_i32_impl : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi          : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm          : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;

// Bit-count style nodes.
def AMDGPUffbh_u32_impl : SDNode<"AMDGPUISD::FFBH_U32", SDTIntBitCountUnaryOp>;
def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>;

def AMDGPUffbl_b32_impl : SDNode<"AMDGPUISD::FFBL_B32", SDTIntBitCountUnaryOp>;

// Signed and unsigned 24-bit multiply. The highest 8-bits are ignored
// when performing the multiply. The result is a 32 or 64 bit value.
def AMDGPUMul24Op : SDTypeProfile<1, 2, [
  SDTCisInt<0>,
  SDTCisInt<1>,
  SDTCisSameAs<1, 2>
]>;

def AMDGPUmul_u24_impl : SDNode<"AMDGPUISD::MUL_U24", AMDGPUMul24Op,
  [SDNPCommutative, SDNPAssociative]
>;

def AMDGPUmul_i24_impl : SDNode<"AMDGPUISD::MUL_I24", AMDGPUMul24Op,
  [SDNPCommutative, SDNPAssociative]
>;

// mulhi24 yields the high-order 16 bits of the 48-bit result. Here's an example
// that shows mulhi24 is not associative:
//
// Given a = 0x10002, b = c = 0xffffff:
// mulhi24(mulhi24(a, b), c) = mulhi24(0x100, 0xffffff) = 0
// Which is not equal to:
// mulhi24(a, mulhi24(b, c)) = mulhi24(0x10002, 0xffff) = 1
def AMDGPUmulhi_u24_impl : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp,
  [SDNPCommutative]
>;

def AMDGPUmulhi_i24_impl : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp,
  [SDNPCommutative]
>;

// 24-bit multiply-add nodes.
def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp, []>;
def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp, []>;

// Three-operand median nodes.
def AMDGPUsmed3 : SDNode<"AMDGPUISD::SMED3", AMDGPUDTIntTernaryOp, []>;

def AMDGPUumed3 : SDNode<"AMDGPUISD::UMED3", AMDGPUDTIntTernaryOp, []>;

def AMDGPUfmed3_impl : SDNode<"AMDGPUISD::FMED3", SDTFPTernaryOp, []>;

// FDOT2: operands 1 and 2 are vectors of one type, operand 3 has the
// floating-point result type, and operand 4 is an integer.
def AMDGPUfdot2_impl : SDNode<"AMDGPUISD::FDOT2",
  SDTypeProfile<1, 4, [
    SDTCisSameAs<0, 3>,
    SDTCisSameAs<1, 2>,
    SDTCisFP<0>,
    SDTCisVec<1>,
    SDTCisInt<4>
  ]>,
  []
>;

def AMDGPUperm_impl : SDNode<"AMDGPUISD::PERM", AMDGPUDTIntTernaryOp, []>;

// SI+ export
// NOTE(review): the final constraint repeats index 1 (already used for "en")
// although it is labelled "vm" -- confirm the intended operand index.
def AMDGPUExportOp : SDTypeProfile<0, 8, [
  SDTCisInt<0>,       // i8 tgt
  SDTCisInt<1>,       // i8 en
                      // i32 or f32 src0
  SDTCisSameAs<3, 2>, // f32 src1
  SDTCisSameAs<4, 2>, // f32 src2
  SDTCisSameAs<5, 2>, // f32 src3
  SDTCisInt<6>,       // i1 compr
                      // skip done
  SDTCisInt<1>        // i1 vm
]>;


//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//

// Conditional branch profile: no results, two operands; operand 0 is OtherVT.
def SDTIL_BRCond : SDTypeProfile<0, 2, [
  SDTCisVT<0, OtherVT>
]>;

//===----------------------------------------------------------------------===//
// Flow Control DAG Nodes
//===----------------------------------------------------------------------===//

def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>;

//===----------------------------------------------------------------------===//
// Call/Return DAG Nodes
//===----------------------------------------------------------------------===//

def AMDGPUendpgm : SDNode<"AMDGPUISD::ENDPGM", SDTNone,
  [SDNPHasChain, SDNPOptInGlue]
>;

def AMDGPUendpgm_trap : SDNode<"AMDGPUISD::ENDPGM_TRAP", SDTNone,
  [SDNPHasChain]
>;

def AMDGPUreturn_to_epilog : SDNode<"AMDGPUISD::RETURN_TO_EPILOG", SDTNone,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;

def AMDGPUret_glue : SDNode<"AMDGPUISD::RET_GLUE", SDTNone,
  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]
>;


//===----------------------------------------------------------------------===//
// Intrinsic/Custom node compatibility PatFrags
//===----------------------------------------------------------------------===//

// Each fragment below lists two alternative source patterns, so one selection
// pattern written against the fragment covers both forms.

def AMDGPUrcp : PatFrags<(ops node:$src), [
  (int_amdgcn_rcp node:$src),
  (AMDGPUrcp_impl node:$src)
]>;

def AMDGPUrcp_legacy : PatFrags<(ops node:$src), [
  (int_amdgcn_rcp_legacy node:$src),
  (AMDGPUrcp_legacy_impl node:$src)
]>;

def AMDGPUrsq : PatFrags<(ops node:$src), [
  (int_amdgcn_rsq node:$src),
  (AMDGPUrsq_impl node:$src)
]>;

def AMDGPUrsq_clamp : PatFrags<(ops node:$src), [
  (int_amdgcn_rsq_clamp node:$src),
  (AMDGPUrsq_clamp_impl node:$src)
]>;

def AMDGPUsin : PatFrags<(ops node:$src), [
  (int_amdgcn_sin node:$src),
  (AMDGPUsin_impl node:$src)
]>;

def AMDGPUcos : PatFrags<(ops node:$src), [
  (int_amdgcn_cos node:$src),
  (AMDGPUcos_impl node:$src)
]>;

def AMDGPUfract : PatFrags<(ops node:$src), [
  (int_amdgcn_fract node:$src),
  (AMDGPUfract_impl node:$src)
]>;

def AMDGPUlog : PatFrags<(ops node:$src), [
  (int_amdgcn_log node:$src),
  (AMDGPUlog_impl node:$src)
]>;

// f16 variant: pairs the intrinsic with the generic flog2 node.
def AMDGPUlogf16 : PatFrags<(ops node:$src), [
  (int_amdgcn_log node:$src),
  (flog2 node:$src)
]>;

def AMDGPUexp : PatFrags<(ops node:$src), [
  (int_amdgcn_exp2 node:$src),
  (AMDGPUexp_impl node:$src)
]>;

// f16 variant: pairs the intrinsic with the generic fexp2 node.
def AMDGPUexpf16 : PatFrags<(ops node:$src), [
  (int_amdgcn_exp2 node:$src),
  (fexp2 node:$src)
]>;

def AMDGPUfp_class : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_class node:$src0, node:$src1),
  (AMDGPUfp_class_impl node:$src0, node:$src1)
]>;

def AMDGPUfmed3 : PatFrags<(ops node:$src0, node:$src1, node:$src2), [
  (int_amdgcn_fmed3 node:$src0, node:$src1, node:$src2),
  (AMDGPUfmed3_impl node:$src0, node:$src1, node:$src2)
]>;

def AMDGPUdiv_fixup : PatFrags<(ops node:$src0, node:$src1, node:$src2), [
  (int_amdgcn_div_fixup node:$src0, node:$src1, node:$src2),
  (AMDGPUdiv_fixup_impl node:$src0, node:$src1, node:$src2)
]>;

def AMDGPUffbh_i32 : PatFrags<(ops node:$src), [
  (int_amdgcn_sffbh node:$src),
  (AMDGPUffbh_i32_impl node:$src)
]>;

// Pairs the generic ctlz_zero_undef node with the custom node.
def AMDGPUffbh_u32 : PatFrags<(ops node:$src), [
  (ctlz_zero_undef node:$src),
  (AMDGPUffbh_u32_impl node:$src)
]>;

// Pairs the generic cttz_zero_undef node with the custom node.
def AMDGPUffbl_b32 : PatFrags<(ops node:$src), [
  (cttz_zero_undef node:$src),
  (AMDGPUffbl_b32_impl node:$src)
]>;

def AMDGPUpkrtz_f16_f32 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_cvt_pkrtz node:$src0, node:$src1),
  (AMDGPUpkrtz_f16_f32_impl node:$src0, node:$src1)
]>;

def AMDGPUpknorm_i16_f32 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_cvt_pknorm_i16 node:$src0, node:$src1),
  (AMDGPUpknorm_i16_f32_impl node:$src0, node:$src1)
]>;

def AMDGPUpknorm_u16_f32 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_cvt_pknorm_u16 node:$src0, node:$src1),
  (AMDGPUpknorm_u16_f32_impl node:$src0, node:$src1)
]>;

def AMDGPUpk_i16_i32 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_cvt_pk_i16 node:$src0, node:$src1),
  (AMDGPUpk_i16_i32_impl node:$src0, node:$src1)
]>;

def AMDGPUpk_u16_u32 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_cvt_pk_u16 node:$src0, node:$src1),
  (AMDGPUpk_u16_u32_impl node:$src0, node:$src1)
]>;

def AMDGPUfmad_ftz : PatFrags<(ops node:$src0, node:$src1, node:$src2), [
  (int_amdgcn_fmad_ftz node:$src0, node:$src1, node:$src2),
  (AMDGPUfmad_ftz_impl node:$src0, node:$src1, node:$src2)
]>;

def AMDGPUmul_u24 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_mul_u24 node:$src0, node:$src1),
  (AMDGPUmul_u24_impl node:$src0, node:$src1)
]>;

def AMDGPUmul_i24 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_mul_i24 node:$src0, node:$src1),
  (AMDGPUmul_i24_impl node:$src0, node:$src1)
]>;

def AMDGPUmulhi_u24 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_mulhi_u24 node:$src0, node:$src1),
  (AMDGPUmulhi_u24_impl node:$src0, node:$src1)
]>;

def AMDGPUmulhi_i24 : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_mulhi_i24 node:$src0, node:$src1),
  (AMDGPUmulhi_i24_impl node:$src0, node:$src1)
]>;

def AMDGPUbfe_i32 : PatFrags<(ops node:$src0, node:$src1, node:$src2), [
  (int_amdgcn_sbfe node:$src0, node:$src1, node:$src2),
  (AMDGPUbfe_i32_impl node:$src0, node:$src1, node:$src2)
]>;

def AMDGPUbfe_u32 : PatFrags<(ops node:$src0, node:$src1, node:$src2), [
  (int_amdgcn_ubfe node:$src0, node:$src1, node:$src2),
  (AMDGPUbfe_u32_impl node:$src0, node:$src1, node:$src2)
]>;

def AMDGPUfmul_legacy : PatFrags<(ops node:$src0, node:$src1), [
  (int_amdgcn_fmul_legacy node:$src0, node:$src1),
  (AMDGPUfmul_legacy_impl node:$src0, node:$src1)
]>;

def AMDGPUfdot2
    : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$clamp), [
  (int_amdgcn_fdot2 node:$src0, node:$src1, node:$src2, node:$clamp),
  (AMDGPUfdot2_impl node:$src0, node:$src1, node:$src2, node:$clamp)
]>;

def AMDGPUdiv_fmas
    : PatFrags<(ops node:$src0, node:$src1, node:$src2, node:$vcc), [
  (int_amdgcn_div_fmas node:$src0, node:$src1, node:$src2, node:$vcc),
  (AMDGPUdiv_fmas_impl node:$src0, node:$src1, node:$src2, node:$vcc)
]>;

def AMDGPUperm : PatFrags<(ops node:$src0, node:$src1, node:$src2), [
  (int_amdgcn_perm node:$src0, node:$src1, node:$src2),
  (AMDGPUperm_impl node:$src0, node:$src1, node:$src2)
]>;