//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

// Numeric address-space identifiers used by the load/store pattern
// fragments below (see the LoadAddress_*/StoreAddress_* lists).
class AddressSpacesImpl {
  int Flat = 0;
  int Global = 1;
  int Region = 2;
  int Local = 3;
  int Constant = 4;
  int Private = 5;
}

def AddrSpaces : AddressSpacesImpl;


// Common base class for all AMDGPU instructions.
class AMDGPUInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : Instruction {

  // Set by register load/store pseudos; published through the top TSFlags
  // bits below.
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<64> SoftFail = 0;

  let DecoderNamespace = Namespace;

  // Expose the register load/store flags in the two highest TSFlags bits.
  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

// AMDGPUInst with a 32-bit encoding field, defaulted to all-ones.
class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//

// Code-gen-only pseudo instruction format. Note the asm string gets a
// newline appended via !strconcat.
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {

  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");
  let isPseudo = 1;
  let Itinerary = NullALU;
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isCodeGenOnly = 1;
}

// A predicate that always holds (empty condition string).
def TruePredicate : Predicate<"">;

// FIXME: Tablegen should specially support this
def FalsePredicate : Predicate<"false">;

// Add a predicate to the list if it does not already exist, to deduplicate it.
// Prepend a predicate to a predicate list, removing any existing copy of it
// first so the resulting list contains no duplicates.
class PredConcat<list<Predicate> lst, Predicate pred> {
  list<Predicate> ret =
    !listconcat([pred], !filter(item, lst,
                                !ne(!cast<string>(item),
                                    !cast<string>(pred))));
}

// Mix-in that assembles the final Predicates list from the subtarget,
// assembler and wave-size predicates plus any additional ones, deduplicated
// via PredConcat.
class PredicateControl {
  Predicate SubtargetPredicate = TruePredicate;
  Predicate AssemblerPredicate = TruePredicate;
  Predicate WaveSizePredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates = PredConcat<
                                 PredConcat<PredConcat<OtherPredicates,
                                                       SubtargetPredicate>.ret,
                                            AssemblerPredicate>.ret,
                                 WaveSizePredicate>.ret;
}

// Pat with predicate control; base class for most selection patterns here.
class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
      PredicateControl;

// The denormal mode is a property of the current machine function, so these
// predicates must be recomputed per function.
let RecomputePerFunction = 1 in {
// NOTE: FP16 and FP64 denormal modes are queried through the same
// allFP64FP16Denormals() accessor.
def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
}

def FMA : Predicate<"Subtarget->hasFMA()">;

// i32 operand that defaults to immediate 0 when omitted.
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;

def u16ImmTarget : AsmOperandClass {
  let Name = "U16Imm";
  let RenderMethod = "addImmOperands";
}

def s16ImmTarget : AsmOperandClass {
  let Name = "S16Imm";
  let RenderMethod = "addImmOperands";
}

let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = u16ImmTarget;
}

def s16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = s16ImmTarget;
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"

//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

// Unary op that only matches when its result has a single (non-debug) use.
class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0),
  [{ return N->hasOneUse(); }]> {

  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

// Binary op that only matches when its result has a single (non-debug) use.
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

// Ternary op that only matches when its result has a single (non-debug) use.
class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;

def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;


def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;

// Single-use logical shift right by 16.
def srl_16 : PatFrag<
  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;


// High 16-bit half of a 32-bit value, as i16.
def hi_i16_elt : PatFrag<
  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
>;


// Matches a bitcast of a value shifted right by a constant 16, i.e. the
// high f16 half of a 32-bit value.
def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;
  // FIX: the dyn_cast condition was missing its closing parenthesis, which
  // made this predicate's C++ malformed if the leaf were ever instantiated.
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

// Ordered comparisons also accept the plain (ordering-agnostic) conditions.
def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>;
def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>;
def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>;
def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>;
def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>;
def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>;
def COND_O : PatFrags<(ops), [(OtherVT SETO)]>;
def COND_UO : PatFrags<(ops), [(OtherVT SETUO)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;

// XXX - For some reason R600 version is preferring to use unordered
// for setne?
def COND_UNE_NE : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

// Equality accepts both the plain and the unordered condition codes.
def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>;
def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>;

// FIXME: Should not need code predicate
//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
// A condition leaf that never matches.
def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//

// The numeric values below are texture-type immediates; presumably the R600
// texture type encodings -- TODO confirm against the R600 definitions.
def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

// Atomic compare-and-swap with an incoming glue operand; used by the *_m0
// pattern variants below.
def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

// Wraps a list of address-space ids for use with `let AddressSpaces = ...`.
class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

// Mix-in constraining a memory PatFrag to a minimum alignment in bytes.
class Aligned<int Bytes> {
  int MinAlignment = Bytes;
}

// Store of the high 16 bits of $value (shifted right by 16 before storing).
class StoreHi16<SDPatternOperator op> : PatFrag <
  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)> {
  let IsStore = 1;
}

// Address-space sets for each kind of access. Note global loads also match
// constant-space, and flat accesses match flat/global(/constant).
def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant ]>;
def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global, AddrSpaces.Constant ]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;

def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
                                          AddrSpaces.Global,
                                          AddrSpaces.Constant ]>;
def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;

def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;



// Instantiate per-address-space load fragments (load_global, extloadi8_flat,
// atomic_load_32_local, ...).
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def extloadi8_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def extloadi16_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}
} // End let AddressSpaces
} // End foreach as


// Instantiate per-address-space store fragments. Note "constant" is absent:
// constant memory is not writable.
foreach as = [ "global", "flat", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def store_#as : PatFrag<(ops node:$val, node:$ptr),
                        (unindexedstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

// truncstore fragments.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
}

def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
}

def store_hi16_#as : StoreHi16 <truncstorei16>;
def truncstorei8_hi16_#as : StoreHi16<truncstorei8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16>;

defm atomic_store_#as : binary_atomic_op<atomic_store>;

} // End let AddressSpaces
} // End foreach as


// For each address space, instantiate three variants of a binary atomic op:
// the unconstrained one, a "_noret" one that only matches when the result is
// unused (SDValue(N, 0).use_empty()), and a "_ret" one for when it is used.
multiclass ret_noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
  foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
    let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op<atomic_op, IsInt>;

      let PredicateCode = [{return (SDValue(N, 0).use_empty());}] in {
        defm "_"#as#"_noret" : binary_atomic_op<atomic_op, IsInt>;
      }

      let PredicateCode = [{return !(SDValue(N, 0).use_empty());}] in {
        defm "_"#as#"_ret" : binary_atomic_op<atomic_op, IsInt>;
      }
    }
  }
}

defm atomic_swap : ret_noret_binary_atomic_op<atomic_swap>;
defm atomic_load_add : ret_noret_binary_atomic_op<atomic_load_add>;
defm atomic_load_and : ret_noret_binary_atomic_op<atomic_load_and>;
defm atomic_load_max : ret_noret_binary_atomic_op<atomic_load_max>;
defm atomic_load_min : ret_noret_binary_atomic_op<atomic_load_min>;
defm atomic_load_or : ret_noret_binary_atomic_op<atomic_load_or>;
defm atomic_load_sub : ret_noret_binary_atomic_op<atomic_load_sub>;
defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>;
defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>;
defm atomic_load_xor : ret_noret_binary_atomic_op<atomic_load_xor>;
// The fadd atomics pass IsInt = 0 (floating-point operands).
defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
let MemoryVT = v2f16 in
defm atomic_load_fadd_v2f16 : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op<AMDGPUatomic_cmp_swap>;

// Alignment-constrained LDS loads/stores.
def load_align8_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                        Aligned<8> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def load_align16_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                         Aligned<16> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def store_align8_local: PatFrag<(ops node:$val, node:$ptr),
                                (store_local node:$val, node:$ptr)>, Aligned<8> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def store_align16_local: PatFrag<(ops node:$val, node:$ptr),
                                 (store_local node:$val, node:$ptr)>, Aligned<16> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

// Compare-and-swap for local/region; the _m0 variants take the glued node.
let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm atomic_cmp_swap_region : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

// Bit patterns of commonly used floating-point constants.
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_4294966784 = 0x4f7ffffe; // 4294966784 = 4294967296 - 512 = 2^32 - 2^9
int FP16_ONE = 0x3C00;
int FP16_NEG_ONE = 0xBC00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;

// Matches +0.0 and -0.0.
def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

/* fpow(x, y) expanded as exp_ieee(y * log_ieee(x)) */
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : AMDGPUPat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : AMDGPUPat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : AMDGPUPat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// Strip an AMDGPUdwordaddr wrapper: the register already holds the address.
class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// fshr pattern
class FSHRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
  (fshr i32:$src0, i32:$src1, i32:$src2),
  (BIT_ALIGN $src0, $src1, $src2)
>;

// rotr pattern: rotr x, y == bit_align x, x, y
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;

// Special conversion patterns

// Round-to-plus-infinity style conversion: floor(x + 0.5) -> int.
// Only valid with no-NaNs FP math.
def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

// floor(x) -> int; only valid with no-NaNs FP math.
def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

// Prefer mad over separate mul+add when the pattern matches.
let AddedComplexity = 2 in {
// mul_i24 + add -> Inst; HasClamp appends a disabled clamp operand (i1 0).
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

// mul_u24 + add -> Inst; HasClamp appends a disabled clamp operand (i1 0).
class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;
} // AddedComplexity.

// 1.0 / x -> reciprocal instruction.
class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

// rcp(sqrt(x)) -> reciprocal-square-root instruction.
class RsqPat<Instruction RsqInst, ValueType vt> : AMDGPUPat <
  (AMDGPUrcp (fsqrt vt:$src)),
  (RsqInst $src)
>;

// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

// Instructions which select to the same v_max_f*
def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

// Single-use variants of the above.
def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee_oneuse node:$src0, node:$src1),
   (fminnum_oneuse node:$src0, node:$src1)]
>;

def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
   (fmaxnum_oneuse node:$src0, node:$src1)]
>;

// Either the generic fmad or the target's flush-to-zero variant.
def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
  [(fmad node:$src0, node:$src1, node:$src2),
   (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
>;

// FIXME: fsqrt should not select directly
def any_amdgcn_sqrt : PatFrags<(ops node:$src0),
  [(fsqrt node:$src0), (int_amdgcn_sqrt node:$src0)]
>;