//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

// Numeric address-space indices used by the load/store/atomic PatFrags below.
// NOTE(review): presumably these mirror the target's AMDGPUAS address-space
// enumeration — confirm against AMDGPU.h before editing.
class AddressSpacesImpl {
  int Flat = 0;
  int Global = 1;
  int Region = 2;
  int Local = 3;
  int Constant = 4;
  int Private = 5;
  int Constant32Bit = 6;
}

def AddrSpaces : AddressSpacesImpl;


// Common base class for all AMDGPU instructions.
class AMDGPUInst <dag outs, dag ins, string asm = "",
                  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<96> SoftFail = 0;

  let DecoderNamespace = Namespace;

  // The register load/store markers are packed into the top two TSFlags bits.
  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
                        list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//

class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {

  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");
  let isPseudo = 1;
  let Itinerary = NullALU;
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isCodeGenOnly = 1;
}

// Get the union of two Register lists
class RegListUnion<list<Register> lstA, list<Register> lstB> {
  list<Register> ret = !listconcat(lstA, !listremove(lstB, lstA));
}

class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
  PredicateControl, GISelFlags;

let GIIgnoreCopies = 1 in
class AMDGPUPatIgnoreCopies<dag pattern, dag result> : AMDGPUPat<pattern, result>;

// These predicates read per-function denormal mode state, so they must be
// re-evaluated for each function rather than cached per-module.
// Note: FP16 shares its denormal control with FP64 (FP64FP16Denormals).
let RecomputePerFunction = 1 in {
def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals != DenormalMode::getPreserveSign()">;
def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
def NoFP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def NoFP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
def NoFP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
}

def FMA : Predicate<"Subtarget->hasFMA()">;

def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;

def i1imm_0 : OperandWithDefaultOps<i1, (ops (i1 0))>;

// AsmOperandClass wired up from the string method names computed in
// CustomOperandProps below.
class CustomOperandClass<string name, bit optional, string predicateMethod,
                         string parserMethod, string defaultMethod>
  : AsmOperandClass {
  let Name = name;
  let PredicateMethod = predicateMethod;
  let ParserMethod = parserMethod;
  let RenderMethod = "addImmOperands";
  let IsOptional = optional;
  let DefaultMethod = defaultMethod;
}

// Derives the conventional is<Name>/parse<Name>/print<Name> method names and
// the default-value lambda from the operand's record name.
class CustomOperandProps<bit optional = 0, string name = NAME> {
  string ImmTy = "ImmTy"#name;
  string PredicateMethod = "is"#name;
  string ParserMethod = "parse"#name;
  string DefaultValue = "0";
  string DefaultMethod = "[this]() { return "#
    "AMDGPUOperand::CreateImm(this, "#DefaultValue#", SMLoc(), "#
    "AMDGPUOperand::"#ImmTy#"); }";
  string PrintMethod = "print"#name;
  AsmOperandClass ParserMatchClass =
    CustomOperandClass<name, optional, PredicateMethod, ParserMethod,
                       DefaultMethod>;
  string OperandType = "OPERAND_IMMEDIATE";
}

class CustomOperand<ValueType type, bit optional = 0, string name = NAME>
  : CustomOperandProps<optional, name>, Operand<type>;

class ImmOperand<ValueType type, string name = NAME, bit optional = 0,
                 string printer = "print"#name>
  : CustomOperand<type, optional, name> {
  let ImmTy = "ImmTyNone";
  let ParserMethod = "";
  let PrintMethod = printer;
}

class S16ImmOperand : ImmOperand<i16, "S16Imm", 0, "printU16ImmOperand">;

def s16imm : S16ImmOperand;
def u16imm : ImmOperand<i16, "U16Imm", 0, "printU16ImmOperand">;

// Wraps an existing CustomOperand with an extra C++ predicate on the parsed
// value; all other methods are forwarded from the wrapped operand.
class ValuePredicatedOperand<CustomOperand op, string valuePredicate,
                             bit optional = 0>
  : CustomOperand<op.Type, optional> {
  let ImmTy = op.ImmTy;
  defvar OpPredicate = op.ParserMatchClass.PredicateMethod;
  let PredicateMethod =
    "getPredicate([](const AMDGPUOperand &Op) -> bool { "#
    "return Op."#OpPredicate#"() && "#valuePredicate#"; })";
  let ParserMethod = op.ParserMatchClass.ParserMethod;
  let DefaultValue = op.DefaultValue;
  let DefaultMethod = op.DefaultMethod;
  let PrintMethod = op.PrintMethod;
}

//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0)> {
  let HasOneUse = 1;
}

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1)> {
  let HasOneUse = 1;
}

class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2)> {
  let HasOneUse = 1;
}

// Matches a unary op whose operand is already in canonical FP form.
class is_canonicalized_1<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0),
  [{
    const SITargetLowering &Lowering =
      *static_cast<const SITargetLowering *>(getTargetLowering());

    return Lowering.isCanonicalized(*CurDAG, N->getOperand(0));
  }]> {

  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());

    return TLI->isCanonicalized(MI.getOperand(1).getReg(), MF);
  }];
}

// Matches a binary op with both operands already in canonical FP form.
class is_canonicalized_2<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{
    const SITargetLowering &Lowering =
      *static_cast<const SITargetLowering *>(getTargetLowering());

    return Lowering.isCanonicalized(*CurDAG, N->getOperand(0)) &&
           Lowering.isCanonicalized(*CurDAG, N->getOperand(1));
  }]> {

  // TODO: Improve the Legalizer for g_build_vector in Global Isel to match this class
  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());

    return TLI->isCanonicalized(MI.getOperand(1).getReg(), MF) &&
           TLI->isCanonicalized(MI.getOperand(2).getReg(), MF);
  }];
}

// Matches (op2 (op1 a, b), c); e.g. mul+add below for the imad fragment.
class FoldTernaryOpPat<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op2 (op1 node:$src0, node:$src1), node:$src2)
>;

def imad : FoldTernaryOpPat<mul, add>;

let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
def fminimum_oneuse : HasOneUseBinOp<fminimum>;
def fmaximum_oneuse : HasOneUseBinOp<fmaximum>;

def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;


def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;
def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;

//===----------------------------------------------------------------------===//
// PatFrags for shifts
//===----------------------------------------------------------------------===//

// Constrained shift PatFrags.

def csh_mask_16 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 4); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }];
}

def csh_mask_32 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 5); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }];
}

def csh_mask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 6); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }];
}

// For each width, define shl/srl/sra fragments that also match a shift whose
// amount is wrapped in a redundant mask, plus _oneuse and operand-swapped
// "_rev" variants.
foreach width = [16, 32, 64] in {
defvar csh_mask = !cast<SDPatternOperator>("csh_mask_"#width);

def cshl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(shl node:$src0, node:$src1), (shl node:$src0, (csh_mask node:$src1))]>;
defvar cshl = !cast<SDPatternOperator>("cshl_"#width);
def cshl_#width#_oneuse : HasOneUseBinOp<cshl>;
def clshl_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (cshl $src1, $src0)>;

def csrl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(srl node:$src0, node:$src1), (srl node:$src0, (csh_mask node:$src1))]>;
defvar csrl = !cast<SDPatternOperator>("csrl_"#width);
def csrl_#width#_oneuse : HasOneUseBinOp<csrl>;
def clshr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csrl $src1, $src0)>;

def csra_#width : PatFrags<(ops node:$src0, node:$src1),
  [(sra node:$src0, node:$src1), (sra node:$src0, (csh_mask node:$src1))]>;
defvar csra = !cast<SDPatternOperator>("csra_"#width);
def csra_#width#_oneuse : HasOneUseBinOp<csra>;
def cashr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csra $src1, $src0)>;
} // end foreach width

def srl_16 : PatFrag<
  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;


def hi_i16_elt : PatFrag<
  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
>;


// Matches a bitcast of (x srl 16), i.e. the high half of a wider value.
// FIX(review): the dyn_cast condition was missing its closing parenthesis,
// leaving malformed C++ in the emitted predicate; it now parenthesizes
// correctly.
def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for zero immediate
//===----------------------------------------------------------------------===//

def immzero : PatLeaf<(imm), [{ return N->isZero(); }]>;
def fpimmzero : PatLeaf<(fpimm), [{ return N->isZero(); }]>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

// Each ordered condition also accepts the corresponding "don't care" form.
def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>;
def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>;
def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>;
def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>;
def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>;
def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>;
def COND_O : PatFrags<(ops), [(OtherVT SETO)]>;
def COND_UO : PatFrags<(ops), [(OtherVT SETUO)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;

// XXX - For some reason R600 version is preferring to use unordered
// for setne?
def COND_UNE_NE : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>;
def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>;

// FIXME: Should not need code predicate
//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//

def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

// Glued variant of the generic cmpxchg node (used by the *_m0 cmpxchg
// fragments later in this file).
def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

class Aligned<int Bytes> {
  int MinAlignment = Bytes;
}

// Store of the high 16 bits of a 32-bit value as memory type vt.
class StoreHi16<SDPatternOperator op, ValueType vt> : PatFrag <
  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

// Loads from constant/global may also come from wider address-space sets;
// stores are restricted to the exact space.
def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant,
                                              AddrSpaces.Constant32Bit ]>;
def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global,
                                            AddrSpaces.Constant,
                                            AddrSpaces.Constant32Bit ]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;

def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
                                          AddrSpaces.Global,
                                          AddrSpaces.Constant,
                                          AddrSpaces.Constant32Bit ]>;
def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;

def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;



// Per-address-space load fragments (plain, extending, and atomic loads).
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def extloadi8_#as : PatFrag<(ops node:$ptr), (extloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def extloadi16_#as : PatFrag<(ops node:$ptr), (extloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}
} // End let AddressSpaces
} // End foreach as


// Per-address-space store fragments ("constant" is load-only, so it is
// absent from this list).
foreach as = [ "global", "flat", "local", "private", "region" ] in {
let IsStore = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def store_#as : PatFrag<(ops node:$val, node:$ptr),
                        (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 0;
}

// truncstore fragments.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
                               (truncstorei8 node:$val, node:$ptr)>;
def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstorei16 node:$val, node:$ptr)>;

def store_hi16_#as : StoreHi16 <truncstorei16, i16>;
def truncstorei8_hi16_#as : StoreHi16<truncstorei8, i8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16, i16>;
} // End let IsStore = 1, AddressSpaces = ...

let IsAtomic = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def atomic_store_8_#as : PatFrag<(ops node:$val, node:$ptr),
                                 (atomic_store_8 node:$val, node:$ptr)>;
def atomic_store_16_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_16 node:$val, node:$ptr)>;
def atomic_store_32_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_32 node:$val, node:$ptr)>;
def atomic_store_64_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_64 node:$val, node:$ptr)>;
} // End let IsAtomic = 1, AddressSpaces = ...
} // End foreach as

// "_noret" variant of a binary atomic intrinsic: matches only when the
// result is unused.
multiclass noret_op {
  let HasNoUse = true in
  def "_noret" : PatFrag<(ops node:$ptr, node:$data),
    (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
}

// Global/flat/local address-space-restricted variants of a binary atomic
// intrinsic, each with a result-used and a no-return form.
multiclass global_addr_space_atomic_op {
  def "_noret_global_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
    let HasNoUse = true;
    let AddressSpaces = LoadAddress_global.AddrSpaces;
    let IsAtomic = 1;
  }
  def "_global_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
    let AddressSpaces = LoadAddress_global.AddrSpaces;
    let IsAtomic = 1;
  }
}

multiclass flat_addr_space_atomic_op {
  def "_noret_flat_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
    let HasNoUse = true;
    let AddressSpaces = LoadAddress_flat.AddrSpaces;
    let IsAtomic = 1;
  }
  def "_flat_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
    let AddressSpaces = LoadAddress_flat.AddrSpaces;
    let IsAtomic = 1;
  }
}

multiclass local_addr_space_atomic_op {
  def "_noret_local_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
    let HasNoUse = true;
    let AddressSpaces = LoadAddress_local.AddrSpaces;
    let IsAtomic = 1;
  }
  def "_local_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
    let AddressSpaces = LoadAddress_local.AddrSpaces;
    let IsAtomic = 1;
  }
}

defm int_amdgcn_flat_atomic_fadd : noret_op;
defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op;
defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_flat_atomic_fmin : noret_op;
defm int_amdgcn_flat_atomic_fmax : noret_op;
defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op;
defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op;
defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_global_atomic_fmin : noret_op;
defm int_amdgcn_global_atomic_fmax : noret_op;
defm int_amdgcn_global_atomic_csub : noret_op;
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
defm int_amdgcn_flat_atomic_fmin_num : noret_op;
defm int_amdgcn_flat_atomic_fmax_num : noret_op;
defm int_amdgcn_global_atomic_fmin_num : noret_op;
defm int_amdgcn_global_atomic_fmax_num : noret_op;
defm int_amdgcn_atomic_cond_sub_u32 : local_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32 : flat_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32 : global_addr_space_atomic_op;

// "_noret" variants of the generic binary/ternary atomic multiclasses:
// the matched atomic's result must be unused.
multiclass noret_binary_atomic_op<SDNode atomic_op> {
  let HasNoUse = true in
  defm "_noret" : binary_atomic_op<atomic_op>;
}

multiclass noret_binary_atomic_op_fp<SDNode atomic_op> {
  let HasNoUse = true in
  defm "_noret" : binary_atomic_op_fp<atomic_op>;
}

multiclass noret_ternary_atomic_op<SDNode atomic_op> {
  let HasNoUse = true in
  defm "_noret" : ternary_atomic_op<atomic_op>;
}

defvar atomic_addrspace_names = [ "global", "flat", "constant", "local", "private", "region" ];

// Expand a binary atomic op into per-address-space fragments, each with a
// result-used and a "_noret" form.
multiclass binary_atomic_op_all_as<SDNode atomic_op> {
  foreach as = atomic_addrspace_names in {
    let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op<atomic_op>;
      defm "_"#as : noret_binary_atomic_op<atomic_op>;
    }
  }
}
// Floating-point counterpart of binary_atomic_op_all_as.
multiclass binary_atomic_op_fp_all_as<SDNode atomic_op> {
  foreach as = atomic_addrspace_names in {
    let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op_fp<atomic_op>;
      defm "_"#as : noret_binary_atomic_op_fp<atomic_op>;
    }
  }
}

defm atomic_swap : binary_atomic_op_all_as<atomic_swap>;
defm atomic_load_add : binary_atomic_op_all_as<atomic_load_add>;
defm atomic_load_and : binary_atomic_op_all_as<atomic_load_and>;
defm atomic_load_max : binary_atomic_op_all_as<atomic_load_max>;
defm atomic_load_min : binary_atomic_op_all_as<atomic_load_min>;
defm atomic_load_or : binary_atomic_op_all_as<atomic_load_or>;
defm atomic_load_sub : binary_atomic_op_all_as<atomic_load_sub>;
defm atomic_load_umax : binary_atomic_op_all_as<atomic_load_umax>;
defm atomic_load_umin : binary_atomic_op_all_as<atomic_load_umin>;
defm atomic_load_xor : binary_atomic_op_all_as<atomic_load_xor>;
defm atomic_load_fadd : binary_atomic_op_fp_all_as<atomic_load_fadd>;
defm atomic_load_fmin : binary_atomic_op_fp_all_as<atomic_load_fmin>;
defm atomic_load_fmax : binary_atomic_op_fp_all_as<atomic_load_fmax>;
defm atomic_load_uinc_wrap : binary_atomic_op_all_as<atomic_load_uinc_wrap>;
defm atomic_load_udec_wrap : binary_atomic_op_all_as<atomic_load_udec_wrap>;
defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>;

// Alignment-restricted LDS load/store fragments.
def load_align8_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                        Aligned<8> {
  let IsLoad = 1;
}

def load_align16_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                         Aligned<16> {
  let IsLoad = 1;
}

def store_align8_local: PatFrag<(ops node:$val, node:$ptr),
                                (store_local node:$val, node:$ptr)>, Aligned<8> {
  let IsStore = 1;
}

def store_align16_local: PatFrag<(ops node:$val, node:$ptr),
                                 (store_local node:$val, node:$ptr)>, Aligned<16> {
  let IsStore = 1;
}

// LDS/GDS cmpxchg fragments; the _m0 forms match the glued node variant.
let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm atomic_cmp_swap_region : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

// IEEE-754 bit patterns for commonly-used immediates (names give the
// intended value; FP16/FP32/FP64 prefixes give the encoding width).
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_4294966784 = 0x4f7ffffe; // 4294966784 = 4294967296 - 512 = 2^32 - 2^9
int FP16_ONE = 0x3C00;
int FP16_NEG_ONE = 0xBC00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;

def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

// pow(x, y) -> exp(y * log(x)) using the IEEE log/exp instructions.
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : AMDGPUPat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : AMDGPUPat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : AMDGPUPat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;

// Special conversion patterns

// Round-to-nearest via floor(x + 0.5); only valid under no-NaNs FP math.
def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

// mul24 + add -> mad24; instructions with a clamp modifier take an extra
// (disabled) i1 operand.
let AddedComplexity = 2 in {
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;
} // AddedComplexity.
// 1.0 / x -> reciprocal instruction.
class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

// Instructions which select to the same v_max_f*
def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

// PatFrags wrapper that only matches when the result is known never-NaN
// (checked in both SelectionDAG and GlobalISel).
class NeverNaNPats<dag ops, list<dag> frags> : PatFrags<ops, frags> {
  let PredicateCode = [{
    return CurDAG->isKnownNeverNaN(SDValue(N,0));
  }];
  let GISelPredicateCode = [{
    return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI);
  }];
}

def fminnum_like_nnan : NeverNaNPats<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

def fmaxnum_like_nnan : NeverNaNPats<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee_oneuse node:$src0, node:$src1),
   (fminnum_oneuse node:$src0, node:$src1)]
>;

def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
   (fmaxnum_oneuse node:$src0, node:$src1)]
>;

// Either form of fused-multiply-add that a v_mad can implement.
def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
  [(fmad node:$src0, node:$src1, node:$src2),
   (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
>;

// FIXME: fsqrt should not select directly
def any_amdgcn_sqrt : PatFrags<(ops node:$src0),
  [(fsqrt node:$src0), (int_amdgcn_sqrt node:$src0)]
>;