//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

class AddressSpacesImpl {
  int Flat = 0;
  int Global = 1;
  int Region = 2;
  int Local = 3;
  int Constant = 4;
  int Private = 5;
  int Constant32Bit = 6;
}

def AddrSpaces : AddressSpacesImpl;


class AMDGPUInst <dag outs, dag ins, string asm = "",
                  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<96> SoftFail = 0;

  let DecoderNamespace = Namespace;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}
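
// Illustrative sketch only (hypothetical def, not part of the build): a
// subclass just sets the flag fields above, and they are mirrored into
// TSFlags for later queries, e.g.
//
//   def MY_REG_LOAD : AMDGPUInst<(outs), (ins), "my_reg_load"> {
//     let isRegisterLoad = 1;   // lands in TSFlags{63}
//   }
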
class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
                        list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//

class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {

  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");
  let isPseudo = 1;
  let Itinerary = NullALU;
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isCodeGenOnly = 1;
}

def TruePredicate : Predicate<"">;

// FIXME: Tablegen should specially support this
def FalsePredicate : Predicate<"false">;

// Add a predicate to the list if it does not already exist, to deduplicate it.
class PredConcat<list<Predicate> lst, Predicate pred> {
  list<Predicate> ret = !listconcat(lst, !listremove([pred], lst));
}

// Get the union of two Register lists.
class RegListUnion<list<Register> lstA, list<Register> lstB> {
  list<Register> ret = !listconcat(lstA, !listremove(lstB, lstA));
}

class PredicateControl {
  Predicate SubtargetPredicate = TruePredicate;
  Predicate AssemblerPredicate = TruePredicate;
  Predicate WaveSizePredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates = PredConcat<
      PredConcat<PredConcat<OtherPredicates,
                            SubtargetPredicate>.ret,
                 AssemblerPredicate>.ret,
      WaveSizePredicate>.ret;
}

class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
      PredicateControl, GISelFlags;

let GIIgnoreCopies = 1 in
class AMDGPUPatIgnoreCopies<dag pattern, dag result> : AMDGPUPat<pattern, result>;

let RecomputePerFunction = 1 in {
def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals != DenormalMode::getPreserveSign()">;
def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
def NoFP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def NoFP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
def NoFP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
}

def FMA : Predicate<"Subtarget->hasFMA()">;

def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;

def i1imm_0 : OperandWithDefaultOps<i1, (ops (i1 0))>;

class CustomOperandClass<string name, bit optional, string parserMethod,
                         string defaultMethod>
    : AsmOperandClass {
  let Name = name;
  let PredicateMethod = "is"#name;
  let ParserMethod = parserMethod;
  let RenderMethod = "addImmOperands";
  let IsOptional = optional;
  let DefaultMethod = defaultMethod;
}

class CustomOperandProps<bit optional = 0, string name = NAME> {
  string ImmTy = "ImmTy"#name;
  string ParserMethod = "parse"#name;
  string DefaultValue = "0";
  string DefaultMethod = "[this]() { return "#
    "AMDGPUOperand::CreateImm(this, "#DefaultValue#", SMLoc(), "#
    "AMDGPUOperand::"#ImmTy#"); }";
  string PrintMethod = "print"#name;
  AsmOperandClass ParserMatchClass =
      CustomOperandClass<name, optional, ParserMethod, DefaultMethod>;
  string OperandType = "OPERAND_IMMEDIATE";
}

class CustomOperand<ValueType type, bit optional = 0, string name = NAME>
  : Operand<type>, CustomOperandProps<optional, name>;

class ImmOperand<ValueType type, string name = NAME, bit optional = 0,
                 string printer = "print"#name>
  : CustomOperand<type, optional, name> {
  let ImmTy = "ImmTyNone";
  let ParserMethod = "";
  let PrintMethod = printer;
}

def s16imm : ImmOperand<i16, "S16Imm", 0, "printU16ImmOperand">;
def u16imm : ImmOperand<i16, "U16Imm", 0, "printU16ImmOperand">;
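
// Illustrative sketch (the operand name "FooMod" is hypothetical): declaring
//
//   def FooMod : CustomOperand<i32, 1, "FooMod">;
//
// derives the assembler hooks by name -- isFooMod, parseFooMod, printFooMod
// and AMDGPUOperand::ImmTyFooMod -- and, because the operand is optional,
// synthesizes a default immediate of 0 when it is omitted.
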
//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0),
  [{ return N->hasOneUse(); }]> {

  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class is_canonicalized<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{
    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());

    return Lowering.isCanonicalized(*CurDAG, N->getOperand(0)) &&
           Lowering.isCanonicalized(*CurDAG, N->getOperand(1));
  }]> {

  // TODO: Improve the Legalizer for g_build_vector in GlobalISel to match this class
  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
        MF.getSubtarget().getTargetLowering());

    return TLI->isCanonicalized(MI.getOperand(1).getReg(), const_cast<MachineFunction&>(MF)) &&
           TLI->isCanonicalized(MI.getOperand(2).getReg(), const_cast<MachineFunction&>(MF));
  }];
}

class FoldTernaryOpPat<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op2 (op1 node:$src0, node:$src1), node:$src2)
>;

def imad : FoldTernaryOpPat<mul, add>;

let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;

def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;


def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
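
// Illustrative: a *_oneuse fragment matches only when the inner operation has
// a single (non-debug) use. For example, (xor (and_oneuse $a, $b), $c) fires
// on
//   %m = and i32 %a, %b
//   %r = xor i32 %m, %c
// only if %m has no other users, since folding %m away would otherwise
// duplicate the and.
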
//===----------------------------------------------------------------------===//
// PatFrags for shifts
//===----------------------------------------------------------------------===//

// Constrained shift PatFrags: the and-mask on the shift amount is redundant
// when it keeps at least the low log2(bitwidth) bits.

def csh_mask_16 : PatFrag<(ops node:$src0), (and node:$src0, imm),
    [{ return isUnneededShiftMask(N, 4); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }];
}

def csh_mask_32 : PatFrag<(ops node:$src0), (and node:$src0, imm),
    [{ return isUnneededShiftMask(N, 5); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }];
}

def csh_mask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm),
    [{ return isUnneededShiftMask(N, 6); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }];
}

foreach width = [16, 32, 64] in {
defvar csh_mask = !cast<SDPatternOperator>("csh_mask_"#width);

def cshl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(shl node:$src0, node:$src1), (shl node:$src0, (csh_mask node:$src1))]>;
defvar cshl = !cast<SDPatternOperator>("cshl_"#width);
def cshl_#width#_oneuse : HasOneUseBinOp<cshl>;
def clshl_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (cshl $src1, $src0)>;

def csrl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(srl node:$src0, node:$src1), (srl node:$src0, (csh_mask node:$src1))]>;
defvar csrl = !cast<SDPatternOperator>("csrl_"#width);
def csrl_#width#_oneuse : HasOneUseBinOp<csrl>;
def clshr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csrl $src1, $src0)>;

def csra_#width : PatFrags<(ops node:$src0, node:$src1),
  [(sra node:$src0, node:$src1), (sra node:$src0, (csh_mask node:$src1))]>;
defvar csra = !cast<SDPatternOperator>("csra_"#width);
def csra_#width#_oneuse : HasOneUseBinOp<csra>;
def cashr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csra $src1, $src0)>;
} // end foreach width

def srl_16 : PatFrag<
  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;


def hi_i16_elt : PatFrag<
  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
>;


def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for zero immediate
//===----------------------------------------------------------------------===//

def immzero : PatLeaf<(imm), [{ return N->isZero(); }]>;
def fpimmzero : PatLeaf<(fpimm), [{ return N->isZero(); }]>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>;
def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>;
def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>;
def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>;
def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>;
def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>;
def COND_O : PatFrags<(ops), [(OtherVT SETO)]>;
def COND_UO : PatFrags<(ops), [(OtherVT SETUO)]>;
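
// Illustrative: each COND_O* fragment covers both the explicitly ordered and
// the plain condition code, so one pattern written against COND_OEQ matches
// either (setcc $a, $b, SETOEQ) or (setcc $a, $b, SETEQ).
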
//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;

// XXX - For some reason the R600 version prefers to use unordered
// for setne?
def COND_UNE_NE : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>;
def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>;

// FIXME: Should not need code predicate
//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//

def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

class Aligned<int Bytes> {
  int MinAlignment = Bytes;
}

class StoreHi16<SDPatternOperator op, ValueType vt> : PatFrag <
  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = vt;
}
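
// Illustrative: StoreHi16 matches a store of the high half of a 32-bit value,
// i.e. IR of the shape
//   %hi = lshr i32 %v, 16
//   %t  = trunc i32 %hi to i16
//   store i16 %t, ptr %p
// which the *_hi16_* fragments defined below pick up per address space.
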
def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant,
                                              AddrSpaces.Constant32Bit ]>;
def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global,
                                            AddrSpaces.Constant,
                                            AddrSpaces.Constant32Bit ]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;

def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
                                          AddrSpaces.Global,
                                          AddrSpaces.Constant,
                                          AddrSpaces.Constant32Bit ]>;
def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;

def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;


foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def extloadi8_#as : PatFrag<(ops node:$ptr), (extloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def extloadi16_#as : PatFrag<(ops node:$ptr), (extloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}
} // End let AddressSpaces
} // End foreach as


foreach as = [ "global", "flat", "local", "private", "region" ] in {
let IsStore = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def store_#as : PatFrag<(ops node:$val, node:$ptr),
                        (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 0;
}

// truncstore fragments.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
                               (truncstorei8 node:$val, node:$ptr)>;
def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstorei16 node:$val, node:$ptr)>;

def store_hi16_#as : StoreHi16 <truncstorei16, i16>;
def truncstorei8_hi16_#as : StoreHi16<truncstorei8, i8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16, i16>;

} // End let IsStore = 1, AddressSpaces = ...
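
// Illustrative: the AddressSpaces lists above decide which fragments a memory
// operation can select through. A load from addrspace(4) (constant) can match
// load_constant, load_global and load_flat, since all three lists contain the
// constant spaces, while an addrspace(3) (local) store matches only the
// store_local family.
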
let IsAtomic = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def atomic_store_8_#as : PatFrag<(ops node:$ptr, node:$val),
                                 (atomic_store_8 node:$ptr, node:$val)>;
def atomic_store_16_#as : PatFrag<(ops node:$ptr, node:$val),
                                  (atomic_store_16 node:$ptr, node:$val)>;
def atomic_store_32_#as : PatFrag<(ops node:$ptr, node:$val),
                                  (atomic_store_32 node:$ptr, node:$val)>;
def atomic_store_64_#as : PatFrag<(ops node:$ptr, node:$val),
                                  (atomic_store_64 node:$ptr, node:$val)>;
}
} // End foreach as

multiclass noret_op {
  let HasNoUse = true in
  def "_noret" : PatFrag<(ops node:$ptr, node:$data),
    (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
}

multiclass global_addr_space_atomic_op {
  def "_noret_global_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
    let HasNoUse = true;
    let AddressSpaces = LoadAddress_global.AddrSpaces;
    let IsAtomic = 1;
  }
  def "_global_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
    let AddressSpaces = LoadAddress_global.AddrSpaces;
    let IsAtomic = 1;
  }
}

multiclass flat_addr_space_atomic_op {
  def "_noret_flat_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
    let HasNoUse = true;
    let AddressSpaces = LoadAddress_flat.AddrSpaces;
    let IsAtomic = 1;
  }
  def "_flat_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
    let AddressSpaces = LoadAddress_flat.AddrSpaces;
    let IsAtomic = 1;
  }
}

multiclass local_addr_space_atomic_op {
  def "_noret_local_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
    let HasNoUse = true;
    let AddressSpaces = LoadAddress_local.AddrSpaces;
    let IsAtomic = 1;
  }
  def "_local_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
    let AddressSpaces = LoadAddress_local.AddrSpaces;
    let IsAtomic = 1;
  }
}

defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op;
defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_flat_atomic_fmin : noret_op;
defm int_amdgcn_flat_atomic_fmax : noret_op;
defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op;
defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op;
defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_global_atomic_fmin : noret_op;
defm int_amdgcn_global_atomic_fmax : noret_op;
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
defm int_amdgcn_ds_fadd_v2bf16 : noret_op;

multiclass noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
  let HasNoUse = true in
  defm "_noret" : binary_atomic_op<atomic_op, IsInt>;
}

multiclass noret_ternary_atomic_op<SDNode atomic_op> {
  let HasNoUse = true in
  defm "_noret" : ternary_atomic_op<atomic_op>;
}
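
// Illustrative: the "_noret" fragments (HasNoUse = true) match only when the
// atomic's loaded result is dead, e.g.
//   atomicrmw add ptr addrspace(1) %p, i32 1   ; old value never read
// letting selection pick a form that does not return the previous value.
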
multiclass binary_atomic_op_all_as<SDNode atomic_op, bit IsInt = 1> {
  foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
    let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op<atomic_op, IsInt>;
      defm "_"#as : noret_binary_atomic_op<atomic_op, IsInt>;
    }
  }
}

defm atomic_swap : binary_atomic_op_all_as<atomic_swap>;
defm atomic_load_add : binary_atomic_op_all_as<atomic_load_add>;
defm atomic_load_and : binary_atomic_op_all_as<atomic_load_and>;
defm atomic_load_max : binary_atomic_op_all_as<atomic_load_max>;
defm atomic_load_min : binary_atomic_op_all_as<atomic_load_min>;
defm atomic_load_or : binary_atomic_op_all_as<atomic_load_or>;
defm atomic_load_sub : binary_atomic_op_all_as<atomic_load_sub>;
defm atomic_load_umax : binary_atomic_op_all_as<atomic_load_umax>;
defm atomic_load_umin : binary_atomic_op_all_as<atomic_load_umin>;
defm atomic_load_xor : binary_atomic_op_all_as<atomic_load_xor>;
defm atomic_load_fadd : binary_atomic_op_all_as<atomic_load_fadd, 0>;
defm atomic_load_uinc_wrap : binary_atomic_op_all_as<atomic_load_uinc_wrap>;
defm atomic_load_udec_wrap : binary_atomic_op_all_as<atomic_load_udec_wrap>;
let MemoryVT = v2f16 in
defm atomic_load_fadd_v2f16 : binary_atomic_op_all_as<atomic_load_fadd, 0>;
defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>;

def load_align8_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                        Aligned<8> {
  let IsLoad = 1;
}

def load_align16_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                         Aligned<16> {
  let IsLoad = 1;
}

def store_align8_local: PatFrag<(ops node:$val, node:$ptr),
                                (store_local node:$val, node:$ptr)>, Aligned<8> {
  let IsStore = 1;
}

def store_align16_local: PatFrag<(ops node:$val, node:$ptr),
                                 (store_local node:$val, node:$ptr)>, Aligned<16> {
  let IsStore = 1;
}

let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm atomic_cmp_swap_region : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

class Constants {
int TWO_PI = 0x40c90fdb;        // 2*pi in f32 bits
int PI = 0x40490fdb;            // pi in f32 bits
int TWO_PI_INV = 0x3e22f983;    // 1/(2*pi) in f32 bits
int FP_4294966784 = 0x4f7ffffe; // 4294966784 = 4294967296 - 512 = 2^32 - 2^9
int FP16_ONE = 0x3C00;
int FP16_NEG_ONE = 0xBC00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;

def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : AMDGPUPat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;
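
// Illustrative: POW_Common encodes the standard identity
//   pow(x, y) = exp(y * log(x))
// in terms of whichever IEEE log/exp/mul instructions the subtarget supplies.
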
/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : AMDGPUPat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : AMDGPUPat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;

// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

let AddedComplexity = 2 in {
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;
} // AddedComplexity.
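
// Illustrative: the mad24 patterns fold a 24-bit multiply feeding an add into
// a single mad-style instruction, rewriting
//   (add (AMDGPUmul_i24 $a, $b), $c)  -->  (Inst $a, $b, $c)
// where the extra (i1 0) operand, emitted when HasClamp is set, is the
// disabled clamp modifier.
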
class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

// Instructions which select to the same v_max_f*
def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

class NeverNaNPats<dag ops, list<dag> frags> : PatFrags<ops, frags> {
  let PredicateCode = [{
    return CurDAG->isKnownNeverNaN(SDValue(N,0));
  }];
  let GISelPredicateCode = [{
    return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI);
  }];
}

def fminnum_like_nnan : NeverNaNPats<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

def fmaxnum_like_nnan : NeverNaNPats<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee_oneuse node:$src0, node:$src1),
   (fminnum_oneuse node:$src0, node:$src1)]
>;

def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
   (fmaxnum_oneuse node:$src0, node:$src1)]
>;

def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
  [(fmad node:$src0, node:$src1, node:$src2),
   (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
>;

// FIXME: fsqrt should not select directly
def any_amdgcn_sqrt : PatFrags<(ops node:$src0),
  [(fsqrt node:$src0), (int_amdgcn_sqrt node:$src0)]
>;
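
// Illustrative: the *_nnan fragments add an isKnownNeverNaN check on the
// node's result, so fminnum_like_nnan matches a min only when it provably
// cannot produce a NaN; in that case the differing NaN semantics of fminnum
// and fminnum_ieee no longer matter and either form can select the same
// instruction.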