//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

class AddressSpacesImpl {
  int Flat = 0;
  int Global = 1;
  int Region = 2;
  int Local = 3;
  int Constant = 4;
  int Private = 5;
  int Constant32Bit = 6;
}

def AddrSpaces : AddressSpacesImpl;


class AMDGPUInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process. It is
  // mainly used for ARM, but TableGen expects this field to exist or it fails
  // to build the decode table.
  field bits<96> SoftFail = 0;

  let DecoderNamespace = Namespace;

  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
  list<dag> pattern = []> : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//

class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
: Instruction {

  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");
  let isPseudo = 1;
  let Itinerary = NullALU;
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isCodeGenOnly = 1;
}

def TruePredicate : Predicate<"">;

// FIXME: TableGen should have special support for this.
def FalsePredicate : Predicate<"false">;

// Add a predicate to the list if it does not already exist, deduplicating it.
class PredConcat<list<Predicate> lst, Predicate pred> {
  list<Predicate> ret = !listconcat(lst, !listremove([pred], lst));
}

// Get the union of two Register lists.
class RegListUnion<list<Register> lstA, list<Register> lstB> {
  list<Register> ret = !listconcat(lstA, !listremove(lstB, lstA));
}

class PredicateControl {
  Predicate SubtargetPredicate = TruePredicate;
  Predicate AssemblerPredicate = TruePredicate;
  Predicate WaveSizePredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates = PredConcat<
      PredConcat<PredConcat<OtherPredicates,
                            SubtargetPredicate>.ret,
                 AssemblerPredicate>.ret,
      WaveSizePredicate>.ret;
}
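
// Illustrative example of the deduplication above (FeatureA/FeatureB are
// hypothetical predicates, not defs in this file); both expressions yield
// [FeatureA, FeatureB]:
//
//   PredConcat<[FeatureA], FeatureB>.ret
//   PredConcat<[FeatureA, FeatureB], FeatureB>.ret
//
// PredicateControl chains three such concatenations, so the default
// TruePredicate values of unset fields end up in Predicates at most once.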

class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
      PredicateControl, GISelFlags;

let GIIgnoreCopies = 1 in
class AMDGPUPatIgnoreCopies<dag pattern, dag result> : AMDGPUPat<pattern, result>;

let RecomputePerFunction = 1 in {
def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals != DenormalMode::getPreserveSign()">;
def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals != DenormalMode::getPreserveSign()">;
def NoFP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def NoFP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP32Denormals == DenormalMode::getPreserveSign()">;
def NoFP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().FP64FP16Denormals == DenormalMode::getPreserveSign()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
}

def FMA : Predicate<"Subtarget->hasFMA()">;

def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;

def i1imm_0 : OperandWithDefaultOps<i1, (ops (i1 0))>;

class CustomOperandClass<string name, bit optional, string predicateMethod,
                         string parserMethod, string defaultMethod>
    : AsmOperandClass {
  let Name = name;
  let PredicateMethod = predicateMethod;
  let ParserMethod = parserMethod;
  let RenderMethod = "addImmOperands";
  let IsOptional = optional;
  let DefaultMethod = defaultMethod;
}

class CustomOperandProps<bit optional = 0, string name = NAME> {
  string ImmTy = "ImmTy"#name;
  string PredicateMethod = "is"#name;
  string ParserMethod = "parse"#name;
  string DefaultValue = "0";
  string DefaultMethod = "[this]() { return "#
    "AMDGPUOperand::CreateImm(this, "#DefaultValue#", SMLoc(), "#
    "AMDGPUOperand::"#ImmTy#"); }";
  string PrintMethod = "print"#name;
  AsmOperandClass ParserMatchClass =
    CustomOperandClass<name, optional, PredicateMethod, ParserMethod,
                       DefaultMethod>;
  string OperandType = "OPERAND_IMMEDIATE";
}

class CustomOperand<ValueType type, bit optional = 0, string name = NAME>
  : Operand<type>, CustomOperandProps<optional, name>;

class ImmOperand<ValueType type, string name = NAME, bit optional = 0,
                 string printer = "print"#name>
    : CustomOperand<type, optional, name> {
  let ImmTy = "ImmTyNone";
  let ParserMethod = "";
  let PrintMethod = printer;
}

def s16imm : ImmOperand<i16, "S16Imm", 0, "printU16ImmOperand">;
def u16imm : ImmOperand<i16, "U16Imm", 0, "printU16ImmOperand">;
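
// Illustrative sketch (hypothetical operand, not a def in this file): a
// custom immediate operand named "Foo" derives its assembler hooks by string
// concatenation, so it expects isFoo/parseFoo/printFoo methods and an
// AMDGPUOperand::ImmTyFoo enumerator to exist on the AsmParser side:
//
//   def FooOperand : CustomOperand<i32, /*optional=*/1, "Foo">;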

class ValuePredicatedOperand<CustomOperand op, string valuePredicate,
                             bit optional = 0>
    : CustomOperand<op.Type, optional> {
  let ImmTy = op.ImmTy;
  defvar OpPredicate = op.ParserMatchClass.PredicateMethod;
  let PredicateMethod =
    "getPredicate([](const AMDGPUOperand &Op) -> bool { "#
    "return Op."#OpPredicate#"() && "#valuePredicate#"; })";
  let ParserMethod = op.ParserMatchClass.ParserMethod;
  let DefaultValue = op.DefaultValue;
  let DefaultMethod = op.DefaultMethod;
  let PrintMethod = op.PrintMethod;
}

//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0),
  [{ return N->hasOneUse(); }]> {

  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]> {
  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class is_canonicalized<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{
    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());

    return Lowering.isCanonicalized(*CurDAG, N->getOperand(0)) &&
           Lowering.isCanonicalized(*CurDAG, N->getOperand(1));
  }]> {

  // TODO: Improve the GlobalISel legalizer for g_build_vector to match this class.
  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
        MF.getSubtarget().getTargetLowering());

    return TLI->isCanonicalized(MI.getOperand(1).getReg(), const_cast<MachineFunction&>(MF)) &&
           TLI->isCanonicalized(MI.getOperand(2).getReg(), const_cast<MachineFunction&>(MF));
  }];
}

class FoldTernaryOpPat<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op2 (op1 node:$src0, node:$src1), node:$src2)
>;

def imad : FoldTernaryOpPat<mul, add>;

let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;
def fminimum_oneuse : HasOneUseBinOp<fminimum>;
def fmaximum_oneuse : HasOneUseBinOp<fmaximum>;

def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;


def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;
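
// Illustrative sketch: the *_oneuse fragments above match only when the
// inner node has a single non-debug use, preventing a shared intermediate
// value from being folded into several users and recomputed. For example,
// this is roughly how shl+add folding is written on subtargets that have
// v_lshl_add_u32 (instruction name assumed here for illustration):
//
//   def : AMDGPUPat<(add (shl_oneuse i32:$a, i32:$b), i32:$c),
//                   (V_LSHL_ADD_U32_e64 $a, $b, $c)>;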

//===----------------------------------------------------------------------===//
// PatFrags for shifts
//===----------------------------------------------------------------------===//

// Constrained shift PatFrags.

def csh_mask_16 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 4); }]> {
    let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }];
  }

def csh_mask_32 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 5); }]> {
    let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }];
  }

def csh_mask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 6); }]> {
    let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }];
  }

foreach width = [16, 32, 64] in {
defvar csh_mask = !cast<SDPatternOperator>("csh_mask_"#width);

def cshl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(shl node:$src0, node:$src1), (shl node:$src0, (csh_mask node:$src1))]>;
defvar cshl = !cast<SDPatternOperator>("cshl_"#width);
def cshl_#width#_oneuse : HasOneUseBinOp<cshl>;
def clshl_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (cshl $src1, $src0)>;

def csrl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(srl node:$src0, node:$src1), (srl node:$src0, (csh_mask node:$src1))]>;
defvar csrl = !cast<SDPatternOperator>("csrl_"#width);
def csrl_#width#_oneuse : HasOneUseBinOp<csrl>;
def clshr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csrl $src1, $src0)>;

def csra_#width : PatFrags<(ops node:$src0, node:$src1),
  [(sra node:$src0, node:$src1), (sra node:$src0, (csh_mask node:$src1))]>;
defvar csra = !cast<SDPatternOperator>("csra_"#width);
def csra_#width#_oneuse : HasOneUseBinOp<csra>;
def cashr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csra $src1, $src0)>;
} // end foreach width
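
// Illustrative sketch: shift instructions only read the low log2(bitwidth)
// bits of the shift amount, so an AND that merely clears the unused high bits
// is redundant. isUnneededShiftMask(N, 5) accepts a mask that keeps at least
// the low 5 bits; for 32-bit shifts both of these forms therefore select
// identically through cshl_32:
//
//   (shl i32:$x, i32:$y)
//   (shl i32:$x, (and i32:$y, (i32 31)))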

def srl_16 : PatFrag<
  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;


def hi_i16_elt : PatFrag<
  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
>;


def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for zero immediate
//===----------------------------------------------------------------------===//

def immzero : PatLeaf<(imm), [{ return N->isZero(); }]>;
def fpimmzero : PatLeaf<(fpimm), [{ return N->isZero(); }]>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>;
def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>;
def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>;
def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>;
def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>;
def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>;
def COND_O : PatFrags<(ops), [(OtherVT SETO)]>;
def COND_UO : PatFrags<(ops), [(OtherVT SETUO)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;

// XXX - For some reason the R600 version prefers to use unordered
// for setne?
def COND_UNE_NE : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>;
def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>;

// FIXME: Should not need code predicate
//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//

def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

class Aligned<int Bytes> {
  int MinAlignment = Bytes;
}

class StoreHi16<SDPatternOperator op, ValueType vt> : PatFrag <
  (ops node:$value, node:$ptr),
  (op (srl node:$value, (i32 16)), node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = vt;
}
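
// Illustrative sketch: StoreHi16 matches a store of the high half of a 32-bit
// value. For instance, truncstorei16_hi16_global (defined below) covers DAGs
// of the form
//
//   (truncstorei16 (srl i32:$val, (i32 16)), $ptr)
//
// so a d16-hi store instruction can write the upper 16 bits of $val directly.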

def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant,
                                              AddrSpaces.Constant32Bit ]>;
def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global,
                                            AddrSpaces.Constant,
                                            AddrSpaces.Constant32Bit ]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;

def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
                                          AddrSpaces.Global,
                                          AddrSpaces.Constant,
                                          AddrSpaces.Constant32Bit ]>;
def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;

def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;



foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def extloadi8_#as : PatFrag<(ops node:$ptr), (extloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def extloadi16_#as : PatFrag<(ops node:$ptr), (extloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr)> {
  let IsLoad = 1;
}

def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextloadi16 node:$ptr)> {
  let IsLoad = 1;
}

def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}
} // End let AddressSpaces
} // End foreach as
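
// Illustrative sketch: the foreach above stamps out one load fragment per
// address space. For example, load_global matches an unindexed, non-extending
// load whose address is in the global, constant, or constant-32-bit address
// space, so a hypothetical selection pattern could be written as:
//
//   def : AMDGPUPat<(i32 (load_global i64:$addr)), (SOME_LOAD_INST $addr)>;
//
// where SOME_LOAD_INST stands in for a real global-load instruction.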

foreach as = [ "global", "flat", "local", "private", "region" ] in {
let IsStore = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def store_#as : PatFrag<(ops node:$val, node:$ptr),
                        (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 0;
}

// truncstore fragments.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)> {
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
                               (truncstorei8 node:$val, node:$ptr)>;
def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstorei16 node:$val, node:$ptr)>;

def store_hi16_#as : StoreHi16 <truncstorei16, i16>;
def truncstorei8_hi16_#as : StoreHi16<truncstorei8, i8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16, i16>;
} // End let IsStore = 1, AddressSpaces = ...

let IsAtomic = 1, AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def atomic_store_8_#as : PatFrag<(ops node:$val, node:$ptr),
                                 (atomic_store_8 node:$val, node:$ptr)>;
def atomic_store_16_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_16 node:$val, node:$ptr)>;
def atomic_store_32_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_32 node:$val, node:$ptr)>;
def atomic_store_64_#as : PatFrag<(ops node:$val, node:$ptr),
                                  (atomic_store_64 node:$val, node:$ptr)>;
} // End let IsAtomic = 1, AddressSpaces = ...
} // End foreach as

multiclass noret_op {
  let HasNoUse = true in
  def "_noret" : PatFrag<(ops node:$ptr, node:$data),
    (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>;
}

multiclass global_addr_space_atomic_op {
  def "_noret_global_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let HasNoUse = true;
      let AddressSpaces = LoadAddress_global.AddrSpaces;
      let IsAtomic = 1;
    }
  def "_global_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let AddressSpaces = LoadAddress_global.AddrSpaces;
      let IsAtomic = 1;
    }
}

multiclass flat_addr_space_atomic_op {
  def "_noret_flat_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let HasNoUse = true;
      let AddressSpaces = LoadAddress_flat.AddrSpaces;
      let IsAtomic = 1;
    }
  def "_flat_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let AddressSpaces = LoadAddress_flat.AddrSpaces;
      let IsAtomic = 1;
    }
}

multiclass local_addr_space_atomic_op {
  def "_noret_local_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let HasNoUse = true;
      let AddressSpaces = LoadAddress_local.AddrSpaces;
      let IsAtomic = 1;
    }
  def "_local_addrspace" :
    PatFrag<(ops node:$ptr, node:$data),
            (!cast<SDPatternOperator>(NAME) node:$ptr, node:$data)>{
      let AddressSpaces = LoadAddress_local.AddrSpaces;
      let IsAtomic = 1;
    }
}

defm int_amdgcn_flat_atomic_fadd : flat_addr_space_atomic_op;
defm int_amdgcn_flat_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_flat_atomic_fmin : noret_op;
defm int_amdgcn_flat_atomic_fmax : noret_op;
defm int_amdgcn_global_atomic_fadd : global_addr_space_atomic_op;
defm int_amdgcn_flat_atomic_fadd : global_addr_space_atomic_op;
defm int_amdgcn_global_atomic_fadd_v2bf16 : noret_op;
defm int_amdgcn_global_atomic_fmin : noret_op;
defm int_amdgcn_global_atomic_fmax : noret_op;
defm int_amdgcn_global_atomic_csub : noret_op;
defm int_amdgcn_flat_atomic_fadd : local_addr_space_atomic_op;
defm int_amdgcn_ds_fadd_v2bf16 : noret_op;
defm int_amdgcn_global_atomic_ordered_add_b64 : noret_op;
defm int_amdgcn_flat_atomic_fmin_num : noret_op;
defm int_amdgcn_flat_atomic_fmax_num : noret_op;
defm int_amdgcn_global_atomic_fmin_num : noret_op;
defm int_amdgcn_global_atomic_fmax_num : noret_op;
defm int_amdgcn_atomic_cond_sub_u32 : local_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32 : flat_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32 : global_addr_space_atomic_op;
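
// Illustrative sketch: because noret_op names the new fragment with NAME plus
// a "_noret" suffix, a line such as
//
//   defm int_amdgcn_global_atomic_csub : noret_op;
//
// produces int_amdgcn_global_atomic_csub_noret, which matches only when the
// atomic's result has no use, so selection can pick a non-returning encoding.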
int_amdgcn_flat_atomic_fmax_num : noret_op;
defm int_amdgcn_global_atomic_fmin_num : noret_op;
defm int_amdgcn_global_atomic_fmax_num : noret_op;
defm int_amdgcn_atomic_cond_sub_u32 : local_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32 : flat_addr_space_atomic_op;
defm int_amdgcn_atomic_cond_sub_u32 : global_addr_space_atomic_op;

multiclass noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
  let HasNoUse = true in
  defm "_noret" : binary_atomic_op<atomic_op, IsInt>;
}

multiclass noret_ternary_atomic_op<SDNode atomic_op> {
  let HasNoUse = true in
  defm "_noret" : ternary_atomic_op<atomic_op>;
}

multiclass binary_atomic_op_all_as<SDNode atomic_op, bit IsInt = 1> {
  foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
    let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op<atomic_op, IsInt>;
      defm "_"#as : noret_binary_atomic_op<atomic_op, IsInt>;
    }
  }
}

defm atomic_swap : binary_atomic_op_all_as<atomic_swap>;
defm atomic_load_add : binary_atomic_op_all_as<atomic_load_add>;
defm atomic_load_and : binary_atomic_op_all_as<atomic_load_and>;
defm atomic_load_max : binary_atomic_op_all_as<atomic_load_max>;
defm atomic_load_min : binary_atomic_op_all_as<atomic_load_min>;
defm atomic_load_or : binary_atomic_op_all_as<atomic_load_or>;
defm atomic_load_sub : binary_atomic_op_all_as<atomic_load_sub>;
defm atomic_load_umax : binary_atomic_op_all_as<atomic_load_umax>;
defm atomic_load_umin : binary_atomic_op_all_as<atomic_load_umin>;
defm atomic_load_xor : binary_atomic_op_all_as<atomic_load_xor>;
defm atomic_load_fadd : binary_atomic_op_all_as<atomic_load_fadd, 0>;
defm atomic_load_uinc_wrap : binary_atomic_op_all_as<atomic_load_uinc_wrap>;
defm atomic_load_udec_wrap : binary_atomic_op_all_as<atomic_load_udec_wrap>;
let MemoryVT = v2f16 in
defm atomic_load_fadd_v2f16 : binary_atomic_op_all_as<atomic_load_fadd, 0>;
defm AMDGPUatomic_cmp_swap : binary_atomic_op_all_as<AMDGPUatomic_cmp_swap>;

def load_align8_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                        Aligned<8> {
  let IsLoad = 1;
}

def load_align16_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                         Aligned<16> {
  let IsLoad = 1;
}

def store_align8_local: PatFrag<(ops node:$val, node:$ptr),
                                (store_local node:$val, node:$ptr)>, Aligned<8> {
  let IsStore = 1;
}

def store_align16_local: PatFrag<(ops node:$val, node:$ptr),
                                 (store_local node:$val, node:$ptr)>, Aligned<16> {
  let IsStore = 1;
}

let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm atomic_cmp_swap_region : noret_ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : noret_ternary_atomic_op<atomic_cmp_swap_glue>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}
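
// Illustrative sketch: binary_atomic_op_all_as expands one atomic opcode into
// per-address-space fragments, each in returning and non-returning ("_noret")
// flavors. Assuming binary_atomic_op suffixes fragments with the memory size,
// "defm atomic_load_add : binary_atomic_op_all_as<atomic_load_add>;" yields
// names such as atomic_load_add_global_32 and atomic_load_add_global_noret_32,
// repeated for flat, constant, local, private, and region.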

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_4294966784 = 0x4f7ffffe; // 4294966784 = 4294967296 - 512 = 2^32 - 2^9
int FP16_ONE = 0x3C00;
int FP16_NEG_ONE = 0xBC00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;

def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : AMDGPUPat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : AMDGPUPat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : AMDGPUPat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// rotr pattern
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;

// Special conversion patterns

def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

let AddedComplexity = 2 in {
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;
} // AddedComplexity.
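
// Illustrative sketch: with AddedComplexity = 2, the mad24 patterns above are
// preferred over separate mul/add selections. A subtarget would typically
// instantiate them along these lines (instruction name assumed here):
//
//   def : IMad24Pat<V_MAD_I32_I24_e64, /*HasClamp=*/1>;
//
// which folds (add (AMDGPUmul_i24 a, b), c) into a single mad, with the
// trailing (i1 0) filling the clamp operand.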

class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

// Instructions which select to the same v_max_f*
def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

class NeverNaNPats<dag ops, list<dag> frags> : PatFrags<ops, frags> {
  let PredicateCode = [{
    return CurDAG->isKnownNeverNaN(SDValue(N,0));
  }];
  let GISelPredicateCode = [{
    return isKnownNeverNaN(MI.getOperand(0).getReg(), MRI);
  }];
}

def fminnum_like_nnan : NeverNaNPats<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

def fmaxnum_like_nnan : NeverNaNPats<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee_oneuse node:$src0, node:$src1),
   (fminnum_oneuse node:$src0, node:$src1)]
>;

def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
   (fmaxnum_oneuse node:$src0, node:$src1)]
>;

def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
  [(fmad node:$src0, node:$src1, node:$src2),
   (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
>;

// FIXME: fsqrt should not select directly
def any_amdgcn_sqrt : PatFrags<(ops node:$src0),
  [(fsqrt node:$src0), (int_amdgcn_sqrt node:$src0)]
>;
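
// Illustrative sketch: RcpPat rewrites an exact reciprocal into a single rcp
// instruction. A typical instantiation elsewhere in the backend looks roughly
// like (instruction name assumed here):
//
//   def : RcpPat<V_RCP_F32_e32, f32>;
//
// turning (fdiv 1.0, x) into one v_rcp_f32. Similarly, fminnum_like lets a
// single pattern cover both the IEEE and non-IEEE min nodes, since both
// select to the same v_min_f* instruction.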