//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

// Integer IDs for the address spaces named by the definitions in this file.
// Accessed through the single AddrSpaces record below.
class AddressSpacesImpl {
  int Flat = 0;
  int Global = 1;
  int Region = 2;
  int Local = 3;
  int Constant = 4;
  int Private = 5;
  int Constant32Bit = 6;
}

def AddrSpaces : AddressSpacesImpl;


// Common base for instructions in the "AMDGPU" namespace.
//   outs / ins : operand lists.
//   asm        : assembly string (empty by default).
//   pattern    : selection patterns (empty by default).
class AMDGPUInst <dag outs, dag ins, string asm = "",
                  list<dag> pattern = []> : Instruction {
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process.  It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<64> SoftFail = 0;

  let DecoderNamespace = Namespace;

  // The two register load/store markers are exposed through the top TSFlags
  // bits.
  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

// AMDGPUInst plus a 32-bit Inst field that defaults to all ones.
class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
                        list<dag> pattern = []>
    : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}

//===---------------------------------------------------------------------===//
// Return instruction
//===---------------------------------------------------------------------===//

// Codegen-only pseudo instruction format. The assembly string gets a trailing
// newline appended, and the record is marked as neither loading, storing, nor
// having side effects.
class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern>
    : Instruction {

  let Namespace = "AMDGPU";
  dag OutOperandList = outs;
  dag InOperandList = ins;
  let Pattern = pattern;
  let AsmString = !strconcat(asmstr, "\n");
  let isPseudo = 1;
  let Itinerary = NullALU;
  bit hasIEEEFlag = 0;
  bit hasZeroOpFlag = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isCodeGenOnly = 1;
}

// Predicate with an empty condition string.
def TruePredicate : Predicate<"">;

// FIXME: Tablegen should specially support this
def FalsePredicate : Predicate<"false">;

// Add a predicate to the list if it does not already exist, to deduplicate it.
class PredConcat<list<Predicate> lst, Predicate pred> {
  // pred always comes first; any entry of lst equal to pred is filtered out,
  // so pred appears exactly once in the result.
  list<Predicate> ret =
      !listconcat([pred], !filter(item, lst, !ne(item, pred)));
}

// Mixin that assembles the final Predicates list from the individual
// predicate fields. OtherPredicates is folded with SubtargetPredicate, then
// AssemblerPredicate, then WaveSizePredicate, each step going through
// PredConcat so duplicates collapse.
class PredicateControl {
  Predicate SubtargetPredicate = TruePredicate;
  Predicate AssemblerPredicate = TruePredicate;
  Predicate WaveSizePredicate = TruePredicate;
  list<Predicate> OtherPredicates = [];
  list<Predicate> Predicates =
      PredConcat<
          PredConcat<
              PredConcat<OtherPredicates, SubtargetPredicate>.ret,
              AssemblerPredicate>.ret,
          WaveSizePredicate>.ret;
}

// A Pat that also carries the PredicateControl fields.
class AMDGPUPat<dag pattern, dag result> : Pat<pattern, result>,
      PredicateControl;

// Predicates that query per-function state. FP16 and FP64 share the same
// allFP64FP16Denormals() query.
let RecomputePerFunction = 1 in {
def FP16Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def FP32Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
def FP64Denormals : Predicate<"MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def NoFP16Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def NoFP32Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP32Denormals()">;
def NoFP64Denormals : Predicate<"!MF->getInfo<SIMachineFunctionInfo>()->getMode().allFP64FP16Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
}

def FMA : Predicate<"Subtarget->hasFMA()">;

// i32 operand whose default value is 0.
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;

// Assembler operand classes for the 16-bit immediates below.
def u16ImmTarget : AsmOperandClass {
  let Name = "U16Imm";
  let RenderMethod = "addImmOperands";
}

def s16ImmTarget : AsmOperandClass {
  let Name = "S16Imm";
  let RenderMethod = "addImmOperands";
}

let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = u16ImmTarget;
}

// NOTE(review): s16imm uses the same print method as u16imm - confirm that
// this is intended.
def s16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
  let ParserMatchClass = s16ImmTarget;
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"

//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

// The HasOneUse*Op classes wrap an operator so it only matches when the node
// has a single use. The GlobalISel form checks that the register defined by
// operand 0 has one non-debug use.

class HasOneUseUnaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0),
  (op $src0),
  [{ return N->hasOneUse(); }]> {

  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]> {

  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]> {

  let GISelPredicateCode = [{
    return MRI.hasOneNonDBGUse(MI.getOperand(0).getReg());
  }];
}

// Binary operator that matches only when SITargetLowering::isCanonicalized
// accepts both source operands.
class is_canonicalized<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{
    const SITargetLowering &Lowering =
        *static_cast<const SITargetLowering *>(getTargetLowering());

    return Lowering.isCanonicalized(*CurDAG, N->getOperand(0)) &&
           Lowering.isCanonicalized(*CurDAG, N->getOperand(1));
  }]> {

  // TODO: Improve the Legalizer for g_build_vector in Global Isel to match this class
  let GISelPredicateCode = [{
    const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
        MF.getSubtarget().getTargetLowering());

    return TLI->isCanonicalized(MI.getOperand(1).getReg(), const_cast<MachineFunction&>(MF)) &&
           TLI->isCanonicalized(MI.getOperand(2).getReg(), const_cast<MachineFunction&>(MF));
  }];
}


// Single-use variants that are tagged commutative and associative.
let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;

def fminnum_oneuse : HasOneUseBinOp<fminnum>;
def fmaxnum_oneuse : HasOneUseBinOp<fmaxnum>;

def fminnum_ieee_oneuse : HasOneUseBinOp<fminnum_ieee>;
def fmaxnum_ieee_oneuse : HasOneUseBinOp<fmaxnum_ieee>;


def and_oneuse : HasOneUseBinOp<and>;
def or_oneuse : HasOneUseBinOp<or>;
def xor_oneuse : HasOneUseBinOp<xor>;
} // Properties = [SDNPCommutative, SDNPAssociative]

// Single-use variants without extra node properties.
def not_oneuse : HasOneUseUnaryOp<not>;

def add_oneuse : HasOneUseBinOp<add>;
def sub_oneuse : HasOneUseBinOp<sub>;

def srl_oneuse : HasOneUseBinOp<srl>;
def shl_oneuse : HasOneUseBinOp<shl>;

def select_oneuse : HasOneUseTernaryOp<select>;

def AMDGPUmul_u24_oneuse : HasOneUseBinOp<AMDGPUmul_u24>;
def AMDGPUmul_i24_oneuse : HasOneUseBinOp<AMDGPUmul_i24>;

//===----------------------------------------------------------------------===//
// PatFrags for shifts
//===----------------------------------------------------------------------===//

// Constrained shift PatFrags.

// (and x, imm) fragments that match only when isUnneededShiftMask accepts the
// node for the given bit count (4, 5 and 6 for the 16-, 32- and 64-bit
// variants respectively).
def csh_mask_16 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 4); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 4); }];
}

def csh_mask_32 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 5); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 5); }];
}

def csh_mask_64 : PatFrag<(ops node:$src0), (and node:$src0, imm),
  [{ return isUnneededShiftMask(N, 6); }]> {
  let GISelPredicateCode = [{ return isUnneededShiftMask(MI, 6); }];
}

// For each width, define:
//   c<shift>_<width>         - the shift with a plain amount, or with the
//                              amount wrapped in the matching csh_mask.
//   c<shift>_<width>_oneuse  - the same, restricted to a single use.
//   c*_rev_<width>           - the same shift with its two operands swapped.
foreach width = [16, 32, 64] in {
defvar csh_mask = !cast<SDPatternOperator>("csh_mask_"#width);

def cshl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(shl node:$src0, node:$src1), (shl node:$src0, (csh_mask node:$src1))]>;
defvar cshl = !cast<SDPatternOperator>("cshl_"#width);
def cshl_#width#_oneuse : HasOneUseBinOp<cshl>;
def clshl_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (cshl $src1, $src0)>;

def csrl_#width : PatFrags<(ops node:$src0, node:$src1),
  [(srl node:$src0, node:$src1), (srl node:$src0, (csh_mask node:$src1))]>;
defvar csrl = !cast<SDPatternOperator>("csrl_"#width);
def csrl_#width#_oneuse : HasOneUseBinOp<csrl>;
def clshr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csrl $src1, $src0)>;

def csra_#width : PatFrags<(ops node:$src0, node:$src1),
  [(sra node:$src0, node:$src1), (sra node:$src0, (csh_mask node:$src1))]>;
defvar csra = !cast<SDPatternOperator>("csra_"#width);
def csra_#width#_oneuse : HasOneUseBinOp<csra>;
def cashr_rev_#width : PatFrag <(ops node:$src0, node:$src1),
  (csra $src1, $src0)>;
} // end foreach width

// Single-use logical right shift by the constant 16.
def srl_16 : PatFrag<
  (ops node:$src0), (srl_oneuse node:$src0, (i32 16))
>;


// i16 truncation of an i32 value shifted right by 16.
def hi_i16_elt : PatFrag<
  (ops node:$src0), (i16 (trunc (i32 (srl_16 node:$src0))))
>;


// Matches a BITCAST whose operand is an SRL by the constant 16.
// The `if` that binds RHS was missing its closing parenthesis, which left the
// C++ predicate body syntactically invalid; it is balanced here.
def hi_f16_elt : PatLeaf<
  (vt), [{
  if (N->getOpcode() != ISD::BITCAST)
    return false;
  SDValue Tmp = N->getOperand(0);

  if (Tmp.getOpcode() != ISD::SRL)
    return false;
  // Only a constant shift amount of exactly 16 qualifies.
  if (const auto *RHS = dyn_cast<ConstantSDNode>(Tmp.getOperand(1)))
    return RHS->getZExtValue() == 16;
  return false;
}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

// Each ordered condition also accepts the corresponding condition code that
// has no ordered/unordered prefix.
def COND_OEQ : PatFrags<(ops), [(OtherVT SETOEQ), (OtherVT SETEQ)]>;
def COND_ONE : PatFrags<(ops), [(OtherVT SETONE), (OtherVT SETNE)]>;
def COND_OGT : PatFrags<(ops), [(OtherVT SETOGT), (OtherVT SETGT)]>;
def COND_OGE : PatFrags<(ops), [(OtherVT SETOGE), (OtherVT SETGE)]>;
def COND_OLT : PatFrags<(ops), [(OtherVT SETOLT), (OtherVT SETLT)]>;
def COND_OLE : PatFrags<(ops), [(OtherVT SETOLE), (OtherVT SETLE)]>;
def COND_O   : PatFrags<(ops), [(OtherVT SETO)]>;
def COND_UO  : PatFrags<(ops), [(OtherVT SETUO)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

def COND_UEQ : PatFrag<(ops), (OtherVT SETUEQ)>;
def COND_UNE : PatFrag<(ops), (OtherVT SETUNE)>;
def COND_UGT : PatFrag<(ops), (OtherVT SETUGT)>;
def COND_UGE : PatFrag<(ops), (OtherVT SETUGE)>;
def COND_ULT : PatFrag<(ops), (OtherVT SETULT)>;
def COND_ULE : PatFrag<(ops), (OtherVT SETULE)>;

// XXX - For some reason R600 version is preferring to use unordered
// for setne?
def COND_UNE_NE : PatFrags<(ops), [(OtherVT SETUNE), (OtherVT SETNE)]>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatFrag<(ops), (OtherVT SETGT)>;
def COND_SGE : PatFrag<(ops), (OtherVT SETGE)>;
def COND_SLT : PatFrag<(ops), (OtherVT SETLT)>;
def COND_SLE : PatFrag<(ops), (OtherVT SETLE)>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

// Equality and inequality accept both the plain and the "U"-prefixed
// condition code.
def COND_EQ : PatFrags<(ops), [(OtherVT SETEQ), (OtherVT SETUEQ)]>;
def COND_NE : PatFrags<(ops), [(OtherVT SETNE), (OtherVT SETUNE)]>;

// FIXME: Should not need code predicate
//def COND_NULL : PatLeaf<(OtherVT null_frag)>;
// Leaf whose predicate always returns false, so it never matches.
def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for Texture Constants
//===----------------------------------------------------------------------===//

// Each leaf below matches an immediate whose zero-extended value, truncated
// to 32 bits, is one of the listed texture type codes.

// Type codes 9, 10 and 16.
def TEX_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 9 || TType == 10 || TType == 16;
  }]
>;

// Type code 5.
def TEX_RECT : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 5;
  }]
>;

// Type codes 6 through 8, and 13.
def TEX_SHADOW : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return (TType >= 6 && TType <= 8) || TType == 13;
  }]
>;

// Type codes 11, 12 and 17.
def TEX_SHADOW_ARRAY : PatLeaf<
  (imm),
  [{uint32_t TType = (uint32_t)N->getZExtValue();
    return TType == 11 || TType == 12 || TType == 17;
  }]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

// ISD::ATOMIC_CMP_SWAP node that additionally takes a glue input.
def atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
  [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand, SDNPInGlue]
>;

// Holder for a list of address space IDs.
class AddressSpaceList<list<int> AS> {
  list<int> AddrSpaces = AS;
}

// Mixin that sets MinAlignment to the given byte count.
class Aligned<int Bytes> {
  int MinAlignment = Bytes;
}

// Store of (srl value, 16) through `op`, with memory type `vt`.
class StoreHi16<SDPatternOperator op, ValueType vt> : PatFrag <
  (ops node:$value, node:$ptr), (op (srl node:$value, (i32 16)), node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = vt;
}

// Address-space lists, looked up by name ("LoadAddress_" / "StoreAddress_"
// followed by the address space) in the foreach loops below.
def LoadAddress_constant : AddressSpaceList<[ AddrSpaces.Constant,
                                              AddrSpaces.Constant32Bit ]>;
def LoadAddress_global : AddressSpaceList<[ AddrSpaces.Global,
                                            AddrSpaces.Constant,
                                            AddrSpaces.Constant32Bit ]>;
def StoreAddress_global : AddressSpaceList<[ AddrSpaces.Global ]>;

def LoadAddress_flat : AddressSpaceList<[ AddrSpaces.Flat,
                                          AddrSpaces.Global,
                                          AddrSpaces.Constant,
                                          AddrSpaces.Constant32Bit ]>;
def StoreAddress_flat : AddressSpaceList<[ AddrSpaces.Flat, AddrSpaces.Global ]>;

def LoadAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;
def StoreAddress_private : AddressSpaceList<[ AddrSpaces.Private ]>;

def LoadAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;
def StoreAddress_local : AddressSpaceList<[ AddrSpaces.Local ]>;

def LoadAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;
def StoreAddress_region : AddressSpaceList<[ AddrSpaces.Region ]>;



// Load fragments, one set per address space. Every record is restricted to
// the address spaces listed in the matching LoadAddress_<as> record.
foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {

// Plain (non-extending) unindexed load.
def load_#as : PatFrag<(ops node:$ptr), (unindexedload node:$ptr)> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

// Extending loads from i8 / i16 memory.
def extloadi8_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def extloadi16_#as : PatFrag<(ops node:$ptr), (extload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def sextloadi8_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def sextloadi16_#as : PatFrag<(ops node:$ptr), (sextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

def zextloadi8_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i8;
}

def zextloadi16_#as : PatFrag<(ops node:$ptr), (zextload node:$ptr)> {
  let IsLoad = 1;
  let MemoryVT = i16;
}

// Atomic loads with i8 / i16 / i32 / i64 memory types.
def atomic_load_8_#as : PatFrag<(ops node:$ptr), (atomic_load_8 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i8;
}

def atomic_load_16_#as : PatFrag<(ops node:$ptr), (atomic_load_16 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i16;
}

def atomic_load_32_#as : PatFrag<(ops node:$ptr), (atomic_load_32 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i32;
}

def atomic_load_64_#as : PatFrag<(ops node:$ptr), (atomic_load_64 node:$ptr)> {
  let IsAtomic = 1;
  let MemoryVT = i64;
}
} // End let AddressSpaces
} // End foreach as


// Store fragments, one set per address space, restricted to the address
// spaces listed in the matching StoreAddress_<as> record.
foreach as = [ "global", "flat", "local", "private", "region" ] in {
let AddressSpaces = !cast<AddressSpaceList>("StoreAddress_"#as).AddrSpaces in {
def store_#as : PatFrag<(ops node:$val, node:$ptr),
                        (unindexedstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

// truncstore fragments.
def truncstore_#as : PatFrag<(ops node:$val, node:$ptr),
                             (unindexedstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let IsTruncStore = 1;
}

// TODO: We don't really need the truncstore here. We can use
// unindexedstore with MemoryVT directly, which will save an
// unnecessary check that the memory size is less than the value type
// in the generated matcher table.
def truncstorei8_#as : PatFrag<(ops node:$val, node:$ptr),
                               (truncstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i8;
}

def truncstorei16_#as : PatFrag<(ops node:$val, node:$ptr),
                                (truncstore node:$val, node:$ptr)> {
  let IsStore = 1;
  let MemoryVT = i16;
}

// Stores of the value shifted right by 16 (see StoreHi16).
def store_hi16_#as : StoreHi16 <truncstorei16, i16>;
def truncstorei8_hi16_#as : StoreHi16<truncstorei8, i8>;
def truncstorei16_hi16_#as : StoreHi16<truncstorei16, i16>;

defm atomic_store_#as : binary_atomic_op<atomic_store>;

} // End let AddressSpaces
} // End foreach as


// For every address space, instantiate binary_atomic_op three times:
//   _<as>        - no extra predicate,
//   _<as>_noret  - only when result value 0 of the node has no uses,
//   _<as>_ret    - only when result value 0 of the node has uses.
multiclass ret_noret_binary_atomic_op<SDNode atomic_op, bit IsInt = 1> {
  foreach as = [ "global", "flat", "constant", "local", "private", "region" ] in {
    let AddressSpaces = !cast<AddressSpaceList>("LoadAddress_"#as).AddrSpaces in {
      defm "_"#as : binary_atomic_op<atomic_op, IsInt>;

      let PredicateCode = [{return (SDValue(N, 0).use_empty());}] in {
        defm "_"#as#"_noret" : binary_atomic_op<atomic_op, IsInt>;
      }

      let PredicateCode = [{return !(SDValue(N, 0).use_empty());}] in {
        defm "_"#as#"_ret" : binary_atomic_op<atomic_op, IsInt>;
      }
    }
  }
}

defm atomic_swap : ret_noret_binary_atomic_op<atomic_swap>;
defm atomic_load_add : ret_noret_binary_atomic_op<atomic_load_add>;
defm atomic_load_and : ret_noret_binary_atomic_op<atomic_load_and>;
defm atomic_load_max : ret_noret_binary_atomic_op<atomic_load_max>;
defm atomic_load_min : ret_noret_binary_atomic_op<atomic_load_min>;
defm atomic_load_or : ret_noret_binary_atomic_op<atomic_load_or>;
defm atomic_load_sub : ret_noret_binary_atomic_op<atomic_load_sub>;
defm atomic_load_umax : ret_noret_binary_atomic_op<atomic_load_umax>;
defm atomic_load_umin : ret_noret_binary_atomic_op<atomic_load_umin>;
defm atomic_load_xor : ret_noret_binary_atomic_op<atomic_load_xor>;
// The fadd variants pass IsInt = 0.
defm atomic_load_fadd : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
let MemoryVT = v2f16 in
defm atomic_load_fadd_v2f16 : ret_noret_binary_atomic_op<atomic_load_fadd, 0>;
defm AMDGPUatomic_cmp_swap : ret_noret_binary_atomic_op<AMDGPUatomic_cmp_swap>;

// Local loads and stores with a minimum alignment of 8 or 16 bytes.
def load_align8_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                        Aligned<8> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def load_align16_local : PatFrag<(ops node:$ptr), (load_local node:$ptr)>,
                         Aligned<16> {
  let IsLoad = 1;
  let IsNonExtLoad = 1;
}

def store_align8_local: PatFrag<(ops node:$val, node:$ptr),
                                (store_local node:$val, node:$ptr)>, Aligned<8> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

def store_align16_local: PatFrag<(ops node:$val, node:$ptr),
                                 (store_local node:$val, node:$ptr)>, Aligned<16> {
  let IsStore = 1;
  let IsTruncStore = 0;
}

// Compare-and-swap fragments for the local and region address spaces. The
// _m0 variants are built on the glued node.
let AddressSpaces = StoreAddress_local.AddrSpaces in {
defm atomic_cmp_swap_local : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_local_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

let AddressSpaces = StoreAddress_region.AddrSpaces in {
defm atomic_cmp_swap_region : ternary_atomic_op<atomic_cmp_swap>;
defm atomic_cmp_swap_region_m0 : ternary_atomic_op<atomic_cmp_swap_glue>;
}

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

// Named integer constants, exposed through the single CONST record below.
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_4294966784 = 0x4f7ffffe; // 4294966784 = 4294967296 - 512 = 2^32 - 2^9
int FP16_ONE = 0x3C00;
int FP16_NEG_ONE = 0xBC00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;

// Floating-point immediate leaves for zero, exactly 1.0 and exactly 0.5.
def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

// Selects fpow(src0, src1) as exp_ieee(mul(src1, log_ieee(src0))).
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : AMDGPUPat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : AMDGPUPat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : AMDGPUPat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : AMDGPUPat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
class DwordAddrPat<ValueType vt, RegisterClass rc> : AMDGPUPat <
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// rotr pattern
// The rotated value is passed as both of the first two BIT_ALIGN operands.
class ROTRPattern <Instruction BIT_ALIGN> : AMDGPUPat <
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;

// Special conversion patterns

// fp_to_sint(ffloor(src + 0.5)); matches only when
// TM.Options.NoNaNsFPMath is set.
def cvt_rpi_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

// fp_to_sint(ffloor(src)); matches only when TM.Options.NoNaNsFPMath is set.
def cvt_flr_i32_f32 : PatFrag <
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

// 24-bit multiply followed by an add, selected to a single Inst. When
// HasClamp is set, an extra (i1 0) operand is appended to the result.
let AddedComplexity = 2 in {
class IMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;

class UMad24Pat<Instruction Inst, bit HasClamp = 0> : AMDGPUPat <
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  !if(HasClamp, (Inst $src0, $src1, $src2, (i1 0)),
                (Inst $src0, $src1, $src2))
>;
} // AddedComplexity.

// Selects (fdiv 1.0, src) to RcpInst.
class RcpPat<Instruction RcpInst, ValueType vt> : AMDGPUPat <
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

// Instructions which select to the same v_min_f*
def fminnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee node:$src0, node:$src1),
   (fminnum node:$src0, node:$src1)]
>;

// Instructions which select to the same v_max_f*
def fmaxnum_like : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee node:$src0, node:$src1),
   (fmaxnum node:$src0, node:$src1)]
>;

// Single-use counterparts of fminnum_like / fmaxnum_like.
def fminnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fminnum_ieee_oneuse node:$src0, node:$src1),
   (fminnum_oneuse node:$src0, node:$src1)]
>;

def fmaxnum_like_oneuse : PatFrags<(ops node:$src0, node:$src1),
  [(fmaxnum_ieee_oneuse node:$src0, node:$src1),
   (fmaxnum_oneuse node:$src0, node:$src1)]
>;

// Either fmad or AMDGPUfmad_ftz.
def any_fmad : PatFrags<(ops node:$src0, node:$src1, node:$src2),
  [(fmad node:$src0, node:$src1, node:$src2),
   (AMDGPUfmad_ftz node:$src0, node:$src1, node:$src2)]
>;

// FIXME: fsqrt should not select directly
def any_amdgcn_sqrt : PatFrags<(ops node:$src0),
  [(fsqrt node:$src0), (int_amdgcn_sqrt node:$src0)]
>;