//===---- SMInstructions.td - Scalar Memory Instruction Definitions -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Immediate offset operand substituted into the input operand list of the SI
// real _IMM loads (see SM_Real_Loads_si).
def smrd_offset_8 : ImmOperand<i32, "SMRDOffset8", 1>;

// Offset operands that all go through the same SMEM offset encoder/decoder
// hooks.
let DecoderMethod = "decodeSMEMOffset",
    EncoderMethod = "getSMEMOffsetEncoding" in {
  def SMEMOffset    : ImmOperand<i32, "SMEMOffset", 1>;
  def SMEMOffsetMod : NamedIntOperand<i32, "offset", 0>;

  // Declared without the third template argument; immediate type, predicate
  // and print method are taken over from SMEMOffsetMod.
  def OptSMEMOffsetMod : NamedIntOperand<i32, "offset"> {
    let PrintMethod     = SMEMOffsetMod.PrintMethod;
    let PredicateMethod = SMEMOffsetMod.PredicateMethod;
    let ImmTy           = SMEMOffsetMod.ImmTy;
  }
}

//===----------------------------------------------------------------------===//
// Scalar Memory classes
//===----------------------------------------------------------------------===//

// Base class of every scalar-memory pseudo instruction.
//
// The pseudo itself has an empty assembly string; the mnemonic and the operand
// string are stored in Mnemonic/AsmOperands, from which SM_Real builds the
// assembly syntax. The has_* bits and is_buffer record which operands and
// properties a concrete instruction has; several of them are read by the real
// encoding classes further down.
class SM_Pseudo <string opName, dag outs, dag ins, string asmOps,
                 list<dag> pattern = []>
  : InstSI <outs, ins, "", pattern>,
    SIMCInstr<NAME, SIEncodingFamily.NONE> {
  let isCodeGenOnly = 1;
  let isPseudo      = 1;

  // Defaults: loads, does not store, no side effects. Subclasses override
  // whatever differs.
  let SMRD                 = 1;
  let LGKM_CNT             = 1;
  let mayLoad              = 1;
  let mayStore             = 0;
  let hasSideEffects       = 0;
  let maybeAtomic          = 0;
  let UseNamedOperandTable = 1;
  let SchedRW              = [WriteSMEM];

  string Mnemonic    = opName;
  string AsmOperands = asmOps;

  bits<1> has_sbase   = 1;
  bits<1> has_sdst    = 1;
  bit     has_glc     = 0;
  bit     has_dlc     = 0;
  bit     has_offset  = 0;
  bit     has_soffset = 0;
  bit     is_buffer   = 0;
}

// Base class of the encodable ("real") counterpart of a pseudo. The operand
// lists are those of the pseudo, and the assembly string is opName followed by
// the pseudo's operand string.
class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
  : InstSI<ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands> {

  let isCodeGenOnly = 0;
  let isPseudo      = 0;

  Instruction Opcode = !cast<Instruction>(NAME);

  // Properties mirrored from the pseudo.
  let LGKM_CNT             = ps.LGKM_CNT;
  let SMRD                 = ps.SMRD;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let hasSideEffects       = ps.hasSideEffects;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW              = ps.SchedRW;
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let OtherPredicates      = ps.OtherPredicates;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let Uses                 = ps.Uses;
  let Defs                 = ps.Defs;

  let TSFlags = ps.TSFlags;

  bit is_buffer = ps.is_buffer;

  // Operand fields referenced by the per-target Inst{...} assignments.
  bits<7>  sbase;
  bits<7>  sdst;
  bits<32> offset;
  bits<8>  soffset;
  bits<5>  cpol;
}

// One addressing form of an SMEM instruction: which offset operands are
// present, the variant suffix (appended to the opcode name to form PseudoInstr
// in SM_Load_Pseudos), and the matching input-operand and assembly fragments.
class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
                 dag ins, string asm> {
  bit    HasOffset  = hasOffset;
  bit    HasSOffset = hasSOffset;
  string Variant    = variant;
  dag    Ins        = ins;
  string Asm        = asm;
}

def IMM_Offset  : OffsetMode<1, 0, "_IMM", (ins SMEMOffset:$offset), "$offset">;
def SGPR_Offset : OffsetMode<0, 1, "_SGPR", (ins SReg_32:$soffset), "$soffset">;
def SGPR_IMM_Offset
  : OffsetMode<1, 1, "_SGPR_IMM",
               (ins SReg_32:$soffset, SMEMOffsetMod:$offset),
               "$soffset$offset">;
// Same as SGPR_IMM_Offset except that the immediate is an OptSMEMOffsetMod.
def SGPR_IMM_OptOffset
  : OffsetMode<1, 1, "_SGPR_IMM",
               (ins SReg_32:$soffset, OptSMEMOffsetMod:$offset),
               "$soffset$offset">;

// Probe pseudo: no outputs, an i8imm $sdata, the base register and the
// operands of the selected offset mode. Neither loads nor stores, but is
// marked as having side effects.
class SM_Probe_Pseudo <string opName, RegisterClass baseClass,
                       OffsetMode offsets>
  : SM_Pseudo<opName, (outs),
              !con((ins i8imm:$sdata, baseClass:$sbase), offsets.Ins),
              " $sdata, $sbase, " # offsets.Asm> {
  let mayLoad        = 0;
  let mayStore       = 0;
  let hasSideEffects = 1;
  let ScalarStore    = 0;
  let LGKM_CNT       = 0;
  let has_glc        = 0;
  let has_offset     = offsets.HasOffset;
  let has_soffset    = offsets.HasSOffset;
}

// Load pseudo: defines $sdst and takes $sbase, the operands of the selected
// offset mode and a trailing CPol operand. Marked rematerializable. The base
// register class is kept in BaseClass so real encodings can rebuild the
// operand list.
class SM_Load_Pseudo <string opName, RegisterClass baseClass,
                      RegisterClass dstClass, OffsetMode offsets>
  : SM_Pseudo<opName, (outs dstClass:$sdst),
              !con((ins baseClass:$sbase), offsets.Ins, (ins CPol:$cpol)),
              " $sdst, $sbase, " # offsets.Asm # "$cpol", []> {
  RegisterClass BaseClass = baseClass;
  let mayLoad            = 1;
  let mayStore           = 0;
  let isReMaterializable = 1;
  let has_glc            = 1;
  let has_dlc            = 1;
  let has_offset         = offsets.HasOffset;
  let has_soffset        = offsets.HasSOffset;
}

// Store pseudo: no outputs; takes $sdata, $sbase, the operands of the selected
// offset mode and a trailing CPol operand.
class SM_Store_Pseudo <string opName, RegisterClass baseClass,
                       RegisterClass srcClass, OffsetMode offsets>
  : SM_Pseudo<opName, (outs),
              !con((ins srcClass:$sdata, baseClass:$sbase), offsets.Ins,
                   (ins CPol:$cpol)),
              " $sdata, $sbase, " # offsets.Asm # "$cpol"> {
  RegisterClass BaseClass = baseClass;
  let mayLoad     = 0;
  let mayStore    = 1;
  let ScalarStore = 1;
  let has_glc     = 1;
  let has_dlc     = 1;
  let has_offset  = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
}

// Discard pseudo: no outputs and no $sdst; takes a 64-bit $sbase and the
// operands of the selected offset mode. Marked as having side effects.
class SM_Discard_Pseudo <string opName, OffsetMode offsets>
  : SM_Pseudo<opName, (outs), !con((ins SReg_64:$sbase), offsets.Ins),
              " $sbase, " # offsets.Asm> {
  let mayLoad        = 0;
  let mayStore       = 0;
  let hasSideEffects = 1;
  let ScalarStore    = 0;
  let has_glc        = 0;
  let has_sdst       = 0;
  let has_offset     = offsets.HasOffset;
  let has_soffset    = offsets.HasSOffset;
}

// Emits the load pseudo for one offset mode and, for destinations wider than
// 32 bits, an additional "_ec" twin.
multiclass SM_Load_Pseudos<string op, RegisterClass baseClass,
                           RegisterClass dstClass, OffsetMode offsets> {
  defvar mnemonic = !tolower(op);
  def "" : SM_Load_Pseudo <mnemonic, baseClass, dstClass, offsets>;

  // The constrained multi-dword load equivalents with early clobber flag at
  // the dst operands. They are needed only for codegen and there is no need
  // for their real opcodes.
  if !gt(dstClass.RegTypes[0].Size, 32) then
    let Constraints = "@earlyclobber $sdst",
        PseudoInstr = op # offsets.Variant in
      def "" # _ec : SM_Load_Pseudo <mnemonic, baseClass, dstClass, offsets>;
}

// Load pseudos for the _IMM, _SGPR and _SGPR_IMM offset modes.
multiclass SM_Pseudo_Loads<RegisterClass baseClass,
                           RegisterClass dstClass> {
  defm _IMM      : SM_Load_Pseudos <NAME, baseClass, dstClass, IMM_Offset>;
  defm _SGPR     : SM_Load_Pseudos <NAME, baseClass, dstClass, SGPR_Offset>;
  defm _SGPR_IMM : SM_Load_Pseudos <NAME, baseClass, dstClass, SGPR_IMM_Offset>;
}

// Store pseudos for the _IMM, _SGPR and _SGPR_IMM offset modes.
multiclass SM_Pseudo_Stores<RegisterClass baseClass,
                            RegisterClass srcClass> {
  defvar mnemonic = !tolower(NAME);
  def _IMM      : SM_Store_Pseudo <mnemonic, baseClass, srcClass, IMM_Offset>;
  def _SGPR     : SM_Store_Pseudo <mnemonic, baseClass, srcClass, SGPR_Offset>;
  def _SGPR_IMM : SM_Store_Pseudo <mnemonic, baseClass, srcClass, SGPR_IMM_Offset>;
}

// Discard pseudos for the _IMM, _SGPR and _SGPR_IMM offset modes.
multiclass SM_Pseudo_Discards {
  defvar mnemonic = !tolower(NAME);
  def _IMM      : SM_Discard_Pseudo <mnemonic, IMM_Offset>;
  def _SGPR     : SM_Discard_Pseudo <mnemonic, SGPR_Offset>;
  def _SGPR_IMM : SM_Discard_Pseudo <mnemonic, SGPR_IMM_Offset>;
}

// Pseudo with a single 64-bit result and no inputs, selected from `node`
// (null_frag by default). Has side effects; no $sbase.
class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag>
  : SM_Pseudo<opName, (outs SReg_64_XEXEC:$sdst), (ins),
              " $sdst", [(set i64:$sdst, (node))]> {
  let hasSideEffects = 1;

  let mayLoad   = 0;
  let mayStore  = 0;
  let has_sbase = 0;
}

// Operand-less pseudo selected from `node` (null_frag by default). Has side
// effects; neither $sdst nor $sbase.
class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag>
  : SM_Pseudo<opName, (outs), (ins), "", [(node)]> {
  let hasSideEffects = 1;
  let mayLoad        = 0;
  let mayStore       = 0;
  let has_sbase      = 0;
  let has_sdst       = 0;
}

// Probe pseudos for the _IMM, _SGPR and _SGPR_IMM offset modes, plus an
// _SGPR_OPT_IMM form built on SGPR_IMM_OptOffset.
multiclass SM_Pseudo_Probe<RegisterClass baseClass> {
  defvar mnemonic = !tolower(NAME);
  def _IMM          : SM_Probe_Pseudo <mnemonic, baseClass, IMM_Offset>;
  def _SGPR         : SM_Probe_Pseudo <mnemonic, baseClass, SGPR_Offset>;
  def _SGPR_IMM     : SM_Probe_Pseudo <mnemonic, baseClass, SGPR_IMM_Offset>;
  def _SGPR_OPT_IMM : SM_Probe_Pseudo <mnemonic, baseClass, SGPR_IMM_OptOffset>;
}

// Pseudo with a single 32-bit result and no inputs, selected from `node`.
// Has side effects; no $sbase.
class SM_WaveId_Pseudo<string opName, SDPatternOperator node>
  : SM_Pseudo<opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins),
              " $sdst", [(set i32:$sdst, (node))]> {
  let hasSideEffects = 1;
  let mayLoad        = 0;
  let mayStore       = 0;
  let has_sbase      = 0;
}

// Prefetch pseudo. $sbase is part of the operand list and of the assembly
// string only when hasSBase is set; $offset, $soffset and an i8imm $sdata are
// always present.
class SM_Prefetch_Pseudo <string opName, RegisterClass baseClass, bit hasSBase>
  : SM_Pseudo<opName, (outs),
              !con(!if(hasSBase, (ins baseClass:$sbase), (ins)),
                   (ins SMEMOffset:$offset, SReg_32:$soffset, i8imm:$sdata)),
              !if(hasSBase, " $sbase,", "") # " $offset, $soffset, $sdata"> {
  // Mark prefetches as both load and store to prevent reordering with loads
  // and stores. This is also needed for pattern to match prefetch intrinsic.
  let mayLoad     = 1;
  let mayStore    = 1;
  let ScalarStore = 0;
  let LGKM_CNT    = 0;
  let has_glc     = 0;
  let has_sbase   = hasSBase;
  let has_offset  = 1;
  let has_soffset = 1;
}

//===----------------------------------------------------------------------===//
// Scalar Atomic Memory Classes
//===----------------------------------------------------------------------===//

// Base class of the scalar atomic pseudos. `glc` mirrors isRet and is read by
// SMEM_Atomic_Real_vi; IsAtomicRet/IsAtomicNoRet are set from isRet as well.
class SM_Atomic_Pseudo <string opName,
                        dag outs, dag ins, string asmOps, bit isRet>
  : SM_Pseudo<opName, outs, ins, asmOps, []> {

  bit glc = isRet;

  let mayLoad     = 1;
  let mayStore    = 1;
  let has_glc     = 1;
  let has_dlc     = 1;
  let has_soffset = 1;

  // Should these be set?
  let ScalarStore    = 1;
  let hasSideEffects = 1;
  let maybeAtomic    = 1;

  let IsAtomicRet   = isRet;
  let IsAtomicNoRet = !not(isRet);
}

// Concrete atomic pseudo for one offset mode. The returning form (isRet = 1)
// defines $sdst, ties it to $sdata, removes $sdata from the encoding and
// prints $sdst; the non-returning form has no outputs and prints $sdata. The
// CPol operand type defaults to CPol_GLC / CPol_NonGLC accordingly.
class SM_Pseudo_Atomic<string opName,
                       RegisterClass baseClass,
                       RegisterClass dataClass,
                       OffsetMode offsets,
                       bit isRet,
                       Operand CPolTy = !if(isRet, CPol_GLC, CPol_NonGLC)>
  : SM_Atomic_Pseudo<opName,
                     !if(isRet, (outs dataClass:$sdst), (outs)),
                     !con((ins dataClass:$sdata, baseClass:$sbase), offsets.Ins,
                          (ins CPolTy:$cpol)),
                     !if(isRet, " $sdst", " $sdata") #
                       ", $sbase, " # offsets.Asm # "$cpol",
                     isRet> {
  let has_offset  = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;

  let Constraints     = !if(isRet, "$sdst = $sdata", "");
  let DisableEncoding = !if(isRet, "$sdata", "");
}

// Non-returning and returning (_RTN) atomic pseudos for each of the _IMM,
// _SGPR and _SGPR_IMM offset modes.
multiclass SM_Pseudo_Atomics<RegisterClass baseClass,
                             RegisterClass dataClass> {
  defvar mnemonic = !tolower(NAME);
  def _IMM          : SM_Pseudo_Atomic <mnemonic, baseClass, dataClass, IMM_Offset, 0>;
  def _SGPR         : SM_Pseudo_Atomic <mnemonic, baseClass, dataClass, SGPR_Offset, 0>;
  def _SGPR_IMM     : SM_Pseudo_Atomic <mnemonic, baseClass, dataClass, SGPR_IMM_Offset, 0>;
  def _IMM_RTN      : SM_Pseudo_Atomic <mnemonic, baseClass, dataClass, IMM_Offset, 1>;
  def _SGPR_RTN     : SM_Pseudo_Atomic <mnemonic, baseClass, dataClass, SGPR_Offset, 1>;
  def _SGPR_IMM_RTN : SM_Pseudo_Atomic <mnemonic, baseClass, dataClass, SGPR_IMM_Offset, 1>;
}

//===----------------------------------------------------------------------===//
// Scalar Memory Instructions
//===----------------------------------------------------------------------===//

// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SReg_32_XM0 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.

// XXX - SMEM instructions do not allow exec for data operand, but
// does sdst for SMRD on SI/CI?

// Plain scalar loads: 64-bit base register.
defm S_LOAD_DWORD    : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_DWORDX2  : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
let SubtargetPredicate = HasScalarDwordx3Loads in {
  defm S_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_64, SReg_96>;
}
defm S_LOAD_DWORDX4  : SM_Pseudo_Loads <SReg_64, SReg_128>;
defm S_LOAD_DWORDX8  : SM_Pseudo_Loads <SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_64, SReg_512>;
defm S_LOAD_I8       : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_U8       : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_I16      : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_U16      : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;

// Buffer loads: 128-bit base register, is_buffer set.
let is_buffer = 1 in {
  defm S_BUFFER_LOAD_DWORD    : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
  // FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
  // SI/CI, but disallowed for SMEM on VI.
  defm S_BUFFER_LOAD_DWORDX2  : SM_Pseudo_Loads <SReg_128, SReg_64_XEXEC>;
  let SubtargetPredicate = HasScalarDwordx3Loads in {
    defm S_BUFFER_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_128, SReg_96>;
  }
  defm S_BUFFER_LOAD_DWORDX4  : SM_Pseudo_Loads <SReg_128, SReg_128>;
  defm S_BUFFER_LOAD_DWORDX8  : SM_Pseudo_Loads <SReg_128, SReg_256>;
  defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
  defm S_BUFFER_LOAD_I8       : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
  defm S_BUFFER_LOAD_U8       : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
  defm S_BUFFER_LOAD_I16      : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
  defm S_BUFFER_LOAD_U16      : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
}

// Scalar stores, plain and buffer.
let SubtargetPredicate = HasScalarStores in {
  defm S_STORE_DWORD   : SM_Pseudo_Stores <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_64, SReg_64_XEXEC>;
  defm S_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;

  let is_buffer = 1 in {
    defm S_BUFFER_STORE_DWORD   : SM_Pseudo_Stores <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_128, SReg_128>;
  }
} // End SubtargetPredicate = HasScalarStores

// Only S_MEMTIME is guarded by HasSMemTimeInst; S_DCACHE_INV is unguarded.
let SubtargetPredicate = HasSMemTimeInst in {
  def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
}
def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;

let SubtargetPredicate = isGFX7GFX8GFX9 in {
  def S_DCACHE_INV_VOL : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
} // let SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX8Plus in {
  let OtherPredicates = [HasScalarStores] in {
    def S_DCACHE_WB     : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
    def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
  } // End OtherPredicates = [HasScalarStores]

  defm S_ATC_PROBE : SM_Pseudo_Probe <SReg_64>;
  let is_buffer = 1 in {
    defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <SReg_128>;
  }
} // SubtargetPredicate = isGFX8Plus

let SubtargetPredicate = HasSMemRealTime in {
  def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
}

// S_GL1_INV uses the default null_frag node, i.e. it has no selection pattern.
let SubtargetPredicate = isGFX10Plus in {
  def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
}
let SubtargetPredicate = HasGetWaveIdInst in {
  def S_GET_WAVEID_IN_WORKGROUP
    : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup",
                        int_amdgcn_s_get_waveid_in_workgroup>;
}


// Scalar scratch loads and stores; every one of them uses FLAT_SCR.
let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
  defm S_SCRATCH_LOAD_DWORD   : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
  defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_64, SReg_128>;

  defm S_SCRATCH_STORE_DWORD   : SM_Pseudo_Stores <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_64, SReg_64_XEXEC>;
  defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;
} // SubtargetPredicate = HasScalarFlatScratchInsts

// Scalar atomics. The CMPSWAP forms use a data register class twice as wide
// as the other operations of the same group.
let SubtargetPredicate = HasScalarAtomics in {

  let is_buffer = 1 in {
    defm S_BUFFER_ATOMIC_SWAP    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_ADD     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_SUB     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_SMIN    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_UMIN    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_SMAX    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_UMAX    : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_AND     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_OR      : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_XOR     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_INC     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
    defm S_BUFFER_ATOMIC_DEC     : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;

    defm S_BUFFER_ATOMIC_SWAP_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <SReg_128, SReg_128>;
    defm S_BUFFER_ATOMIC_ADD_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_SUB_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_SMIN_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_UMIN_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_SMAX_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_UMAX_X2    : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_AND_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_OR_X2      : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_XOR_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_INC_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
    defm S_BUFFER_ATOMIC_DEC_X2     : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
  }

  defm S_ATOMIC_SWAP    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_ADD     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_SUB     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_SMIN    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_UMIN    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_SMAX    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_UMAX    : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_AND     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_OR      : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_XOR     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_INC     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
  defm S_ATOMIC_DEC     : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;

  defm S_ATOMIC_SWAP_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <SReg_64, SReg_128>;
  defm S_ATOMIC_ADD_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_SUB_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_SMIN_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_UMIN_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_SMAX_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_UMAX_X2    : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_AND_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_OR_X2      : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_XOR_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_INC_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
  defm S_ATOMIC_DEC_X2     : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;

} // let SubtargetPredicate = HasScalarAtomics

// The discard instructions are guarded by the same HasScalarAtomics predicate.
let SubtargetPredicate = HasScalarAtomics in {
  defm S_DCACHE_DISCARD    : SM_Pseudo_Discards;
  defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards;
}

// Prefetches. The *_PC_REL forms are instantiated with hasSBase = 0 and so
// have no $sbase operand.
let SubtargetPredicate = isGFX12Plus in {
  def S_PREFETCH_INST        : SM_Prefetch_Pseudo <"s_prefetch_inst", SReg_64, 1>;
  def S_PREFETCH_INST_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_inst_pc_rel", SReg_64, 0>;
  def S_PREFETCH_DATA        : SM_Prefetch_Pseudo <"s_prefetch_data", SReg_64, 1>;
  def S_PREFETCH_DATA_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_data_pc_rel", SReg_64, 0>;
  def S_BUFFER_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_buffer_prefetch_data", SReg_128, 1> {
    let is_buffer = 1;
  }
} // end let SubtargetPredicate = isGFX12Plus

//===----------------------------------------------------------------------===//
// Targets
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SI
//===----------------------------------------------------------------------===//

// 32-bit SMRD encoding, assembled under isGFX6GFX7 and decoded in the
// "GFX6GFX7" namespace. Bits 7-0 carry the low 8 bits of the immediate offset
// when the pseudo has one, otherwise soffset when the pseudo has one; bit 8
// records whether an immediate offset is present. sbase is stored without its
// lowest bit.
class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
  : SM_Real<ps>,
    SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>,
    Enc32 {

  let AssemblerPredicate = isGFX6GFX7;
  let DecoderNamespace   = "GFX6GFX7";

  let Inst{31-27} = 0x18; // encoding
  let Inst{26-22} = op;
  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{8}     = ps.has_offset;
  let Inst{7-0}   = !if(ps.has_offset, offset{7-0},
                        !if(ps.has_soffset, soffset, ?));
}

// SI reals for the _IMM and _SGPR forms of a load. The _IMM form replaces the
// pseudo's offset operand with smrd_offset_8.
multiclass SM_Real_Loads_si<bits<5> op> {
  defvar psName = NAME;

  defvar immPseudo = !cast<SM_Load_Pseudo>(psName#_IMM);
  def _IMM_si : SMRD_Real_si <op, immPseudo> {
    let InOperandList = (ins immPseudo.BaseClass:$sbase, smrd_offset_8:$offset, CPol:$cpol);
  }

  defvar sgprPseudo = !cast<SM_Load_Pseudo>(psName#_SGPR);
  def _SGPR_si : SMRD_Real_si <op, sgprPseudo>;
}

defm S_LOAD_DWORD           : SM_Real_Loads_si <0x00>;
defm S_LOAD_DWORDX2         : SM_Real_Loads_si <0x01>;
defm S_LOAD_DWORDX4         : SM_Real_Loads_si <0x02>;
defm S_LOAD_DWORDX8         : SM_Real_Loads_si <0x03>;
defm S_LOAD_DWORDX16        : SM_Real_Loads_si <0x04>;
defm S_BUFFER_LOAD_DWORD    : SM_Real_Loads_si <0x08>;
defm S_BUFFER_LOAD_DWORDX2  : SM_Real_Loads_si <0x09>;
defm S_BUFFER_LOAD_DWORDX4  : SM_Real_Loads_si <0x0a>;
defm S_BUFFER_LOAD_DWORDX8  : SM_Real_Loads_si <0x0b>;
defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si <0x0c>;

def S_MEMTIME_si    : SMRD_Real_si <0x1e, S_MEMTIME>;
def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>;


//===----------------------------------------------------------------------===//
// VI and GFX9.
//===----------------------------------------------------------------------===//

// 64-bit SMEM encoding shared by VI and GFX9, decoded in the "GFX8" namespace.
// IsGFX9SpecificEncoding selects the assembler predicate (isGFX9Only instead
// of isGFX8GFX9) and enables the GFX9-only soffset_en bit and soffset field.
class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
  : SM_Real<ps>,
    SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>,
    Enc64 {
  field bit IsGFX9SpecificEncoding = false;
  let AssemblerPredicate = !if(IsGFX9SpecificEncoding, isGFX9Only, isGFX8GFX9);
  let DecoderNamespace   = "GFX8";

  // sbase is stored without its lowest bit.
  let Inst{5-0}  = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);

  // Note that for GFX9 instructions with immediate offsets, soffset_en
  // must be defined, whereas in GFX8 it's undefined in all cases,
  // meaning GFX9 is not perfectly backward-compatible with GFX8, despite
  // documentation suggesting otherwise.
  field bit SOffsetEn = !if(IsGFX9SpecificEncoding,
                            !if(ps.has_offset,
                                ps.has_soffset,
                                !if(ps.has_soffset, 0, ?)),
                            ?);
  let Inst{14} = SOffsetEn;

  let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);

  // imm
  // TODO: Shall not be defined if the instruction has no offset nor
  // soffset.
  let Inst{17} = ps.has_offset;

  let Inst{25-18} = op;
  let Inst{31-26} = 0x30; // encoding

  // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed.
  // Offset value is corrected accordingly when offset is encoded/decoded.
  // TODO: Forbid non-M0 register offsets for GFX8 stores and atomics.
  field bits<21> Offset;
  let Offset{6-0}  = !if(ps.has_offset, offset{6-0},
                         !if(ps.has_soffset, soffset{6-0}, ?));
  let Offset{20-7} = !if(ps.has_offset, offset{20-7}, ?);
  let Inst{52-32}  = Offset;

  // soffset
  let Inst{63-57} = !if(!and(IsGFX9SpecificEncoding, ps.has_soffset),
                        soffset{6-0}, ?);
}

// Convenience wrapper that looks the pseudo up by name.
class SMEM_Real_Load_vi<bits<8> op, string ps>
  : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps)>;

// The alternative GFX9 SGPR encoding using soffset to encode the
// offset register. Not available in assembler and goes to the GFX9
// encoding family to avoid conflicts with the primary SGPR variant.
class SMEM_Real_SGPR_alt_gfx9 {
  bit    IsGFX9SpecificEncoding = true;
  bit    SOffsetEn              = 1;
  bit    Offset                 = ?;
  int    Subtarget              = SIEncodingFamily.GFX9;
  string AsmVariantName         = "NonParsable";
}

// VI/GFX9 reals of a load: _IMM, _SGPR, the alternative GFX9 _SGPR encoding,
// and the GFX9-only _SGPR_IMM form.
multiclass SM_Real_Loads_vi<bits<8> op> {
  defvar psName = NAME;
  def _IMM_vi  : SMEM_Real_Load_vi <op, psName#"_IMM">;
  def _SGPR_vi : SMEM_Real_Load_vi <op, psName#"_SGPR">;
  def _SGPR_alt_gfx9
    : SMEM_Real_Load_vi <op, psName#"_SGPR">,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi <op, psName#"_SGPR_IMM">;
}

// Encoding for instructions whose register operand is $sdata rather than
// $sdst: bits 12-6 are re-assigned to sdata and sdst is left undefined.
class SMEM_Real_Store_Base_vi <bits<8> op, SM_Pseudo ps>
  : SMEM_Real_vi <op, ps> {
  // encoding
  bits<7> sdata;

  let sdst       = ?;
  let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
}

// Convenience wrapper that looks the pseudo up by name.
class SMEM_Real_Store_vi <bits<8> op, string ps>
  : SMEM_Real_Store_Base_vi <op, !cast<SM_Pseudo>(ps)>;

// VI/GFX9 reals of a store; same four variants as SM_Real_Loads_vi.
multiclass SM_Real_Stores_vi<bits<8> op> {
  defvar psName = NAME;
  def _IMM_vi  : SMEM_Real_Store_vi <op, psName#"_IMM">;
  def _SGPR_vi : SMEM_Real_Store_vi <op, psName#"_SGPR">;
  def _SGPR_alt_gfx9
    : SMEM_Real_Store_vi <op, psName#"_SGPR">,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9 : SMEM_Real_Store_vi <op, psName#"_SGPR_IMM">;
}

// VI/GFX9 reals of a probe; these reuse the store encoding because the probe
// pseudos carry $sdata.
multiclass SM_Real_Probe_vi<bits<8> op> {
  defvar psName = NAME;
  def _IMM_vi  : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(psName#_IMM)>;
  def _SGPR_vi : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(psName#_SGPR)>;
  def _SGPR_alt_gfx9
    : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(psName#_SGPR)>,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9
    : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(psName#_SGPR_IMM)>;
}

defm S_LOAD_DWORD           : SM_Real_Loads_vi <0x00>;
defm S_LOAD_DWORDX2         : SM_Real_Loads_vi <0x01>;
defm S_LOAD_DWORDX4         : SM_Real_Loads_vi <0x02>;
defm S_LOAD_DWORDX8         : SM_Real_Loads_vi <0x03>;
defm S_LOAD_DWORDX16        : SM_Real_Loads_vi <0x04>;
defm S_BUFFER_LOAD_DWORD    : SM_Real_Loads_vi <0x08>;
defm S_BUFFER_LOAD_DWORDX2  : SM_Real_Loads_vi <0x09>;
defm S_BUFFER_LOAD_DWORDX4  : SM_Real_Loads_vi <0x0a>;
defm S_BUFFER_LOAD_DWORDX8  : SM_Real_Loads_vi <0x0b>;
defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c>;

defm S_STORE_DWORD   : SM_Real_Stores_vi <0x10>;
defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11>;
defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12>;

defm S_BUFFER_STORE_DWORD   : SM_Real_Stores_vi <0x18>;
defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_vi <0x19>;
defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_vi <0x1a>;

// These instructions use the same encoding
def S_DCACHE_INV_vi     : SMEM_Real_vi <0x20, S_DCACHE_INV>;
def S_DCACHE_WB_vi      : SMEM_Real_vi <0x21, S_DCACHE_WB>;
def S_DCACHE_INV_VOL_vi : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>;
def S_DCACHE_WB_VOL_vi  : SMEM_Real_vi <0x23, S_DCACHE_WB_VOL>;
def S_MEMTIME_vi        : SMEM_Real_vi <0x24, S_MEMTIME>;
def S_MEMREALTIME_vi    : SMEM_Real_vi <0x25, S_MEMREALTIME>;

defm S_SCRATCH_LOAD_DWORD   : SM_Real_Loads_vi <0x05>;
defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_vi <0x06>;
defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_vi <0x07>;

defm S_SCRATCH_STORE_DWORD   : SM_Real_Stores_vi <0x15>;
defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_vi <0x16>;
defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_vi <0x17>;

defm S_ATC_PROBE        : SM_Real_Probe_vi <0x26>;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_vi <0x27>;

//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//

// Real encoding of a scalar atomic. The GLC cache-policy bit is forced to the
// pseudo's `glc` value, and bits 12-6 hold sdst for returning atomics and
// sdata otherwise. Constraints and DisableEncoding are taken from the pseudo.
class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
  : SMEM_Real_vi <op, ps> {

  bits<7> sdata;

  let Constraints     = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  let cpol{CPolBit.GLC} = ps.glc;
  let Inst{12-6}        = !if(ps.glc, sdst{6-0}, sdata{6-0});
}

// Reals of an atomic: the four encoding variants for the non-returning form
// followed by the same four for the returning (_RTN) form.
multiclass SM_Real_Atomics_vi<bits<8> op> {
  defvar psName = NAME;
  def _IMM_vi  : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(psName#_IMM)>;
  def _SGPR_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(psName#_SGPR)>;
  def _SGPR_alt_gfx9
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(psName#_SGPR)>,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(psName#_SGPR_IMM)>;
  def _IMM_RTN_vi  : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(psName#_IMM_RTN)>;
  def _SGPR_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(psName#_SGPR_RTN)>;
  def _SGPR_RTN_alt_gfx9
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(psName#_SGPR_RTN)>,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_RTN_gfx9
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(psName#_SGPR_IMM_RTN)>;
}

defm S_BUFFER_ATOMIC_SWAP    : SM_Real_Atomics_vi <0x40>;
defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x41>;
defm S_BUFFER_ATOMIC_ADD     : SM_Real_Atomics_vi <0x42>;
defm S_BUFFER_ATOMIC_SUB     : SM_Real_Atomics_vi <0x43>;
defm S_BUFFER_ATOMIC_SMIN    : SM_Real_Atomics_vi <0x44>;
defm S_BUFFER_ATOMIC_UMIN    : SM_Real_Atomics_vi <0x45>;
defm S_BUFFER_ATOMIC_SMAX    : SM_Real_Atomics_vi <0x46>;
defm S_BUFFER_ATOMIC_UMAX    : SM_Real_Atomics_vi <0x47>;
defm S_BUFFER_ATOMIC_AND     : SM_Real_Atomics_vi <0x48>;
defm S_BUFFER_ATOMIC_OR      : SM_Real_Atomics_vi <0x49>;
defm S_BUFFER_ATOMIC_XOR     : SM_Real_Atomics_vi <0x4a>;
defm S_BUFFER_ATOMIC_INC     : SM_Real_Atomics_vi <0x4b>;
defm S_BUFFER_ATOMIC_DEC     : SM_Real_Atomics_vi <0x4c>;

defm S_BUFFER_ATOMIC_SWAP_X2    : SM_Real_Atomics_vi <0x60>;
defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0x61>;
defm S_BUFFER_ATOMIC_ADD_X2     : SM_Real_Atomics_vi <0x62>;
defm S_BUFFER_ATOMIC_SUB_X2     : SM_Real_Atomics_vi <0x63>;
defm S_BUFFER_ATOMIC_SMIN_X2    : SM_Real_Atomics_vi <0x64>;
defm S_BUFFER_ATOMIC_UMIN_X2    : SM_Real_Atomics_vi <0x65>;
defm S_BUFFER_ATOMIC_SMAX_X2    : SM_Real_Atomics_vi <0x66>;
defm S_BUFFER_ATOMIC_UMAX_X2    : SM_Real_Atomics_vi <0x67>;
defm S_BUFFER_ATOMIC_AND_X2     : SM_Real_Atomics_vi <0x68>;
defm S_BUFFER_ATOMIC_OR_X2      : SM_Real_Atomics_vi <0x69>;
defm S_BUFFER_ATOMIC_XOR_X2     : SM_Real_Atomics_vi <0x6a>;
defm S_BUFFER_ATOMIC_INC_X2     : SM_Real_Atomics_vi <0x6b>;
defm S_BUFFER_ATOMIC_DEC_X2     : SM_Real_Atomics_vi <0x6c>;

defm S_ATOMIC_SWAP    : SM_Real_Atomics_vi <0x80>;
defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x81>;
defm S_ATOMIC_ADD     : SM_Real_Atomics_vi <0x82>;
defm S_ATOMIC_SUB     : SM_Real_Atomics_vi <0x83>;
defm S_ATOMIC_SMIN    : SM_Real_Atomics_vi <0x84>;
defm S_ATOMIC_UMIN    : SM_Real_Atomics_vi <0x85>;
defm S_ATOMIC_SMAX    : SM_Real_Atomics_vi <0x86>;
defm S_ATOMIC_UMAX    : SM_Real_Atomics_vi <0x87>;
defm S_ATOMIC_AND     : SM_Real_Atomics_vi <0x88>;
defm S_ATOMIC_OR      : SM_Real_Atomics_vi <0x89>;
defm S_ATOMIC_XOR     : SM_Real_Atomics_vi <0x8a>;
defm S_ATOMIC_INC     : SM_Real_Atomics_vi <0x8b>;
defm S_ATOMIC_DEC     : SM_Real_Atomics_vi <0x8c>;

defm S_ATOMIC_SWAP_X2    : SM_Real_Atomics_vi <0xa0>;
defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_vi <0xa1>;
defm S_ATOMIC_ADD_X2     : SM_Real_Atomics_vi <0xa2>;
defm S_ATOMIC_SUB_X2     : SM_Real_Atomics_vi <0xa3>;
defm S_ATOMIC_SMIN_X2    : SM_Real_Atomics_vi <0xa4>;
defm S_ATOMIC_UMIN_X2    : SM_Real_Atomics_vi <0xa5>;
defm S_ATOMIC_SMAX_X2    : SM_Real_Atomics_vi <0xa6>;
defm S_ATOMIC_UMAX_X2    : SM_Real_Atomics_vi <0xa7>;
defm S_ATOMIC_AND_X2     : SM_Real_Atomics_vi <0xa8>;
defm S_ATOMIC_OR_X2      : SM_Real_Atomics_vi <0xa9>;
defm S_ATOMIC_XOR_X2     : SM_Real_Atomics_vi <0xaa>;
defm S_ATOMIC_INC_X2     : SM_Real_Atomics_vi <0xab>;
defm S_ATOMIC_DEC_X2     : SM_Real_Atomics_vi <0xac>;

// VI/GFX9 reals of a discard: _IMM, _SGPR, the alternative GFX9 _SGPR
// encoding, and the GFX9-only _SGPR_IMM form.
multiclass SM_Real_Discard_vi<bits<8> op> {
  defvar psName = NAME;
  def _IMM_vi  : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(psName#_IMM)>;
  def _SGPR_vi : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(psName#_SGPR)>;
  def _SGPR_alt_gfx9
    : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(psName#_SGPR)>,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9 : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(psName#_SGPR_IMM)>;
}

defm S_DCACHE_DISCARD    : SM_Real_Discard_vi <0x28>;
defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_vi <0x29>;

//===----------------------------------------------------------------------===//
// CI
//===----------------------------------------------------------------------===//

def smrd_literal_offset : ImmOperand<i32, "SMRDLiteralOffset">;

// 64-bit CI load encoding: the low dword has 0xff in the offset field and 0 in
// bit 8, and the full 32-bit offset is placed in bits 63-32. The operand list
// is rebuilt with smrd_literal_offset as the offset operand.
class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps>
  : SM_Real<ps>,
    Enc64 {

  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace   = "GFX7";
  let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, CPol:$cpol);

  let Inst{63-32} = offset{31-0};
  let Inst{31-27} = 0x18; // encoding
  let Inst{26-22} = op;
  let Inst{21-15} = sdst{6-0};
  let Inst{14-9}  = sbase{6-1};
  let Inst{8}     = 0;
  let Inst{7-0}   = 0xff;
}

def S_LOAD_DWORD_IMM_ci           : SMRD_Real_Load_IMM_ci <0x00, S_LOAD_DWORD_IMM>;
def S_LOAD_DWORDX2_IMM_ci         : SMRD_Real_Load_IMM_ci <0x01, S_LOAD_DWORDX2_IMM>;
def S_LOAD_DWORDX4_IMM_ci         : SMRD_Real_Load_IMM_ci <0x02, S_LOAD_DWORDX4_IMM>;
def S_LOAD_DWORDX8_IMM_ci         : SMRD_Real_Load_IMM_ci <0x03, S_LOAD_DWORDX8_IMM>;
def S_LOAD_DWORDX16_IMM_ci        : SMRD_Real_Load_IMM_ci <0x04, S_LOAD_DWORDX16_IMM>;
def S_BUFFER_LOAD_DWORD_IMM_ci    : SMRD_Real_Load_IMM_ci <0x08, S_BUFFER_LOAD_DWORD_IMM>;
def S_BUFFER_LOAD_DWORDX2_IMM_ci  : SMRD_Real_Load_IMM_ci <0x09, S_BUFFER_LOAD_DWORDX2_IMM>;
def S_BUFFER_LOAD_DWORDX4_IMM_ci  : SMRD_Real_Load_IMM_ci <0x0a, S_BUFFER_LOAD_DWORDX4_IMM>;
def S_BUFFER_LOAD_DWORDX8_IMM_ci  : SMRD_Real_Load_IMM_ci <0x0b, S_BUFFER_LOAD_DWORDX8_IMM>;
def S_BUFFER_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x0c, S_BUFFER_LOAD_DWORDX16_IMM>;

// 32-bit CI encoding; the bit layout is the same as SMRD_Real_si, but it is
// assembled under isGFX7Only and decoded in the "GFX7" namespace.
class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
  : SM_Real<ps>,
    SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>,
    Enc32 {

  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace   = "GFX7";

  let Inst{31-27} = 0x18; // encoding
  let Inst{26-22} = op;
  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{8}     = ps.has_offset;
  let Inst{7-0}   = !if(ps.has_offset, offset{7-0},
                        !if(ps.has_soffset, soffset, ?));
}

def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;

//===----------------------------------------------------------------------===//
// Scalar Memory Patterns
//===----------------------------------------------------------------------===//

// Wraps a load fragment so that it matches only when isUniformLoad(N) holds
// (SelectionDAG); the GlobalISel predicate additionally requires exactly one
// memory operand, a uniform instruction and no VGPR parts in the address.
class SMRDLoadPat<PatFrag Op>
  : PatFrag <(ops node:$ptr), (Op node:$ptr), [{ return isUniformLoad(N);}]> {
  let GISelPredicateCode = [{
    if (!MI.hasOneMemOperand())
      return false;
    if (!isInstrUniform(MI))
      return false;

    // FIXME: We should probably be caching this.
    SmallVector<GEPInfo, 4> AddrInfo;
    getAddrModeInfo(MI, MRI, AddrInfo);

    if (hasVgprParts(AddrInfo))
      return false;
    return true;
  }];
}

def smrd_load        : SMRDLoadPat<load>;
def smrd_extloadi8   : SMRDLoadPat<extloadi8>;
def smrd_zextloadi8  : SMRDLoadPat<zextloadi8>;
def smrd_sextloadi8  : SMRDLoadPat<sextloadi8>;
def smrd_extloadi16  : SMRDLoadPat<extloadi16>;
def smrd_zextloadi16 : SMRDLoadPat<zextloadi16>;
def smrd_sextloadi16 : SMRDLoadPat<sextloadi16>;

// Prefetch fragment that matches only when operand 1 of the node is not
// divergent (SelectionDAG) / the instruction is uniform (GlobalISel).
def smrd_prefetch
  : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
             (prefetch node:$ptr, node:$rw, node:$loc, node:$type),
             [{ return !N->getOperand(1)->isDivergent();}]> {
  let GISelPredicateCode = [{
    return isInstrUniform(MI);
  }];
}

// Addressing-mode selectors; the second template argument is the number of
// operands each selector produces.
def SMRDImm           : ComplexPattern<iPTR, 2, "SelectSMRDImm">;
def SMRDImm32         : ComplexPattern<iPTR, 2, "SelectSMRDImm32">;
def SMRDSgpr          : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
def SMRDSgprImm       : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">;
def SMRDBufferImm     : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
def SMRDBufferImm32   : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;

// Restricts a load fragment to accesses of at most 4 bytes or accesses whose
// alignment is at least their size.
class SMRDAlignedLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr), [{
  // Returns true if it is a single dword load or naturally aligned multi-dword load.
  LoadSDNode *Ld = cast<LoadSDNode>(N);
  unsigned Size = Ld->getMemoryVT().getStoreSize();
  return Size <= 4 || Ld->getAlign().value() >= Size;
}]> {
  let GISelPredicateCode = [{
  auto &Ld = cast<GLoad>(MI);
  TypeSize Size = Ld.getMMO().getSize().getValue();
  return Size <= 4 || Ld.getMMO().getAlign().value() >= Size;
  }];
}

def aligned_smrd_load : SMRDAlignedLoadPat<smrd_load>;

multiclass SMRD_Patterns <string Instr, ValueType vt, PatFrag frag,
                          bit immci = true, string suffix = ""> {
  // 1.
IMM offset 887 def : GCNPat < 888 (frag (SMRDImm i64:$sbase, i32:$offset)), 889 (vt (!cast<SM_Pseudo>(Instr#"_IMM"#suffix) $sbase, $offset, 0))>; 890 891 // 2. 32-bit IMM offset on CI 892 if immci then def : GCNPat < 893 (frag (SMRDImm32 i64:$sbase, i32:$offset)), 894 (vt (!cast<InstSI>(Instr#"_IMM_ci"#suffix) $sbase, $offset, 0))> { 895 let SubtargetPredicate = isGFX7Only; 896 } 897 898 // 3. SGPR offset 899 def : GCNPat < 900 (frag (SMRDSgpr i64:$sbase, i32:$soffset)), 901 (vt (!cast<SM_Pseudo>(Instr#"_SGPR"#suffix) $sbase, $soffset, 0))> { 902 let SubtargetPredicate = isNotGFX9Plus; 903 } 904 def : GCNPat < 905 (frag (SMRDSgpr i64:$sbase, i32:$soffset)), 906 (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM"#suffix) $sbase, $soffset, 0, 0))> { 907 let SubtargetPredicate = isGFX9Plus; 908 } 909 910 // 4. SGPR+IMM offset 911 def : GCNPat < 912 (frag (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)), 913 (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM"#suffix) $sbase, $soffset, $offset, 0))> { 914 let SubtargetPredicate = isGFX9Plus; 915 } 916 917 // 5. No offset 918 def : GCNPat < 919 (vt (frag (i64 SReg_64:$sbase))), 920 (vt (!cast<SM_Pseudo>(Instr#"_IMM"#suffix) i64:$sbase, 0, 0))>; 921} 922 923multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> { 924 // High priority when XNACK is enabled and the load was naturally aligned. 925 let OtherPredicates = [HasXNACKEnabled], AddedComplexity = 102 in 926 defm: SMRD_Patterns <Instr, vt, aligned_smrd_load, immci>; 927 928 // XNACK is enabled and the load wasn't naturally aligned. The constrained sload variant. 929 if !gt(vt.Size, 32) then { 930 let OtherPredicates = [HasXNACKEnabled], AddedComplexity = 101 in 931 defm: SMRD_Patterns <Instr, vt, smrd_load, /*immci=*/false, /*suffix=*/"_ec">; 932 } 933 934 // XNACK is disabled. 935 let AddedComplexity = 100 in 936 defm: SMRD_Patterns <Instr, vt, smrd_load, immci>; 937} 938 939multiclass SMLoad_Pattern <string Instr, ValueType vt, bit immci = true> { 940 // 1. 
Offset as an immediate 941 def : GCNPat < 942 (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy), 943 (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))> { 944 let AddedComplexity = 2; 945 } 946 947 // 2. 32-bit IMM offset on CI 948 if immci then def : GCNPat < 949 (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)), 950 (!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset, 951 (extract_cpol $cachepolicy))> { 952 let OtherPredicates = [isGFX7Only]; 953 let AddedComplexity = 1; 954 } 955 956 // 3. Offset loaded in an 32bit SGPR 957 def : GCNPat < 958 (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy), 959 (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$soffset, (extract_cpol $cachepolicy)))> { 960 let OtherPredicates = [isNotGFX9Plus]; 961 } 962 def : GCNPat < 963 (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy), 964 (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, 0, (extract_cpol $cachepolicy)))> { 965 let OtherPredicates = [isGFX9Plus]; 966 } 967 968 // 4. Offset as an 32-bit SGPR + immediate 969 def : GCNPat < 970 (SIsbuffer_load v4i32:$sbase, (SMRDBufferSgprImm i32:$soffset, i32:$offset), 971 timm:$cachepolicy), 972 (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, i32imm:$offset, 973 (extract_cpol $cachepolicy)))> { 974 let OtherPredicates = [isGFX9Plus]; 975 } 976} 977 978multiclass ScalarLoadWithExtensionPat <string Instr, SDPatternOperator node, ValueType vt> { 979 // 1. IMM offset 980 def : GCNPat < 981 (node (SMRDImm i64:$sbase, i32:$offset)), 982 (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))>{ 983 let OtherPredicates = [isGFX12Plus]; 984 } 985 986 // 2. 
SGPR offset 987 def : GCNPat < 988 (node (SMRDSgpr i64:$sbase, i32:$soffset)), 989 (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))>{ 990 let OtherPredicates = [isGFX12Plus]; 991 } 992 993 // 3. SGPR+IMM offset 994 def : GCNPat < 995 (node (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)), 996 (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))>{ 997 let OtherPredicates = [isGFX12Plus]; 998 } 999 1000 // 4. No offset 1001 def : GCNPat < 1002 (vt (node (i64 SReg_64:$sbase))), 1003 (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))>{ 1004 let OtherPredicates = [isGFX12Plus]; 1005 } 1006} 1007 1008multiclass ScalarBufferLoadIntrinsicPat <SDPatternOperator name, string Instr> { 1009 1010 // 1. Offset as an immediate 1011 def : GCNPat < 1012 (name v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy), 1013 (i32 (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))> { 1014 let OtherPredicates = [isGFX12Plus]; 1015 } 1016 1017 // 2. Offset as an 32-bit SGPR 1018 def : GCNPat < 1019 (name v4i32:$sbase, i32:$soffset, timm:$cachepolicy), 1020 (i32 (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, 0, (extract_cpol $cachepolicy)))> { 1021 let OtherPredicates = [isGFX12Plus]; 1022 } 1023 1024 // 3. Offset as an 32-bit SGPR + immediate 1025 def : GCNPat < 1026 (name v4i32:$sbase, (SMRDBufferSgprImm i32:$soffset, i32:$offset), 1027 timm:$cachepolicy), 1028 (i32 (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, i32imm:$offset, 1029 (extract_cpol $cachepolicy)))> { 1030 let OtherPredicates = [isGFX12Plus]; 1031 } 1032} 1033 1034// Global and constant loads can be selected to either MUBUF or SMRD 1035// instructions, but SMRD instructions are faster so we want the instruction 1036// selector to prefer those. 
let AddedComplexity = 100 in {

defm : ScalarLoadWithExtensionPat<"S_LOAD_U8",  smrd_extloadi8,   i32>;
defm : ScalarLoadWithExtensionPat<"S_LOAD_U8",  smrd_zextloadi8,  i32>;
defm : ScalarLoadWithExtensionPat<"S_LOAD_I8",  smrd_sextloadi8,  i32>;
defm : ScalarLoadWithExtensionPat<"S_LOAD_U16", smrd_extloadi16,  i32>;
defm : ScalarLoadWithExtensionPat<"S_LOAD_U16", smrd_zextloadi16, i32>;
defm : ScalarLoadWithExtensionPat<"S_LOAD_I16", smrd_sextloadi16, i32>;
defm : ScalarBufferLoadIntrinsicPat<SIsbuffer_load_byte,   "S_BUFFER_LOAD_I8">;
defm : ScalarBufferLoadIntrinsicPat<SIsbuffer_load_ubyte,  "S_BUFFER_LOAD_U8">;
defm : ScalarBufferLoadIntrinsicPat<SIsbuffer_load_short,  "S_BUFFER_LOAD_I16">;
defm : ScalarBufferLoadIntrinsicPat<SIsbuffer_load_ushort, "S_BUFFER_LOAD_U16">;

} // End let AddedComplexity = 100

// One SMRD_Pattern instantiation per value type of each register class.
// The DWORDX3 load passes immci = false, so no "_IMM_ci" pattern is emitted
// for it.
foreach vt = Reg32Types.types in
  defm : SMRD_Pattern<"S_LOAD_DWORD", vt>;

foreach vt = SReg_64.RegTypes in
  defm : SMRD_Pattern<"S_LOAD_DWORDX2", vt>;

foreach vt = SReg_96.RegTypes in
  defm : SMRD_Pattern<"S_LOAD_DWORDX3", vt, false>;

foreach vt = SReg_128.RegTypes in
  defm : SMRD_Pattern<"S_LOAD_DWORDX4", vt>;

foreach vt = SReg_256.RegTypes in
  defm : SMRD_Pattern<"S_LOAD_DWORDX8", vt>;

foreach vt = SReg_512.RegTypes in
  defm : SMRD_Pattern<"S_LOAD_DWORDX16", vt>;


// Scalar buffer loads, integer result types.
defm : SMLoad_Pattern<"S_BUFFER_LOAD_DWORD",    i32>;
defm : SMLoad_Pattern<"S_BUFFER_LOAD_DWORDX2",  v2i32>;
defm : SMLoad_Pattern<"S_BUFFER_LOAD_DWORDX3",  v3i32, false>;
defm : SMLoad_Pattern<"S_BUFFER_LOAD_DWORDX4",  v4i32>;
defm : SMLoad_Pattern<"S_BUFFER_LOAD_DWORDX8",  v8i32>;
defm : SMLoad_Pattern<"S_BUFFER_LOAD_DWORDX16", v16i32>;

// Scalar buffer loads, floating-point result types.
defm : SMLoad_Pattern<"S_BUFFER_LOAD_DWORD",    f32>;
defm : SMLoad_Pattern<"S_BUFFER_LOAD_DWORDX2",  v2f32>;
defm : SMLoad_Pattern<"S_BUFFER_LOAD_DWORDX3",  v3f32, false>;
defm : SMLoad_Pattern<"S_BUFFER_LOAD_DWORDX4",  v4f32>;
defm : SMLoad_Pattern<"S_BUFFER_LOAD_DWORDX8",  v8f32>;
defm : SMLoad_Pattern<"S_BUFFER_LOAD_DWORDX16", v16f32>;

// readcyclecounter via S_MEMTIME where that instruction exists.
let OtherPredicates = [HasSMemTimeInst] in
def : GCNPat<
  (i64 (readcyclecounter)),
  (S_MEMTIME)
>;

// readcyclecounter via the SHADER_CYCLES hardware register: the S_GETREG_B32
// result forms the low half (sub0) and the high half (sub1) is zero.
let OtherPredicates = [HasShaderCyclesRegister] in
def : GCNPat<
  (i64 (readcyclecounter)),
  (REG_SEQUENCE SReg_64,
    (S_GETREG_B32 getHwRegImm<HWREG.SHADER_CYCLES, 0, -12>.ret), sub0,
    (S_MOV_B32 (i32 0)), sub1)
>;

// readsteadycounter via S_MEMREALTIME where that instruction exists.
let OtherPredicates = [HasSMemRealTime] in
def : GCNPat<
  (i64 (readsteadycounter)),
  (S_MEMREALTIME)
>;

// readsteadycounter on GFX11+ via a returning S_SENDMSG.
let SubtargetPredicate = isGFX11Plus in
def : GCNPat<
  (i64 (readsteadycounter)),
  (S_SENDMSG_RTN_B64 (i32 /*MSG_RTN_GET_REALTIME=*/0x83))
>;

// Target-immediate leaves matching exactly 0 and exactly 1.
def i32imm_zero : TImmLeaf<i32, [{
  return Imm == 0;
}]>;

def i32imm_one : TImmLeaf<i32, [{
  return Imm == 1;
}]>;

// Patterns selecting smrd_prefetch to "S_PREFETCH_"#type when the node's last
// operand matches `cache_type`. Three address shapes are covered: base plus
// immediate offset, a bare 64-bit base, and a 32-bit base that is widened to
// 64 bits with a zero high half.
multiclass SMPrefetchPat<string type, TImmLeaf cache_type> {
  def : GCNPat<
    (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, cache_type),
    (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type)
       $sbase, $offset, (i32 SGPR_NULL), (i8 0))
  >;

  def : GCNPat<
    (smrd_prefetch (i64 SReg_64:$sbase), timm, timm, cache_type),
    (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type)
       $sbase, 0, (i32 SGPR_NULL), (i8 0))
  >;

  def : GCNPat<
    (smrd_prefetch (i32 SReg_32:$sbase), timm, timm, cache_type),
    (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type)
       (i64 (REG_SEQUENCE SReg_64, $sbase, sub0,
                                   (i32 (S_MOV_B32 (i32 0))), sub1)),
       0, (i32 SGPR_NULL), (i8 0))
  >;
}

defm : SMPrefetchPat<"INST", i32imm_zero>;
defm : SMPrefetchPat<"DATA", i32imm_one>;

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// 64-bit SMEM encoding skeleton used by the GFX10 and GFX11 real classes in
// this file. `sgpr_null` supplies the register encoding written into the
// soffset field when the pseudo has only an immediate offset.
class SMEM_Real_10Plus_common<bits<8> op, SM_Pseudo ps, string opName,
                              int subtarget, RegisterWithSubRegs sgpr_null>
  : SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {
  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{25-18} = op;
  let Inst{31-26} = 0x3d;
  // Some SMEM instructions use none of the offset fields; for those the
  // fields must stay undefined.
  let Inst{52-32} = !if(ps.has_offset, offset{20-0},
                        !if(ps.has_soffset, 0, ?));
  let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
                        !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
}

// GFX10-only real instruction: adds the DLC (bit 14) and GLC (bit 16) cache
// policy bits when the pseudo has them.
class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps>
  : SMEM_Real_10Plus_common<op, ps, ps.Mnemonic, SIEncodingFamily.GFX10,
                            SGPR_NULL_gfxpre11> {
  let AssemblerPredicate = isGFX10Only;
  let DecoderNamespace   = "GFX10";
  let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?);
  let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
}

// Same as SMEM_Real_gfx10, but takes the pseudo by record name.
class SMEM_Real_Load_gfx10<bits<8> op, string ps>
  : SMEM_Real_gfx10<op, !cast<SM_Pseudo>(ps)>;

// GFX10 real loads for the _IMM, _SGPR and _SGPR_IMM pseudos of NAME.
multiclass SM_Real_Loads_gfx10<bits<8> op> {
  defvar pseudoName = NAME;
  def _IMM_gfx10      : SMEM_Real_Load_gfx10<op, pseudoName#"_IMM">;
  def _SGPR_gfx10     : SMEM_Real_Load_gfx10<op, pseudoName#"_SGPR">;
  def _SGPR_IMM_gfx10 : SMEM_Real_Load_gfx10<op, pseudoName#"_SGPR_IMM">;
}

// Store-style encoding: bits 12-6 carry `sdata` in place of `sdst`.
class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps>
  : SMEM_Real_gfx10<op, ps> {
  bits<7> sdata;

  let sdst = ?;
  let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
}

// GFX10 real stores for the _IMM, _SGPR and _SGPR_IMM pseudos of NAME.
multiclass SM_Real_Stores_gfx10<bits<8> op> {
  defvar pseudoName = NAME;

  def _IMM_gfx10
    : SMEM_Real_Store_gfx10<op, !cast<SM_Store_Pseudo>(pseudoName#_IMM)>;

  def _SGPR_gfx10
    : SMEM_Real_Store_gfx10<op, !cast<SM_Store_Pseudo>(pseudoName#_SGPR)>;

  def _SGPR_IMM_gfx10
    : SMEM_Real_Store_gfx10<op, !cast<SM_Store_Pseudo>(pseudoName#_SGPR_IMM)>;
}

defm S_LOAD_DWORD            : SM_Real_Loads_gfx10<0x000>;
defm S_LOAD_DWORDX2          : SM_Real_Loads_gfx10<0x001>;
defm S_LOAD_DWORDX4          : SM_Real_Loads_gfx10<0x002>;
defm S_LOAD_DWORDX8          : SM_Real_Loads_gfx10<0x003>;
defm S_LOAD_DWORDX16         : SM_Real_Loads_gfx10<0x004>;

defm S_SCRATCH_LOAD_DWORD    : SM_Real_Loads_gfx10<0x005>;
defm S_SCRATCH_LOAD_DWORDX2  : SM_Real_Loads_gfx10<0x006>;
defm S_SCRATCH_LOAD_DWORDX4  : SM_Real_Loads_gfx10<0x007>;

defm S_BUFFER_LOAD_DWORD     : SM_Real_Loads_gfx10<0x008>;
defm S_BUFFER_LOAD_DWORDX2   : SM_Real_Loads_gfx10<0x009>;
defm S_BUFFER_LOAD_DWORDX4   : SM_Real_Loads_gfx10<0x00a>;
defm S_BUFFER_LOAD_DWORDX8   : SM_Real_Loads_gfx10<0x00b>;
defm S_BUFFER_LOAD_DWORDX16  : SM_Real_Loads_gfx10<0x00c>;

defm S_STORE_DWORD           : SM_Real_Stores_gfx10<0x010>;
defm S_STORE_DWORDX2         : SM_Real_Stores_gfx10<0x011>;
defm S_STORE_DWORDX4         : SM_Real_Stores_gfx10<0x012>;
defm S_SCRATCH_STORE_DWORD   : SM_Real_Stores_gfx10<0x015>;
defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016>;
defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017>;
defm S_BUFFER_STORE_DWORD    : SM_Real_Stores_gfx10<0x018>;
defm S_BUFFER_STORE_DWORDX2  : SM_Real_Stores_gfx10<0x019>;
defm S_BUFFER_STORE_DWORDX4  : SM_Real_Stores_gfx10<0x01a>;

def S_MEMREALTIME_gfx10 : SMEM_Real_gfx10<0x025, S_MEMREALTIME>;
def S_MEMTIME_gfx10     : SMEM_Real_gfx10<0x024, S_MEMTIME>;
def S_GL1_INV_gfx10     : SMEM_Real_gfx10<0x01f, S_GL1_INV>;
def S_GET_WAVEID_IN_WORKGROUP_gfx10
  : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>;
def S_DCACHE_INV_gfx10  : SMEM_Real_gfx10<0x020, S_DCACHE_INV>;

def S_DCACHE_WB_gfx10   : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;

// GFX10 real probes; they reuse the store encoding (sdata in bits 12-6).
multiclass SM_Real_Probe_gfx10<bits<8> op> {
  defvar pseudoName = NAME;
  def _IMM_gfx10
    : SMEM_Real_Store_gfx10<op, !cast<SM_Pseudo>(pseudoName#_IMM)>;
  def _SGPR_gfx10
    : SMEM_Real_Store_gfx10<op, !cast<SM_Pseudo>(pseudoName#_SGPR)>;
  def _SGPR_IMM_gfx10
    : SMEM_Real_Store_gfx10<op, !cast<SM_Pseudo>(pseudoName#_SGPR_IMM)>;
}

defm S_ATC_PROBE        : SM_Real_Probe_gfx10<0x26>;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10<0x27>;

// GFX10 real atomic. The GLC bit of cpol is fixed from the pseudo (ps.glc);
// bits 12-6 hold sdst when ps.glc is set and sdata otherwise.
class SMEM_Atomic_Real_gfx10<bits<8> op, SM_Atomic_Pseudo ps>
  : SMEM_Real_gfx10<op, ps> {

  bits<7> sdata;

  let Constraints     = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  let cpol{CPolBit.GLC} = ps.glc;

  let Inst{14}   = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
  let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
}

// GFX10 real atomics for all six pseudo flavours of NAME: the three offset
// forms, each with and without the _RTN suffix.
multiclass SM_Real_Atomics_gfx10<bits<8> op> {
  defvar pseudoName = NAME;
  def _IMM_gfx10
    : SMEM_Atomic_Real_gfx10<op, !cast<SM_Atomic_Pseudo>(pseudoName#_IMM)>;
  def _SGPR_gfx10
    : SMEM_Atomic_Real_gfx10<op, !cast<SM_Atomic_Pseudo>(pseudoName#_SGPR)>;
  def _SGPR_IMM_gfx10
    : SMEM_Atomic_Real_gfx10<op, !cast<SM_Atomic_Pseudo>(pseudoName#_SGPR_IMM)>;
  def _IMM_RTN_gfx10
    : SMEM_Atomic_Real_gfx10<op, !cast<SM_Atomic_Pseudo>(pseudoName#_IMM_RTN)>;
  def _SGPR_RTN_gfx10
    : SMEM_Atomic_Real_gfx10<op, !cast<SM_Atomic_Pseudo>(pseudoName#_SGPR_RTN)>;
  def _SGPR_IMM_RTN_gfx10
    : SMEM_Atomic_Real_gfx10<op,
                             !cast<SM_Atomic_Pseudo>(pseudoName#_SGPR_IMM_RTN)>;
}

defm S_BUFFER_ATOMIC_SWAP       : SM_Real_Atomics_gfx10<0x40>;
defm S_BUFFER_ATOMIC_CMPSWAP    : SM_Real_Atomics_gfx10<0x41>;
defm S_BUFFER_ATOMIC_ADD        : SM_Real_Atomics_gfx10<0x42>;
defm S_BUFFER_ATOMIC_SUB        : SM_Real_Atomics_gfx10<0x43>;
defm S_BUFFER_ATOMIC_SMIN       : SM_Real_Atomics_gfx10<0x44>;
defm S_BUFFER_ATOMIC_UMIN       : SM_Real_Atomics_gfx10<0x45>;
defm S_BUFFER_ATOMIC_SMAX       : SM_Real_Atomics_gfx10<0x46>;
defm S_BUFFER_ATOMIC_UMAX       : SM_Real_Atomics_gfx10<0x47>;
defm S_BUFFER_ATOMIC_AND        : SM_Real_Atomics_gfx10<0x48>;
defm S_BUFFER_ATOMIC_OR         : SM_Real_Atomics_gfx10<0x49>;
defm S_BUFFER_ATOMIC_XOR        : SM_Real_Atomics_gfx10<0x4a>;
defm S_BUFFER_ATOMIC_INC        : SM_Real_Atomics_gfx10<0x4b>;
defm S_BUFFER_ATOMIC_DEC        : SM_Real_Atomics_gfx10<0x4c>;

defm S_BUFFER_ATOMIC_SWAP_X2    : SM_Real_Atomics_gfx10<0x60>;
defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10<0x61>;
defm S_BUFFER_ATOMIC_ADD_X2     : SM_Real_Atomics_gfx10<0x62>;
defm S_BUFFER_ATOMIC_SUB_X2     : SM_Real_Atomics_gfx10<0x63>;
defm S_BUFFER_ATOMIC_SMIN_X2    : SM_Real_Atomics_gfx10<0x64>;
defm S_BUFFER_ATOMIC_UMIN_X2    : SM_Real_Atomics_gfx10<0x65>;
defm S_BUFFER_ATOMIC_SMAX_X2    : SM_Real_Atomics_gfx10<0x66>;
defm S_BUFFER_ATOMIC_UMAX_X2    : SM_Real_Atomics_gfx10<0x67>;
defm S_BUFFER_ATOMIC_AND_X2     : SM_Real_Atomics_gfx10<0x68>;
defm S_BUFFER_ATOMIC_OR_X2      : SM_Real_Atomics_gfx10<0x69>;
defm S_BUFFER_ATOMIC_XOR_X2     : SM_Real_Atomics_gfx10<0x6a>;
defm S_BUFFER_ATOMIC_INC_X2     : SM_Real_Atomics_gfx10<0x6b>;
defm S_BUFFER_ATOMIC_DEC_X2     : SM_Real_Atomics_gfx10<0x6c>;

defm S_ATOMIC_SWAP              : SM_Real_Atomics_gfx10<0x80>;
defm S_ATOMIC_CMPSWAP           : SM_Real_Atomics_gfx10<0x81>;
defm S_ATOMIC_ADD               : SM_Real_Atomics_gfx10<0x82>;
defm S_ATOMIC_SUB               : SM_Real_Atomics_gfx10<0x83>;
defm S_ATOMIC_SMIN              : SM_Real_Atomics_gfx10<0x84>;
defm S_ATOMIC_UMIN              : SM_Real_Atomics_gfx10<0x85>;
defm S_ATOMIC_SMAX              : SM_Real_Atomics_gfx10<0x86>;
defm S_ATOMIC_UMAX              : SM_Real_Atomics_gfx10<0x87>;
defm S_ATOMIC_AND               : SM_Real_Atomics_gfx10<0x88>;
defm S_ATOMIC_OR                : SM_Real_Atomics_gfx10<0x89>;
defm S_ATOMIC_XOR               : SM_Real_Atomics_gfx10<0x8a>;
defm S_ATOMIC_INC               : SM_Real_Atomics_gfx10<0x8b>;
defm S_ATOMIC_DEC               : SM_Real_Atomics_gfx10<0x8c>;

defm S_ATOMIC_SWAP_X2           : SM_Real_Atomics_gfx10<0xa0>;
defm S_ATOMIC_CMPSWAP_X2        : SM_Real_Atomics_gfx10<0xa1>;
defm S_ATOMIC_ADD_X2            : SM_Real_Atomics_gfx10<0xa2>;
defm S_ATOMIC_SUB_X2            : SM_Real_Atomics_gfx10<0xa3>;
defm S_ATOMIC_SMIN_X2           : SM_Real_Atomics_gfx10<0xa4>;
defm S_ATOMIC_UMIN_X2           : SM_Real_Atomics_gfx10<0xa5>;
defm S_ATOMIC_SMAX_X2           : SM_Real_Atomics_gfx10<0xa6>;
defm S_ATOMIC_UMAX_X2           : SM_Real_Atomics_gfx10<0xa7>;
defm S_ATOMIC_AND_X2            : SM_Real_Atomics_gfx10<0xa8>;
defm S_ATOMIC_OR_X2             : SM_Real_Atomics_gfx10<0xa9>;
defm S_ATOMIC_XOR_X2            : SM_Real_Atomics_gfx10<0xaa>;
defm S_ATOMIC_INC_X2            : SM_Real_Atomics_gfx10<0xab>;
defm S_ATOMIC_DEC_X2            : SM_Real_Atomics_gfx10<0xac>;

// GFX10 real dcache-discard instructions for the three offset forms of NAME.
multiclass SM_Real_Discard_gfx10<bits<8> op> {
  defvar pseudoName = NAME;
  def _IMM_gfx10
    : SMEM_Real_gfx10<op, !cast<SM_Pseudo>(pseudoName#_IMM)>;
  def _SGPR_gfx10
    : SMEM_Real_gfx10<op, !cast<SM_Pseudo>(pseudoName#_SGPR)>;
  def _SGPR_IMM_gfx10
    : SMEM_Real_gfx10<op, !cast<SM_Pseudo>(pseudoName#_SGPR_IMM)>;
}

defm S_DCACHE_DISCARD    : SM_Real_Discard_gfx10<0x28>;
defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10<0x29>;

// Searchable table over every SM_Real record, keyed by Opcode and exposing
// is_buffer; the generated lookup function is named getSMEMOpcodeHelper.
def SMInfoTable : GenericTable {
  let FilterClass = "SM_Real";
  let CppTypeName = "SMInfo";
  let Fields      = ["Opcode", "is_buffer"];

  let PrimaryKey     = ["Opcode"];
  let PrimaryKeyName = "getSMEMOpcodeHelper";
}

//===----------------------------------------------------------------------===//
// GFX11.
//===----------------------------------------------------------------------===//

// GFX11-only real instruction: DLC sits in bit 13 and GLC in bit 14; each is
// encoded as 0 when the pseudo lacks it.
class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic>
  : SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX11,
                            SGPR_NULL_gfx11plus> {
  let AssemblerPredicate = isGFX11Only;
  let DecoderNamespace   = "GFX11";
  let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
  let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
}

// Same as SMEM_Real_gfx11, but takes the pseudo by record name.
class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName>
  : SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps), opName>;

// GFX11 real loads. The assembly mnemonic is the lower-cased NAME; the
// mnemonic of pseudo `ps` is registered as an alias for it on GFX11+.
multiclass SM_Real_Loads_gfx11<bits<8> op, string ps> {
  defvar lowerName = !tolower(NAME);
  def _IMM_gfx11      : SMEM_Real_Load_gfx11<op, ps#"_IMM", lowerName>;
  def _SGPR_gfx11     : SMEM_Real_Load_gfx11<op, ps#"_SGPR", lowerName>;
  def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR_IMM", lowerName>;
  def : AMDGPUMnemonicAlias<!cast<SM_Pseudo>(ps#"_IMM").Mnemonic, lowerName> {
    let AssemblerPredicate = isGFX11Plus;
  }
}

defm S_LOAD_B32         : SM_Real_Loads_gfx11<0x000, "S_LOAD_DWORD">;
defm S_LOAD_B64         : SM_Real_Loads_gfx11<0x001, "S_LOAD_DWORDX2">;
defm S_LOAD_B128        : SM_Real_Loads_gfx11<0x002, "S_LOAD_DWORDX4">;
defm S_LOAD_B256        : SM_Real_Loads_gfx11<0x003, "S_LOAD_DWORDX8">;
defm S_LOAD_B512        : SM_Real_Loads_gfx11<0x004, "S_LOAD_DWORDX16">;

defm S_BUFFER_LOAD_B32  : SM_Real_Loads_gfx11<0x008, "S_BUFFER_LOAD_DWORD">;
defm S_BUFFER_LOAD_B64  : SM_Real_Loads_gfx11<0x009, "S_BUFFER_LOAD_DWORDX2">;
defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx11<0x00a, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx11<0x00b, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx11<0x00c, "S_BUFFER_LOAD_DWORDX16">;

def S_GL1_INV_gfx11    : SMEM_Real_gfx11<0x020, S_GL1_INV>;
def S_DCACHE_INV_gfx11 : SMEM_Real_gfx11<0x021, S_DCACHE_INV>;

// Store-style encoding: bits 12-6 carry `sdata` in place of `sdst`.
class SMEM_Real_Store_gfx11<bits<8> op, SM_Pseudo ps>
  : SMEM_Real_gfx11<op, ps> {
  // encoding
  bits<7> sdata;

  let sdst = ?;
  let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
}

// GFX11 real probes for the three offset forms of NAME.
multiclass SM_Real_Probe_gfx11<bits<8> op> {
  defvar pseudoName = NAME;
  def _IMM_gfx11
    : SMEM_Real_Store_gfx11<op, !cast<SM_Probe_Pseudo>(pseudoName#_IMM)>;
  def _SGPR_gfx11
    : SMEM_Real_Store_gfx11<op, !cast<SM_Probe_Pseudo>(pseudoName#_SGPR)>;
  def _SGPR_IMM_gfx11
    : SMEM_Real_Store_gfx11<op, !cast<SM_Probe_Pseudo>(pseudoName#_SGPR_IMM)>;
}

defm S_ATC_PROBE        : SM_Real_Probe_gfx11<0x22>;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11<0x23>;

//===----------------------------------------------------------------------===//
// GFX12.
//===----------------------------------------------------------------------===//

// 64-bit SMEM encoding skeleton for GFX12: a 6-bit opcode in bits 18-13 and a
// 24-bit immediate offset in bits 55-32. `sgpr_null` supplies the register
// encoding written into the soffset field when only an immediate offset is
// present.
class SMEM_Real_gfx12Plus<bits<6> op, SM_Pseudo ps, string opName,
                          int subtarget, RegisterWithSubRegs sgpr_null>
  : SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {

  let Inst{18-13} = op;
  let Inst{31-26} = 0x3d;

  let Inst{55-32} = !if(ps.has_offset, offset{23-0},
                        !if(ps.has_soffset, 0, ?));
  let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
                        !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
}

// GFX12+ real instruction: adds the sbase and sdst fields.
class SMEM_Real_gfx12<bits<6> op, SM_Pseudo ps, string opName = ps.Mnemonic>
  : SMEM_Real_gfx12Plus<op, ps, opName, SIEncodingFamily.GFX12,
                        SGPR_NULL_gfx11plus> {
  let AssemblerPredicate = isGFX12Plus;
  let DecoderNamespace   = "GFX12";

  let Inst{5-0}  = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
}

// Encoding with `sdata` in place of `sdst`; used below for the GFX12
// prefetch and probe instructions.
class SMEM_Real_Prefetch_gfx12<bits<6> op, SM_Pseudo ps>
  : SMEM_Real_gfx12<op, ps> {
  bits<7> sdata; // Only 5 bits of sdata are supported.

  let sdst = ?;
  let Inst{12-11} = 0; // Unused sdata bits.
  let Inst{10-6}  = !if(ps.has_sdst, sdata{4-0}, ?);
}

// GFX12 real load for pseudo `ps # offsets.Variant`. The operand list is
// rebuilt from the pseudo's base register class, the offset-mode operands and
// the cache policy.
class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName,
                           OffsetMode offsets>
  : SMEM_Real_gfx12<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> {
  RegisterClass BaseClass =
    !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
  let InOperandList =
    !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));

  let Inst{22-21} = cpol{4-3}; // scope
  let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
}

// GFX12 real loads (IMM and SGPR+optional-IMM forms). The assembly mnemonic
// is the lower-cased NAME; `ps` names the pseudo and defaults to NAME.
multiclass SM_Real_Loads_gfx12<bits<6> op, string ps = NAME> {
  defvar lowerName = !tolower(NAME);
  def _IMM_gfx12
    : SMEM_Real_Load_gfx12<op, ps, lowerName, IMM_Offset>;
  def _SGPR_IMM_gfx12
    : SMEM_Real_Load_gfx12<op, ps, lowerName, SGPR_IMM_OptOffset>;
}

defm S_LOAD_B32         : SM_Real_Loads_gfx12<0x00, "S_LOAD_DWORD">;
defm S_LOAD_B64         : SM_Real_Loads_gfx12<0x01, "S_LOAD_DWORDX2">;
defm S_LOAD_B96         : SM_Real_Loads_gfx12<0x05, "S_LOAD_DWORDX3">;
defm S_LOAD_B128        : SM_Real_Loads_gfx12<0x02, "S_LOAD_DWORDX4">;
defm S_LOAD_B256        : SM_Real_Loads_gfx12<0x03, "S_LOAD_DWORDX8">;
defm S_LOAD_B512        : SM_Real_Loads_gfx12<0x04, "S_LOAD_DWORDX16">;

defm S_LOAD_I8          : SM_Real_Loads_gfx12<0x08>;
defm S_LOAD_U8          : SM_Real_Loads_gfx12<0x09>;
defm S_LOAD_I16         : SM_Real_Loads_gfx12<0x0a>;
defm S_LOAD_U16         : SM_Real_Loads_gfx12<0x0b>;

defm S_BUFFER_LOAD_B32  : SM_Real_Loads_gfx12<0x10, "S_BUFFER_LOAD_DWORD">;
defm S_BUFFER_LOAD_B64  : SM_Real_Loads_gfx12<0x11, "S_BUFFER_LOAD_DWORDX2">;
defm S_BUFFER_LOAD_B96  : SM_Real_Loads_gfx12<0x15, "S_BUFFER_LOAD_DWORDX3">;
defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx12<0x12, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx12<0x13, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx12<0x14, "S_BUFFER_LOAD_DWORDX16">;

defm S_BUFFER_LOAD_I8   : SM_Real_Loads_gfx12<0x18>;
defm S_BUFFER_LOAD_U8   : SM_Real_Loads_gfx12<0x19>;
defm S_BUFFER_LOAD_I16  : SM_Real_Loads_gfx12<0x1a>;
defm S_BUFFER_LOAD_U16  : SM_Real_Loads_gfx12<0x1b>;

def S_DCACHE_INV_gfx12 : SMEM_Real_gfx12<0x021, S_DCACHE_INV>;

def S_PREFETCH_INST_gfx12
  : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>;
def S_PREFETCH_INST_PC_REL_gfx12
  : SMEM_Real_Prefetch_gfx12<0x25, S_PREFETCH_INST_PC_REL>;
def S_PREFETCH_DATA_gfx12
  : SMEM_Real_Prefetch_gfx12<0x26, S_PREFETCH_DATA>;
def S_BUFFER_PREFETCH_DATA_gfx12
  : SMEM_Real_Prefetch_gfx12<0x27, S_BUFFER_PREFETCH_DATA>;
def S_PREFETCH_DATA_PC_REL_gfx12
  : SMEM_Real_Prefetch_gfx12<0x28, S_PREFETCH_DATA_PC_REL>;

// GFX12 real probes; they reuse the prefetch encoding. The _SGPR_IMM real is
// built from the _SGPR_OPT_IMM pseudo.
multiclass SMEM_Real_Probe_gfx12<bits<6> op> {
  defvar pseudoName = NAME;
  def _IMM_gfx12
    : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(pseudoName#_IMM)>;
  def _SGPR_IMM_gfx12
    : SMEM_Real_Prefetch_gfx12<op,
        !cast<SM_Probe_Pseudo>(pseudoName#_SGPR_OPT_IMM)>;
}

defm S_ATC_PROBE        : SMEM_Real_Probe_gfx12<0x22>;
defm S_ATC_PROBE_BUFFER : SMEM_Real_Probe_gfx12<0x23>;