//===---- SMInstructions.td - Scalar Memory Instruction Definitions -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

def smrd_offset_8 : ImmOperand<i32, "SMRDOffset8", 1>;

let EncoderMethod = "getSMEMOffsetEncoding",
    DecoderMethod = "decodeSMEMOffset" in {
def smem_offset : ImmOperand<i32, "SMEMOffset", 1>;
def smem_offset_mod : NamedIntOperand<i32, "offset", "SMEMOffsetMod">;
}

//===----------------------------------------------------------------------===//
// Scalar Memory classes
//===----------------------------------------------------------------------===//

// Common base for every scalar-memory pseudo instruction.
//
// opName is recorded as Mnemonic and asmOps as AsmOperands; the asm string
// passed to InstSI is left empty, and SM_Real rebuilds it as
// opName # AsmOperands. The has_* and is_buffer fields are plain record
// fields: subclasses override them, and the per-target real classes read them
// through `ps.` when choosing which encoding bits to populate.
class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> :
  InstSI <outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {
  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let LGKM_CNT = 1;
  let SMRD = 1;
  let mayStore = 0;
  let mayLoad = 1;
  let hasSideEffects = 0;
  let maybeAtomic = 0;
  let UseNamedOperandTable = 1;
  let SchedRW = [WriteSMEM];

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  bits<1> has_sbase = 1;
  bits<1> has_sdst = 1;
  bit has_glc = 0;
  bit has_dlc = 0;
  bit has_offset = 0;
  bit has_soffset = 0;
  bit is_buffer = 0;
}

// Common base for the encodable ("real") form of a pseudo.
//
// Reuses the pseudo's operand lists, builds the asm string from opName
// (defaulting to the pseudo's mnemonic) followed by the pseudo's operand
// string, mirrors the pseudo's flags, and declares the operand fields that
// the per-target encoding classes place into Inst.
class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
  : InstSI<ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  Instruction Opcode = !cast<Instruction>(NAME);

  // copy relevant pseudo op flags
  let LGKM_CNT = ps.LGKM_CNT;
  let SMRD = ps.SMRD;
  let mayStore = ps.mayStore;
  let mayLoad = ps.mayLoad;
  let hasSideEffects = ps.hasSideEffects;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW = ps.SchedRW;
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter = ps.AsmMatchConverter;
  let IsAtomicRet = ps.IsAtomicRet;
  let IsAtomicNoRet = ps.IsAtomicNoRet;

  let TSFlags = ps.TSFlags;

  bit is_buffer = ps.is_buffer;

  // encoding
  bits<7> sbase;
  bits<7> sdst;
  bits<32> offset;
  bits<8> soffset;
  bits<5> cpol;
}

// Describes one offset addressing variant: whether the immediate $offset
// and/or the SGPR $soffset operand is present, the record-name suffix for the
// variant, the extra input operands, and the matching asm operand text.
class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
                 dag ins, string asm> {
  bit HasOffset = hasOffset;
  bit HasSOffset = hasSOffset;
  string Variant = variant;
  dag Ins = ins;
  string Asm = asm;
}

def IMM_Offset : OffsetMode<1, 0, "_IMM", (ins smem_offset:$offset), "$offset">;
def SGPR_Offset : OffsetMode<0, 1, "_SGPR", (ins SReg_32:$soffset), "$soffset">;
def SGPR_IMM_Offset : OffsetMode<1, 1, "_SGPR_IMM",
                                 (ins SReg_32:$soffset, smem_offset_mod:$offset),
                                 "$soffset$offset">;

// Probe pseudo: no outputs; takes an i8imm $sdata and a base register followed
// by the operands of the chosen offset mode. Marked as neither load nor store
// but as having side effects.
class SM_Probe_Pseudo <string opName, RegisterClass baseClass, OffsetMode offsets>
  : SM_Pseudo<opName, (outs),
              !con((ins i8imm:$sdata, baseClass:$sbase), offsets.Ins),
              " $sdata, $sbase, " # offsets.Asm> {
  let mayLoad = 0;
  let mayStore = 0;
  let has_glc = 0;
  let LGKM_CNT = 0;
  let ScalarStore = 0;
  let hasSideEffects = 1;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let PseudoInstr = opName # offsets.Variant;
}

// Load pseudo: defines $sdst from $sbase plus the chosen offset operands, with
// a trailing CPol:$cpol operand. BaseClass records the base register class so
// that other definitions can refer to it (e.g. when overriding InOperandList).
class SM_Load_Pseudo <string opName, RegisterClass baseClass,
                      RegisterClass dstClass, OffsetMode offsets>
  : SM_Pseudo<opName, (outs dstClass:$sdst),
              !con((ins baseClass:$sbase), offsets.Ins, (ins CPol:$cpol)),
              " $sdst, $sbase, " # offsets.Asm # "$cpol", []> {
  RegisterClass BaseClass = baseClass;
  let mayLoad = 1;
  let isReMaterializable = 1;
  let mayStore = 0;
  let has_glc = 1;
  let has_dlc = 1;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let PseudoInstr = opName # offsets.Variant;
}

// Store pseudo: no outputs; takes $sdata and $sbase plus the chosen offset
// operands and a trailing CPol:$cpol operand. Sets ScalarStore.
class SM_Store_Pseudo <string opName, RegisterClass baseClass,
                       RegisterClass srcClass, OffsetMode offsets>
  : SM_Pseudo<opName, (outs), !con((ins srcClass:$sdata, baseClass:$sbase),
                                   offsets.Ins, (ins CPol:$cpol)),
              " $sdata, $sbase, " # offsets.Asm # "$cpol"> {
  RegisterClass BaseClass = baseClass;
  let mayLoad = 0;
  let mayStore = 1;
  let has_glc = 1;
  let has_dlc = 1;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let ScalarStore = 1;
  let PseudoInstr = opName # offsets.Variant;
}

// Discard pseudo: no outputs and no sdst; takes an SReg_64 base plus the
// chosen offset operands. Neither load nor store, but has side effects.
class SM_Discard_Pseudo <string opName, OffsetMode offsets>
  : SM_Pseudo<opName, (outs), !con((ins SReg_64:$sbase), offsets.Ins),
              " $sbase, " # offsets.Asm> {
  let mayLoad = 0;
  let mayStore = 0;
  let has_glc = 0;
  let has_sdst = 0;
  let ScalarStore = 0;
  let hasSideEffects = 1;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let PseudoInstr = opName # offsets.Variant;
}

// Instantiates the _IMM, _SGPR and _SGPR_IMM load pseudos. The mnemonic is the
// lower-cased name of the enclosing defm.
multiclass SM_Pseudo_Loads<RegisterClass baseClass,
                           RegisterClass dstClass> {
  defvar opName = !tolower(NAME);
  def _IMM : SM_Load_Pseudo <opName, baseClass, dstClass, IMM_Offset>;
  def _SGPR : SM_Load_Pseudo <opName, baseClass, dstClass, SGPR_Offset>;
  def _SGPR_IMM : SM_Load_Pseudo <opName, baseClass, dstClass, SGPR_IMM_Offset>;
}

// Instantiates the _IMM, _SGPR and _SGPR_IMM store pseudos. The mnemonic is
// the lower-cased name of the enclosing defm.
multiclass SM_Pseudo_Stores<RegisterClass baseClass,
                            RegisterClass srcClass> {
  defvar opName = !tolower(NAME);
  def _IMM : SM_Store_Pseudo <opName, baseClass, srcClass, IMM_Offset>;
  def _SGPR : SM_Store_Pseudo <opName, baseClass, srcClass, SGPR_Offset>;
  def _SGPR_IMM : SM_Store_Pseudo <opName, baseClass, srcClass, SGPR_IMM_Offset>;
}

// Instantiates the _IMM, _SGPR and _SGPR_IMM discard pseudos. The mnemonic is
// the lower-cased name of the enclosing defm.
multiclass SM_Pseudo_Discards {
  defvar opName = !tolower(NAME);
  def _IMM : SM_Discard_Pseudo <opName, IMM_Offset>;
  def _SGPR : SM_Discard_Pseudo <opName, SGPR_Offset>;
  def _SGPR_IMM : SM_Discard_Pseudo <opName, SGPR_IMM_Offset>;
}

// Pseudo with no inputs and no sbase that defines an SReg_64_XEXEC $sdst and
// selects (node) as an i64 value. Has side effects; neither load nor store.
class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
  opName, (outs SReg_64_XEXEC:$sdst), (ins),
  " $sdst", [(set i64:$sdst, (node))]> {
  let hasSideEffects = 1;

  let mayStore = 0;
  let mayLoad = 0;
  let has_sbase = 0;
}

// Pseudo with no operands at all (no sdst, no sbase) that selects (node).
// Has side effects; neither load nor store.
class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
  opName, (outs), (ins), "", [(node)]> {
  let hasSideEffects = 1;
  let mayLoad = 0;
  let mayStore = 0;
  let has_sdst = 0;
  let has_sbase = 0;
}

// Instantiates the _IMM, _SGPR and _SGPR_IMM probe pseudos. The mnemonic is
// the lower-cased name of the enclosing defm.
multiclass SM_Pseudo_Probe<RegisterClass baseClass> {
  defvar opName = !tolower(NAME);
  def _IMM : SM_Probe_Pseudo <opName, baseClass, IMM_Offset>;
  def _SGPR : SM_Probe_Pseudo <opName, baseClass, SGPR_Offset>;
  def _SGPR_IMM : SM_Probe_Pseudo <opName, baseClass, SGPR_IMM_Offset>;
}

// Pseudo with no inputs and no sbase that defines an SReg_32_XM0_XEXEC $sdst
// and selects (node) as an i32 value. Has side effects; neither load nor store.
class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
  opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins),
  " $sdst", [(set i32:$sdst, (node))]> {
  let hasSideEffects = 1;
  let mayStore = 0;
  let mayLoad = 0;
  let has_sbase = 0;
}

// Prefetch pseudo: no outputs; always takes $offset, $soffset and an i8imm
// $sdata, and additionally $sbase when hasSBase is set (the asm string gains
// the " $sbase," prefix in that case).
class SM_Prefetch_Pseudo <string opName, RegisterClass baseClass, bit hasSBase>
  : SM_Pseudo<opName, (outs), !con(!if(hasSBase, (ins baseClass:$sbase), (ins)),
                                   (ins smem_offset:$offset, SReg_32:$soffset, i8imm:$sdata)),
              !if(hasSBase, " $sbase,", "") # " $offset, $soffset, $sdata"> {
  // Mark prefetches as both load and store to prevent reordering with loads
  // and stores. This is also needed for pattern to match prefetch intrinsic.
  let mayLoad = 1;
  let mayStore = 1;
  let has_glc = 0;
  let LGKM_CNT = 0;
  let has_sbase = hasSBase;
  let ScalarStore = 0;
  let has_offset = 1;
  let has_soffset = 1;
  let PseudoInstr = opName;
}

//===----------------------------------------------------------------------===//
// Scalar Atomic Memory Classes
//===----------------------------------------------------------------------===//

// Base for scalar atomic pseudos. `glc` mirrors isRet, and IsAtomicRet /
// IsAtomicNoRet are set from isRet so exactly one of them is true.
class SM_Atomic_Pseudo <string opName,
                        dag outs, dag ins, string asmOps, bit isRet>
  : SM_Pseudo<opName, outs, ins, asmOps, []> {

  bit glc = isRet;

  let mayLoad = 1;
  let mayStore = 1;
  let has_glc = 1;
  let has_dlc = 1;
  let has_soffset = 1;

  // Should these be set?
  let ScalarStore = 1;
  let hasSideEffects = 1;
  let maybeAtomic = 1;

  let IsAtomicNoRet = !not(isRet);
  let IsAtomicRet = isRet;
}

// Concrete atomic pseudo for one offset mode.
//
// Returning forms (isRet = 1) define $sdst, tie it to $sdata via Constraints,
// exclude $sdata from encoding, print $sdst first, and use CPol_GLC.
// Non-returning forms have no outputs, print $sdata first, and use
// CPol_NonGLC. opNameWithSuffix is opName plus the offset-mode suffix plus
// "_RTN" for returning forms; it is used for PseudoInstr and AtomicNoRet.
class SM_Pseudo_Atomic<string opName,
                       RegisterClass baseClass,
                       RegisterClass dataClass,
                       OffsetMode offsets,
                       bit isRet,
                       string opNameWithSuffix =
                         opName # offsets.Variant # !if(isRet, "_RTN", ""),
                       Operand CPolTy = !if(isRet, CPol_GLC, CPol_NonGLC)> :
  SM_Atomic_Pseudo<opName,
                   !if(isRet, (outs dataClass:$sdst), (outs)),
                   !con((ins dataClass:$sdata, baseClass:$sbase), offsets.Ins,
                        (ins CPolTy:$cpol)),
                   !if(isRet, " $sdst", " $sdata") #
                     ", $sbase, " # offsets.Asm # "$cpol",
                   isRet>,
  AtomicNoRet <opNameWithSuffix, isRet> {
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let PseudoInstr = opNameWithSuffix;

  let Constraints = !if(isRet, "$sdst = $sdata", "");
  let DisableEncoding = !if(isRet, "$sdata", "");
}

// Instantiates all six atomic pseudos: the three offset modes, each in a
// non-returning and a returning (_RTN) form.
multiclass SM_Pseudo_Atomics<RegisterClass baseClass,
                             RegisterClass dataClass> {
  defvar opName = !tolower(NAME);
  def _IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 0>;
  def _SGPR : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 0>;
  def _SGPR_IMM : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 0>;
  def _IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 1>;
  def _SGPR_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 1>;
  def _SGPR_IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 1>;
}

//===----------------------------------------------------------------------===//
// Scalar Memory Instructions
//===----------------------------------------------------------------------===//

// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SReg_32_XM0 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.

// XXX - SMEM instructions do not allow exec for data operand, but
// does sdst for SMRD on SI/CI?
defm S_LOAD_DWORD : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
let SubtargetPredicate = HasScalarDwordx3Loads in
  defm S_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_64, SReg_96>;
defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_64, SReg_128>;
defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_64, SReg_512>;
defm S_LOAD_I8 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_U8 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_I16 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_U16 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;

let is_buffer = 1 in {
defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
// FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
// SI/CI, but disallowed for SMEM on VI.
defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_128, SReg_64_XEXEC>;
let SubtargetPredicate = HasScalarDwordx3Loads in
  defm S_BUFFER_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_128, SReg_96>;
defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128, SReg_128>;
defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128, SReg_256>;
defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
defm S_BUFFER_LOAD_I8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_LOAD_U8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_LOAD_I16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_LOAD_U16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
}

let SubtargetPredicate = HasScalarStores in {
defm S_STORE_DWORD : SM_Pseudo_Stores <SReg_64, SReg_32_XM0_XEXEC>;
defm S_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_64, SReg_64_XEXEC>;
defm S_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;

let is_buffer = 1 in {
defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_128, SReg_128>;
}
} // End SubtargetPredicate = HasScalarStores

let SubtargetPredicate = HasSMemTimeInst in
def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;

let SubtargetPredicate = isGFX7GFX8GFX9 in {
def S_DCACHE_INV_VOL : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
} // let SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX8Plus in {
let OtherPredicates = [HasScalarStores] in {
def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
} // End OtherPredicates = [HasScalarStores]

defm S_ATC_PROBE : SM_Pseudo_Probe <SReg_64>;
let is_buffer = 1 in {
defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <SReg_128>;
}
} // SubtargetPredicate = isGFX8Plus

let SubtargetPredicate = HasSMemRealTime in
def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;

let SubtargetPredicate = isGFX10Plus in
def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
let SubtargetPredicate = HasGetWaveIdInst in
def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", int_amdgcn_s_get_waveid_in_workgroup>;


let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
defm S_SCRATCH_LOAD_DWORD : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_64, SReg_64_XEXEC>;
defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_64, SReg_128>;

defm S_SCRATCH_STORE_DWORD : SM_Pseudo_Stores <SReg_64, SReg_32_XM0_XEXEC>;
defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_64, SReg_64_XEXEC>;
defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;
} // SubtargetPredicate = HasScalarFlatScratchInsts

let SubtargetPredicate = HasScalarAtomics in {

let is_buffer = 1 in {
defm S_BUFFER_ATOMIC_SWAP : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_ADD : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_SUB : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_SMIN : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_UMIN : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_SMAX : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_UMAX : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_AND : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_OR : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_XOR : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_INC : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_DEC : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;

defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <SReg_128, SReg_128>;
defm S_BUFFER_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_AND_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_OR_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_INC_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
}

defm S_ATOMIC_SWAP : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_ADD : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_SUB : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_SMIN : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_UMIN : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_SMAX : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_UMAX : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_AND : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_OR : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_XOR : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_INC : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_DEC : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;

defm S_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <SReg_64, SReg_128>;
defm S_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_AND_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_OR_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_INC_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <SReg_64, SReg_64_XEXEC>;

} // let SubtargetPredicate = HasScalarAtomics

let SubtargetPredicate = HasScalarAtomics in {
defm S_DCACHE_DISCARD : SM_Pseudo_Discards;
defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards;
}

let SubtargetPredicate = isGFX12Plus in {
def S_PREFETCH_INST : SM_Prefetch_Pseudo <"s_prefetch_inst", SReg_64, 1>;
def S_PREFETCH_INST_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_inst_pc_rel", SReg_64, 0>;
def S_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_prefetch_data", SReg_64, 1>;
def S_PREFETCH_DATA_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_data_pc_rel", SReg_64, 0>;
def S_BUFFER_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_buffer_prefetch_data", SReg_128, 1> {
  let is_buffer = 1;
}
} // end let SubtargetPredicate = isGFX12Plus

458//===----------------------------------------------------------------------===// 459// Targets 460//===----------------------------------------------------------------------===// 461 462//===----------------------------------------------------------------------===// 463// SI 464//===----------------------------------------------------------------------===// 465 466class SMRD_Real_si <bits<5> op, SM_Pseudo ps> 467 : SM_Real<ps> 468 , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> 469 , Enc32 { 470 471 let AssemblerPredicate = isGFX6GFX7; 472 let DecoderNamespace = "GFX6GFX7"; 473 474 let Inst{7-0} = !if(ps.has_offset, offset{7-0}, !if(ps.has_soffset, soffset, ?)); 475 let Inst{8} = ps.has_offset; 476 let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?); 477 let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?); 478 let Inst{26-22} = op; 479 let Inst{31-27} = 0x18; //encoding 480} 481 482multiclass SM_Real_Loads_si<bits<5> op> { 483 defvar ps = NAME; 484 defvar immPs = !cast<SM_Load_Pseudo>(ps#_IMM); 485 def _IMM_si : SMRD_Real_si <op, immPs> { 486 let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, CPol:$cpol); 487 } 488 489 defvar sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR); 490 def _SGPR_si : SMRD_Real_si <op, sgprPs>; 491} 492 493defm S_LOAD_DWORD : SM_Real_Loads_si <0x00>; 494defm S_LOAD_DWORDX2 : SM_Real_Loads_si <0x01>; 495defm S_LOAD_DWORDX4 : SM_Real_Loads_si <0x02>; 496defm S_LOAD_DWORDX8 : SM_Real_Loads_si <0x03>; 497defm S_LOAD_DWORDX16 : SM_Real_Loads_si <0x04>; 498defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_si <0x08>; 499defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_si <0x09>; 500defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_si <0x0a>; 501defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_si <0x0b>; 502defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si <0x0c>; 503 504def S_MEMTIME_si : SMRD_Real_si <0x1e, S_MEMTIME>; 505def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>; 506 507 
508//===----------------------------------------------------------------------===// 509// VI and GFX9. 510//===----------------------------------------------------------------------===// 511 512class SMEM_Real_vi <bits<8> op, SM_Pseudo ps> 513 : SM_Real<ps> 514 , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> 515 , Enc64 { 516 field bit IsGFX9SpecificEncoding = false; 517 let AssemblerPredicate = !if(IsGFX9SpecificEncoding, isGFX9Only, isGFX8GFX9); 518 let DecoderNamespace = "GFX8"; 519 520 let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?); 521 let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); 522 523 // Note that for GFX9 instructions with immediate offsets, soffset_en 524 // must be defined, whereas in GFX8 it's undefined in all cases, 525 // meaning GFX9 is not perfectly backward-compatible with GFX8, despite 526 // documentation suggesting otherwise. 527 field bit SOffsetEn = !if(IsGFX9SpecificEncoding, 528 !if(ps.has_offset, ps.has_soffset, !if(ps.has_soffset, 0, ?)), 529 ?); 530 let Inst{14} = SOffsetEn; 531 532 let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?); 533 534 // imm 535 // TODO: Shall not be defined if the instruction has no offset nor 536 // soffset. 537 let Inst{17} = ps.has_offset; 538 539 let Inst{25-18} = op; 540 let Inst{31-26} = 0x30; //encoding 541 542 // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed. 543 // Offset value is corrected accordingly when offset is encoded/decoded. 544 // TODO: Forbid non-M0 register offsets for GFX8 stores and atomics. 
545 field bits<21> Offset; 546 let Offset{6-0} = !if(ps.has_offset, offset{6-0}, 547 !if(ps.has_soffset, soffset{6-0}, ?)); 548 let Offset{20-7} = !if(ps.has_offset, offset{20-7}, ?); 549 let Inst{52-32} = Offset; 550 551 // soffset 552 let Inst{63-57} = !if(!and(IsGFX9SpecificEncoding, ps.has_soffset), 553 soffset{6-0}, ?); 554} 555 556class SMEM_Real_Load_vi<bits<8> op, string ps> 557 : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps)>; 558 559// The alternative GFX9 SGPR encoding using soffset to encode the 560// offset register. Not available in assembler and goes to the GFX9 561// encoding family to avoid conflicts with the primary SGPR variant. 562class SMEM_Real_SGPR_alt_gfx9 { 563 bit IsGFX9SpecificEncoding = true; 564 bit SOffsetEn = 1; 565 bit Offset = ?; 566 int Subtarget = SIEncodingFamily.GFX9; 567 string AsmVariantName = "NonParsable"; 568} 569 570multiclass SM_Real_Loads_vi<bits<8> op> { 571 defvar ps = NAME; 572 def _IMM_vi : SMEM_Real_Load_vi <op, ps#"_IMM">; 573 def _SGPR_vi : SMEM_Real_Load_vi <op, ps#"_SGPR">; 574 def _SGPR_alt_gfx9 : SMEM_Real_Load_vi <op, ps#"_SGPR">, 575 SMEM_Real_SGPR_alt_gfx9; 576 let IsGFX9SpecificEncoding = true in 577 def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi <op, ps#"_SGPR_IMM">; 578} 579 580class SMEM_Real_Store_Base_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> { 581 // encoding 582 bits<7> sdata; 583 584 let sdst = ?; 585 let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?); 586} 587 588class SMEM_Real_Store_vi <bits<8> op, string ps> 589 : SMEM_Real_Store_Base_vi <op, !cast<SM_Pseudo>(ps)>; 590 591multiclass SM_Real_Stores_vi<bits<8> op> { 592 defvar ps = NAME; 593 def _IMM_vi : SMEM_Real_Store_vi <op, ps#"_IMM">; 594 def _SGPR_vi : SMEM_Real_Store_vi <op, ps#"_SGPR">; 595 def _SGPR_alt_gfx9 : SMEM_Real_Store_vi <op, ps#"_SGPR">, 596 SMEM_Real_SGPR_alt_gfx9; 597 let IsGFX9SpecificEncoding = true in 598 def _SGPR_IMM_gfx9 : SMEM_Real_Store_vi <op, ps#"_SGPR_IMM">; 599} 600 601multiclass SM_Real_Probe_vi<bits<8> op> { 602 
defvar ps = NAME; 603 def _IMM_vi : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>; 604 def _SGPR_vi : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>; 605 def _SGPR_alt_gfx9 606 : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>, 607 SMEM_Real_SGPR_alt_gfx9; 608 let IsGFX9SpecificEncoding = true in 609 def _SGPR_IMM_gfx9 610 : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>; 611} 612 613defm S_LOAD_DWORD : SM_Real_Loads_vi <0x00>; 614defm S_LOAD_DWORDX2 : SM_Real_Loads_vi <0x01>; 615defm S_LOAD_DWORDX4 : SM_Real_Loads_vi <0x02>; 616defm S_LOAD_DWORDX8 : SM_Real_Loads_vi <0x03>; 617defm S_LOAD_DWORDX16 : SM_Real_Loads_vi <0x04>; 618defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_vi <0x08>; 619defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_vi <0x09>; 620defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_vi <0x0a>; 621defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_vi <0x0b>; 622defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c>; 623 624defm S_STORE_DWORD : SM_Real_Stores_vi <0x10>; 625defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11>; 626defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12>; 627 628defm S_BUFFER_STORE_DWORD : SM_Real_Stores_vi <0x18>; 629defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_vi <0x19>; 630defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_vi <0x1a>; 631 632// These instructions use same encoding 633def S_DCACHE_INV_vi : SMEM_Real_vi <0x20, S_DCACHE_INV>; 634def S_DCACHE_WB_vi : SMEM_Real_vi <0x21, S_DCACHE_WB>; 635def S_DCACHE_INV_VOL_vi : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>; 636def S_DCACHE_WB_VOL_vi : SMEM_Real_vi <0x23, S_DCACHE_WB_VOL>; 637def S_MEMTIME_vi : SMEM_Real_vi <0x24, S_MEMTIME>; 638def S_MEMREALTIME_vi : SMEM_Real_vi <0x25, S_MEMREALTIME>; 639 640defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_vi <0x05>; 641defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_vi <0x06>; 642defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_vi <0x07>; 643 644defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_vi <0x15>; 645defm 
S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_vi <0x16>; 646defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_vi <0x17>; 647 648defm S_ATC_PROBE : SM_Real_Probe_vi <0x26>; 649defm S_ATC_PROBE_BUFFER : SM_Real_Probe_vi <0x27>; 650 651//===----------------------------------------------------------------------===// 652// GFX9 653//===----------------------------------------------------------------------===// 654 655class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps> 656 : SMEM_Real_vi <op, ps>, 657 AtomicNoRet <!subst("_RTN","",NAME), ps.glc> { 658 659 bits<7> sdata; 660 661 let Constraints = ps.Constraints; 662 let DisableEncoding = ps.DisableEncoding; 663 664 let cpol{CPolBit.GLC} = ps.glc; 665 let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0}); 666} 667 668multiclass SM_Real_Atomics_vi<bits<8> op> { 669 defvar ps = NAME; 670 def _IMM_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>; 671 def _SGPR_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>; 672 def _SGPR_alt_gfx9 673 : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>, 674 SMEM_Real_SGPR_alt_gfx9; 675 let IsGFX9SpecificEncoding = true in 676 def _SGPR_IMM_gfx9 677 : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>; 678 def _IMM_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>; 679 def _SGPR_RTN_vi : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>; 680 def _SGPR_RTN_alt_gfx9 681 : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>, 682 SMEM_Real_SGPR_alt_gfx9; 683 let IsGFX9SpecificEncoding = true in 684 def _SGPR_IMM_RTN_gfx9 685 : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>; 686} 687 688defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_vi <0x40>; 689defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_vi <0x41>; 690defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_vi <0x42>; 691defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_vi <0x43>; 692defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_vi <0x44>; 693defm 
S_BUFFER_ATOMIC_UMIN        : SM_Real_Atomics_vi <0x45>;
defm S_BUFFER_ATOMIC_SMAX        : SM_Real_Atomics_vi <0x46>;
defm S_BUFFER_ATOMIC_UMAX        : SM_Real_Atomics_vi <0x47>;
defm S_BUFFER_ATOMIC_AND         : SM_Real_Atomics_vi <0x48>;
defm S_BUFFER_ATOMIC_OR          : SM_Real_Atomics_vi <0x49>;
defm S_BUFFER_ATOMIC_XOR         : SM_Real_Atomics_vi <0x4a>;
defm S_BUFFER_ATOMIC_INC         : SM_Real_Atomics_vi <0x4b>;
defm S_BUFFER_ATOMIC_DEC         : SM_Real_Atomics_vi <0x4c>;

defm S_BUFFER_ATOMIC_SWAP_X2     : SM_Real_Atomics_vi <0x60>;
defm S_BUFFER_ATOMIC_CMPSWAP_X2  : SM_Real_Atomics_vi <0x61>;
defm S_BUFFER_ATOMIC_ADD_X2      : SM_Real_Atomics_vi <0x62>;
defm S_BUFFER_ATOMIC_SUB_X2      : SM_Real_Atomics_vi <0x63>;
defm S_BUFFER_ATOMIC_SMIN_X2     : SM_Real_Atomics_vi <0x64>;
defm S_BUFFER_ATOMIC_UMIN_X2     : SM_Real_Atomics_vi <0x65>;
defm S_BUFFER_ATOMIC_SMAX_X2     : SM_Real_Atomics_vi <0x66>;
defm S_BUFFER_ATOMIC_UMAX_X2     : SM_Real_Atomics_vi <0x67>;
defm S_BUFFER_ATOMIC_AND_X2      : SM_Real_Atomics_vi <0x68>;
defm S_BUFFER_ATOMIC_OR_X2       : SM_Real_Atomics_vi <0x69>;
defm S_BUFFER_ATOMIC_XOR_X2      : SM_Real_Atomics_vi <0x6a>;
defm S_BUFFER_ATOMIC_INC_X2      : SM_Real_Atomics_vi <0x6b>;
defm S_BUFFER_ATOMIC_DEC_X2      : SM_Real_Atomics_vi <0x6c>;

defm S_ATOMIC_SWAP               : SM_Real_Atomics_vi <0x80>;
defm S_ATOMIC_CMPSWAP            : SM_Real_Atomics_vi <0x81>;
defm S_ATOMIC_ADD                : SM_Real_Atomics_vi <0x82>;
defm S_ATOMIC_SUB                : SM_Real_Atomics_vi <0x83>;
defm S_ATOMIC_SMIN               : SM_Real_Atomics_vi <0x84>;
defm S_ATOMIC_UMIN               : SM_Real_Atomics_vi <0x85>;
defm S_ATOMIC_SMAX               : SM_Real_Atomics_vi <0x86>;
defm S_ATOMIC_UMAX               : SM_Real_Atomics_vi <0x87>;
defm S_ATOMIC_AND                : SM_Real_Atomics_vi <0x88>;
defm S_ATOMIC_OR                 : SM_Real_Atomics_vi <0x89>;
defm S_ATOMIC_XOR                : SM_Real_Atomics_vi <0x8a>;
defm S_ATOMIC_INC                : SM_Real_Atomics_vi <0x8b>;
defm S_ATOMIC_DEC                : SM_Real_Atomics_vi <0x8c>;

defm S_ATOMIC_SWAP_X2            : SM_Real_Atomics_vi <0xa0>;
defm S_ATOMIC_CMPSWAP_X2         : SM_Real_Atomics_vi <0xa1>;
defm S_ATOMIC_ADD_X2             : SM_Real_Atomics_vi <0xa2>;
defm S_ATOMIC_SUB_X2             : SM_Real_Atomics_vi <0xa3>;
defm S_ATOMIC_SMIN_X2            : SM_Real_Atomics_vi <0xa4>;
defm S_ATOMIC_UMIN_X2            : SM_Real_Atomics_vi <0xa5>;
defm S_ATOMIC_SMAX_X2            : SM_Real_Atomics_vi <0xa6>;
defm S_ATOMIC_UMAX_X2            : SM_Real_Atomics_vi <0xa7>;
defm S_ATOMIC_AND_X2             : SM_Real_Atomics_vi <0xa8>;
defm S_ATOMIC_OR_X2              : SM_Real_Atomics_vi <0xa9>;
defm S_ATOMIC_XOR_X2             : SM_Real_Atomics_vi <0xaa>;
defm S_ATOMIC_INC_X2             : SM_Real_Atomics_vi <0xab>;
defm S_ATOMIC_DEC_X2             : SM_Real_Atomics_vi <0xac>;

// Real encodings for the discard pseudos named by NAME: the _IMM and _SGPR
// forms, an alternate SGPR form that also mixes in SMEM_Real_SGPR_alt_gfx9, and
// an SGPR+IMM form that is marked IsGFX9SpecificEncoding.
multiclass SM_Real_Discard_vi<bits<8> op> {
  defvar ps = NAME;
  def _IMM_vi        : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_IMM)>;
  def _SGPR_vi       : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>;
  def _SGPR_alt_gfx9 : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>,
                       SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9 : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR_IMM)>;
}

defm S_DCACHE_DISCARD    : SM_Real_Discard_vi <0x28>;
defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_vi <0x29>;

//===----------------------------------------------------------------------===//
// CI
//===----------------------------------------------------------------------===//

// Immediate operand type used for $offset in the 64-bit GFX7 load encoding
// below.
def smrd_literal_offset : ImmOperand<i32, "SMRDLiteralOffset">;

// GFX7-only 64-bit encoding of an immediate-offset load. The low byte is fixed
// to 0xff with Inst{8} cleared, and the full 32-bit offset is carried in
// Inst{63-32}. The input operand list is overridden so that $offset uses
// smrd_literal_offset.
class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
  SM_Real<ps>,
  Enc64 {

  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace = "GFX7";
  let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, CPol:$cpol);

  let Inst{7-0}   = 0xff;
  let Inst{8}     = 0;
  let Inst{14-9}  = sbase{6-1};
  let Inst{21-15} = sdst{6-0};
  let Inst{26-22} = op;
  let Inst{31-27} = 0x18; //encoding
  let Inst{63-32} = offset{31-0};
}

def S_LOAD_DWORD_IMM_ci           : SMRD_Real_Load_IMM_ci <0x00, S_LOAD_DWORD_IMM>;
def S_LOAD_DWORDX2_IMM_ci         : SMRD_Real_Load_IMM_ci <0x01, S_LOAD_DWORDX2_IMM>;
def S_LOAD_DWORDX4_IMM_ci         : SMRD_Real_Load_IMM_ci <0x02, S_LOAD_DWORDX4_IMM>;
def S_LOAD_DWORDX8_IMM_ci         : SMRD_Real_Load_IMM_ci <0x03, S_LOAD_DWORDX8_IMM>;
def S_LOAD_DWORDX16_IMM_ci        : SMRD_Real_Load_IMM_ci <0x04, S_LOAD_DWORDX16_IMM>;
def S_BUFFER_LOAD_DWORD_IMM_ci    : SMRD_Real_Load_IMM_ci <0x08, S_BUFFER_LOAD_DWORD_IMM>;
def S_BUFFER_LOAD_DWORDX2_IMM_ci  : SMRD_Real_Load_IMM_ci <0x09, S_BUFFER_LOAD_DWORDX2_IMM>;
def S_BUFFER_LOAD_DWORDX4_IMM_ci  : SMRD_Real_Load_IMM_ci <0x0a, S_BUFFER_LOAD_DWORDX4_IMM>;
def S_BUFFER_LOAD_DWORDX8_IMM_ci  : SMRD_Real_Load_IMM_ci <0x0b, S_BUFFER_LOAD_DWORDX8_IMM>;
def S_BUFFER_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x0c, S_BUFFER_LOAD_DWORDX16_IMM>;

// GFX7-only 32-bit encoding. Inst{7-0} holds the immediate offset when the
// pseudo has one, otherwise the SGPR offset, and is left undefined when the
// pseudo has neither; Inst{8} is set to ps.has_offset. The sbase and sdst
// fields are likewise left undefined when the pseudo does not use them.
class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
  : SM_Real<ps>
  , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
  , Enc32 {

  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace = "GFX7";

  let Inst{7-0}   = !if(ps.has_offset, offset{7-0}, !if(ps.has_soffset, soffset, ?));
  let Inst{8}     = ps.has_offset;
  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{26-22} = op;
  let Inst{31-27} = 0x18; //encoding
}

def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;

//===----------------------------------------------------------------------===//
// Scalar Memory Patterns
//===----------------------------------------------------------------------===//

// Wraps the load fragment Op so that it only matches when isUniformLoad(N)
// holds. The GlobalISel predicate additionally requires exactly one memory
// operand, a uniform instruction, and an address with no VGPR parts.
class SMRDLoadPat<PatFrag Op> : PatFrag <(ops node:$ptr), (Op node:$ptr), [{ return isUniformLoad(N);}]> {
  let GISelPredicateCode = [{
    if (!MI.hasOneMemOperand())
      return false;
    if (!isInstrUniform(MI))
      return false;

    // FIXME: We should probably be caching this.
    SmallVector<GEPInfo, 4> AddrInfo;
    getAddrModeInfo(MI, MRI, AddrInfo);

    if (hasVgprParts(AddrInfo))
      return false;
    return true;
  }];
}

def smrd_load        : SMRDLoadPat<load>;
def smrd_extloadi8   : SMRDLoadPat<extloadi8>;
def smrd_zextloadi8  : SMRDLoadPat<zextloadi8>;
def smrd_sextloadi8  : SMRDLoadPat<sextloadi8>;
def smrd_extloadi16  : SMRDLoadPat<extloadi16>;
def smrd_zextloadi16 : SMRDLoadPat<zextloadi16>;
def smrd_sextloadi16 : SMRDLoadPat<sextloadi16>;

// Matches a prefetch only when operand 1 of the node is not divergent
// (presumably the pointer, following the chain -- confirm against the node
// definition). Under GlobalISel the instruction must be uniform.
def smrd_prefetch : PatFrag <(ops node:$ptr, node:$rw, node:$loc, node:$type),
                             (prefetch node:$ptr, node:$rw, node:$loc, node:$type),
                             [{ return !N->getOperand(1)->isDivergent();}]> {
  let GISelPredicateCode = [{
    return isInstrUniform(MI);
  }];
}

// Address-selection ComplexPatterns; the string names the C++ selector that
// implements each one and the integer is its operand count.
def SMRDImm           : ComplexPattern<iPTR, 2, "SelectSMRDImm">;
def SMRDImm32         : ComplexPattern<iPTR, 2, "SelectSMRDImm32">;
def SMRDSgpr          : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">;
def SMRDSgprImm       : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">;
def SMRDBufferImm     : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">;
def SMRDBufferImm32   : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">;
def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">;

// Patterns selecting a uniform load (smrd_load) of type vt onto the pseudos
// named Instr#"_IMM", Instr#"_SGPR" and Instr#"_SGPR_IMM". When immci is true a
// GFX7-only pattern for Instr#"_IMM_ci" is emitted as well. The trailing 0 in
// every result is the cache-policy operand.
multiclass SMRD_Pattern <string Instr, ValueType vt, bit immci = true> {

  // 1. IMM offset
  def : GCNPat <
    (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
  >;

  // 2. 32-bit IMM offset on CI
  if immci then def : GCNPat <
    (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
    (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
    let OtherPredicates = [isGFX7Only];
  }

  // 3. SGPR offset
  // Before GFX9 this uses the _SGPR form; on GFX9+ it uses the _SGPR_IMM form
  // with a zero immediate.
  def : GCNPat <
    (smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0))> {
    let OtherPredicates = [isNotGFX9Plus];
  }
  def : GCNPat <
    (smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))> {
    let OtherPredicates = [isGFX9Plus];
  }

  // 4. SGPR+IMM offset
  def : GCNPat <
    (smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> {
    let OtherPredicates = [isGFX9Plus];
  }

  // 5. No offset
  def : GCNPat <
    (vt (smrd_load (i64 SReg_64:$sbase))),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))
  >;
}

// Patterns selecting SIsbuffer_load of type vt onto the buffer-load pseudos
// named by Instr. The two immediate forms carry AddedComplexity 2 and 1, which
// ranks them above the plain SGPR-offset forms. When immci is true a GFX7-only
// pattern for Instr#"_IMM_ci" is emitted as well.
multiclass SMLoad_Pattern <string Instr, ValueType vt, bit immci = true> {
  // 1. Offset as an immediate
  def : GCNPat <
    (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))> {
    let AddedComplexity = 2;
  }

  // 2. 32-bit IMM offset on CI
  if immci then def : GCNPat <
    (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)),
    (!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset,
                                    (extract_cpol $cachepolicy))> {
    let OtherPredicates = [isGFX7Only];
    let AddedComplexity = 1;
  }

  // 3. Offset loaded in a 32-bit SGPR
  def : GCNPat <
    (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$soffset, (extract_cpol $cachepolicy)))> {
    let OtherPredicates = [isNotGFX9Plus];
  }
  def : GCNPat <
    (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, 0, (extract_cpol $cachepolicy)))> {
    let OtherPredicates = [isGFX9Plus];
  }

  // 4. Offset as a 32-bit SGPR + immediate
  def : GCNPat <
    (SIsbuffer_load v4i32:$sbase, (SMRDBufferSgprImm i32:$soffset, i32:$offset),
                    timm:$cachepolicy),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, i32imm:$offset,
                                             (extract_cpol $cachepolicy)))> {
    let OtherPredicates = [isGFX9Plus];
  }
}

// Patterns selecting the extending-load fragment `node` onto the Instr family
// of pseudos. Every pattern is restricted to isGFX12Plus; a plain SGPR offset
// uses the _SGPR_IMM form with a zero immediate.
multiclass ScalarLoadWithExtensionPat <string Instr, SDPatternOperator node, ValueType vt> {
  // 1. IMM offset
  def : GCNPat <
    (node (SMRDImm i64:$sbase, i32:$offset)),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))>{
    let OtherPredicates = [isGFX12Plus];
  }

  // 2. SGPR offset
  def : GCNPat <
    (node (SMRDSgpr i64:$sbase, i32:$soffset)),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, 0, 0))>{
    let OtherPredicates = [isGFX12Plus];
  }

  // 3. SGPR+IMM offset
  def : GCNPat <
    (node (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)),
    (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))>{
    let OtherPredicates = [isGFX12Plus];
  }

  // 4. No offset
  def : GCNPat <
    (vt (node (i64 SReg_64:$sbase))),
    (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0))>{
    let OtherPredicates = [isGFX12Plus];
  }
}

// Patterns selecting the buffer-load node `name` onto the Instr family of
// pseudos with an i32 result. Every pattern is restricted to isGFX12Plus.
multiclass ScalarBufferLoadIntrinsicPat <SDPatternOperator name, string Instr> {

  // 1. Offset as an immediate
  def : GCNPat <
    (name v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy),
    (i32 (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))> {
    let OtherPredicates = [isGFX12Plus];
  }

  // 2. Offset as a 32-bit SGPR
  def : GCNPat <
    (name v4i32:$sbase, i32:$soffset, timm:$cachepolicy),
    (i32 (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, 0, (extract_cpol $cachepolicy)))> {
    let OtherPredicates = [isGFX12Plus];
  }

  // 3. Offset as a 32-bit SGPR + immediate
  def : GCNPat <
    (name v4i32:$sbase, (SMRDBufferSgprImm i32:$soffset, i32:$offset),
          timm:$cachepolicy),
    (i32 (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, i32imm:$offset,
                                              (extract_cpol $cachepolicy)))> {
    let OtherPredicates = [isGFX12Plus];
  }
}

// Global and constant loads can be selected to either MUBUF or SMRD
// instructions, but SMRD instructions are faster so we want the instruction
// selector to prefer those.
let AddedComplexity = 100 in {

// Extending sub-dword loads and byte/short buffer loads. The any-extending and
// zero-extending fragments both map to the unsigned (U8/U16) instructions.
defm : ScalarLoadWithExtensionPat <"S_LOAD_U8", smrd_extloadi8, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_U8", smrd_zextloadi8, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_I8", smrd_sextloadi8, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_U16", smrd_extloadi16, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_U16", smrd_zextloadi16, i32>;
defm : ScalarLoadWithExtensionPat <"S_LOAD_I16", smrd_sextloadi16, i32>;
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_byte, "S_BUFFER_LOAD_I8">;
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_ubyte, "S_BUFFER_LOAD_U8">;
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_short, "S_BUFFER_LOAD_I16">;
defm : ScalarBufferLoadIntrinsicPat <SIsbuffer_load_ushort, "S_BUFFER_LOAD_U16">;

// One SMRD_Pattern instantiation per value type of each register class.
foreach vt = Reg32Types.types in {
defm : SMRD_Pattern <"S_LOAD_DWORD", vt>;
}

foreach vt = SReg_64.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX2", vt>;
}

// DWORDX3 passes false for the third argument, unlike the other widths.
foreach vt = SReg_96.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX3", vt, false>;
}

foreach vt = SReg_128.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>;
}

foreach vt = SReg_256.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX8", vt>;
}

foreach vt = SReg_512.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX16", vt>;
}

} // End let AddedComplexity = 100

// Buffer-load patterns for integer result types. These sit outside the
// AddedComplexity = 100 scope above.
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX3", v3i32, false>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16i32>;

// The same buffer-load patterns for floating-point result types.
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD", f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2", v2f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX3", v3f32, false>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4", v4f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8", v8f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16", v16f32>;

// readcyclecounter selects to S_MEMTIME when HasSMemTimeInst holds.
let OtherPredicates = [HasSMemTimeInst] in {
def : GCNPat <
  (i64 (readcyclecounter)),
  (S_MEMTIME)
>;
} // let OtherPredicates = [HasSMemTimeInst]

// With HasShaderCyclesRegister the 64-bit result is assembled from an
// S_GETREG_B32 read of HWREG.SHADER_CYCLES in sub0 and a zero in sub1.
let OtherPredicates = [HasShaderCyclesRegister] in {
def : GCNPat <
  (i64 (readcyclecounter)),
  (REG_SEQUENCE SReg_64,
    (S_GETREG_B32 getHwRegImm<HWREG.SHADER_CYCLES, 0, -12>.ret), sub0,
    (S_MOV_B32 (i32 0)), sub1)> {
  // Prefer this to s_memtime because it has lower and more predictable latency.
  let AddedComplexity = 1;
}
} // let OtherPredicates = [HasShaderCyclesRegister]

// Target-immediate leaves matching the i32 constants 0 and 1. They are passed
// as the cache_type argument of SMPrefetchPat below.
def i32imm_zero : TImmLeaf <i32, [{
  return Imm == 0;
}]>;

def i32imm_one : TImmLeaf <i32, [{
  return Imm == 1;
}]>;

// Patterns selecting smrd_prefetch, when its last operand matches cache_type,
// onto the pseudo "S_PREFETCH_"#type. Three address shapes are covered: base
// plus immediate offset, a bare 64-bit base, and a bare 32-bit base that is
// widened to 64 bits with a zero high half. Every result passes SGPR_NULL and
// an i8 0 as the trailing operands.
multiclass SMPrefetchPat<string type, TImmLeaf cache_type> {
  def : GCNPat <
    (smrd_prefetch (SMRDImm i64:$sbase, i32:$offset), timm, timm, cache_type),
    (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, $offset, (i32 SGPR_NULL), (i8 0))
  >;

  def : GCNPat <
    (smrd_prefetch (i64 SReg_64:$sbase), timm, timm, cache_type),
    (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type) $sbase, 0, (i32 SGPR_NULL), (i8 0))
  >;

  def : GCNPat <
    (smrd_prefetch (i32 SReg_32:$sbase), timm, timm, cache_type),
    (!cast<SM_Prefetch_Pseudo>("S_PREFETCH_"#type)
        (i64 (REG_SEQUENCE SReg_64, $sbase, sub0, (i32 (S_MOV_B32 (i32 0))), sub1)),
        0, (i32 SGPR_NULL), (i8 0))
  >;
}

defm : SMPrefetchPat<"INST", i32imm_zero>;
defm : SMPrefetchPat<"DATA", i32imm_one>;

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Encoding fields shared by the GFX10 and GFX11 real instructions. `subtarget`
// is the SIEncodingFamily value passed to SIMCInstr and `sgpr_null` is the
// register whose HWEncoding fills the soffset field when only an immediate
// offset is present. Fields for operands the pseudo does not have are left
// undefined.
class SMEM_Real_10Plus_common<bits<8> op, SM_Pseudo ps, string opName,
                              int subtarget, RegisterWithSubRegs sgpr_null> :
    SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {
  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{25-18} = op;
  let Inst{31-26} = 0x3d;
  // There are SMEM instructions that do not employ any of the offset
  // fields, in which case we need them to remain undefined.
  let Inst{52-32} = !if(ps.has_offset, offset{20-0}, !if(ps.has_soffset, 0, ?));
  let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
                        !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
}

// GFX10-only real instruction: DLC goes in Inst{14} and GLC in Inst{16}, each
// left undefined when the pseudo lacks the corresponding bit.
class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps>
    : SMEM_Real_10Plus_common<op, ps, ps.Mnemonic, SIEncodingFamily.GFX10,
                              SGPR_NULL_gfxpre11> {
  let AssemblerPredicate = isGFX10Only;
  let DecoderNamespace = "GFX10";
  let Inst{14}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?);
  let Inst{16}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);
}

// Convenience wrapper that looks the pseudo up by name.
class SMEM_Real_Load_gfx10<bits<8> op, string ps>
    : SMEM_Real_gfx10<op, !cast<SM_Pseudo>(ps)>;

// One real load per offset mode of the pseudo family named by NAME.
multiclass SM_Real_Loads_gfx10<bits<8> op> {
  defvar ps = NAME;
  def _IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_IMM">;
  def _SGPR_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_SGPR">;
  def _SGPR_IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps#"_SGPR_IMM">;
}

// Real instruction whose Inst{12-6} field encodes $sdata instead of $sdst.
// Used for stores and, below, for the probe instructions.
class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> {
  bits<7> sdata;

  let sdst = ?;
  let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
}

// One real store per offset mode of the pseudo family named by NAME.
multiclass SM_Real_Stores_gfx10<bits<8> op> {
  defvar ps = NAME;
  defvar immPs = !cast<SM_Store_Pseudo>(ps#_IMM);
  def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs>;

  defvar sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR);
  def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs>;

  defvar sgprImmPs = !cast<SM_Store_Pseudo>(ps#_SGPR_IMM);
  def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 <op, sgprImmPs>;
}

defm S_LOAD_DWORD            : SM_Real_Loads_gfx10<0x000>;
defm S_LOAD_DWORDX2          : SM_Real_Loads_gfx10<0x001>;
defm S_LOAD_DWORDX4          : SM_Real_Loads_gfx10<0x002>;
defm S_LOAD_DWORDX8          : SM_Real_Loads_gfx10<0x003>;
defm S_LOAD_DWORDX16         : SM_Real_Loads_gfx10<0x004>;

let SubtargetPredicate = HasScalarFlatScratchInsts in {
defm S_SCRATCH_LOAD_DWORD    : SM_Real_Loads_gfx10<0x005>;
defm S_SCRATCH_LOAD_DWORDX2  : SM_Real_Loads_gfx10<0x006>;
defm S_SCRATCH_LOAD_DWORDX4  : SM_Real_Loads_gfx10<0x007>;
} // End SubtargetPredicate = HasScalarFlatScratchInsts

defm S_BUFFER_LOAD_DWORD     : SM_Real_Loads_gfx10<0x008>;
defm S_BUFFER_LOAD_DWORDX2   : SM_Real_Loads_gfx10<0x009>;
defm S_BUFFER_LOAD_DWORDX4   : SM_Real_Loads_gfx10<0x00a>;
defm S_BUFFER_LOAD_DWORDX8   : SM_Real_Loads_gfx10<0x00b>;
defm S_BUFFER_LOAD_DWORDX16  : SM_Real_Loads_gfx10<0x00c>;

let SubtargetPredicate = HasScalarStores in {
defm S_STORE_DWORD           : SM_Real_Stores_gfx10<0x010>;
defm S_STORE_DWORDX2         : SM_Real_Stores_gfx10<0x011>;
defm S_STORE_DWORDX4         : SM_Real_Stores_gfx10<0x012>;
let OtherPredicates = [HasScalarFlatScratchInsts] in {
defm S_SCRATCH_STORE_DWORD   : SM_Real_Stores_gfx10<0x015>;
defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016>;
defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017>;
} // End OtherPredicates = [HasScalarFlatScratchInsts]
defm S_BUFFER_STORE_DWORD    : SM_Real_Stores_gfx10<0x018>;
defm S_BUFFER_STORE_DWORDX2  : SM_Real_Stores_gfx10<0x019>;
defm S_BUFFER_STORE_DWORDX4  : SM_Real_Stores_gfx10<0x01a>;
} // End SubtargetPredicate = HasScalarStores

def S_MEMREALTIME_gfx10              : SMEM_Real_gfx10<0x025, S_MEMREALTIME>;
def S_MEMTIME_gfx10                  : SMEM_Real_gfx10<0x024, S_MEMTIME>;
def S_GL1_INV_gfx10                  : SMEM_Real_gfx10<0x01f, S_GL1_INV>;
def S_GET_WAVEID_IN_WORKGROUP_gfx10  : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>;
def S_DCACHE_INV_gfx10               : SMEM_Real_gfx10<0x020, S_DCACHE_INV>;

let SubtargetPredicate = HasScalarStores in {
def S_DCACHE_WB_gfx10 : SMEM_Real_gfx10<0x021, S_DCACHE_WB>;
} // End SubtargetPredicate = HasScalarStores

// One real probe per offset mode. Probes reuse the store encoding because they
// take $sdata rather than $sdst. The pseudos are cast to SM_Probe_Pseudo, as
// the GFX11 and GFX12 probe multiclasses do, so that naming a non-probe pseudo
// is rejected by TableGen.
multiclass SM_Real_Probe_gfx10<bits<8> op> {
  defvar ps = NAME;
  def _IMM_gfx10  : SMEM_Real_Store_gfx10 <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
  def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
  def _SGPR_IMM_gfx10
    : SMEM_Real_Store_gfx10 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
}

defm S_ATC_PROBE        : SM_Real_Probe_gfx10 <0x26>;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27>;

// GFX10 real atomic. The GLC bit of cpol is forced to ps.glc, and Inst{12-6}
// encodes $sdst when ps.glc is set and $sdata otherwise. Unlike the base
// class, Inst{14} falls back to 0 rather than undefined when the pseudo has no
// DLC bit.
class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps>
  : SMEM_Real_gfx10 <op, ps>,
    AtomicNoRet <!subst("_RTN","",NAME), ps.glc> {

  bits<7> sdata;

  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  let cpol{CPolBit.GLC} = ps.glc;

  let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
  let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
}

// Six reals per atomic: three offset modes, each with and without _RTN.
multiclass SM_Real_Atomics_gfx10<bits<8> op> {
  defvar ps = NAME;
  def _IMM_gfx10          : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
  def _SGPR_gfx10         : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
  def _SGPR_IMM_gfx10     : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
  def _IMM_RTN_gfx10      : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
  def _SGPR_RTN_gfx10     : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
  def _SGPR_IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
}

let SubtargetPredicate = HasScalarAtomics in {

defm S_BUFFER_ATOMIC_SWAP         : SM_Real_Atomics_gfx10 <0x40>;
defm S_BUFFER_ATOMIC_CMPSWAP      : SM_Real_Atomics_gfx10 <0x41>;
defm S_BUFFER_ATOMIC_ADD          : SM_Real_Atomics_gfx10 <0x42>;
defm S_BUFFER_ATOMIC_SUB          : SM_Real_Atomics_gfx10 <0x43>;
defm S_BUFFER_ATOMIC_SMIN         : SM_Real_Atomics_gfx10 <0x44>;
defm S_BUFFER_ATOMIC_UMIN         : SM_Real_Atomics_gfx10 <0x45>;
defm S_BUFFER_ATOMIC_SMAX         : SM_Real_Atomics_gfx10 <0x46>;
defm S_BUFFER_ATOMIC_UMAX         : SM_Real_Atomics_gfx10 <0x47>;
defm S_BUFFER_ATOMIC_AND          : SM_Real_Atomics_gfx10 <0x48>;
defm S_BUFFER_ATOMIC_OR           : SM_Real_Atomics_gfx10 <0x49>;
defm S_BUFFER_ATOMIC_XOR          : SM_Real_Atomics_gfx10 <0x4a>;
defm S_BUFFER_ATOMIC_INC          : SM_Real_Atomics_gfx10 <0x4b>;
defm S_BUFFER_ATOMIC_DEC          : SM_Real_Atomics_gfx10 <0x4c>;

defm S_BUFFER_ATOMIC_SWAP_X2      : SM_Real_Atomics_gfx10 <0x60>;
defm S_BUFFER_ATOMIC_CMPSWAP_X2   : SM_Real_Atomics_gfx10 <0x61>;
defm S_BUFFER_ATOMIC_ADD_X2       : SM_Real_Atomics_gfx10 <0x62>;
defm S_BUFFER_ATOMIC_SUB_X2       : SM_Real_Atomics_gfx10 <0x63>;
defm S_BUFFER_ATOMIC_SMIN_X2      : SM_Real_Atomics_gfx10 <0x64>;
defm S_BUFFER_ATOMIC_UMIN_X2      : SM_Real_Atomics_gfx10 <0x65>;
defm S_BUFFER_ATOMIC_SMAX_X2      : SM_Real_Atomics_gfx10 <0x66>;
defm S_BUFFER_ATOMIC_UMAX_X2      : SM_Real_Atomics_gfx10 <0x67>;
defm S_BUFFER_ATOMIC_AND_X2       : SM_Real_Atomics_gfx10 <0x68>;
defm S_BUFFER_ATOMIC_OR_X2        : SM_Real_Atomics_gfx10 <0x69>;
defm S_BUFFER_ATOMIC_XOR_X2       : SM_Real_Atomics_gfx10 <0x6a>;
defm S_BUFFER_ATOMIC_INC_X2       : SM_Real_Atomics_gfx10 <0x6b>;
defm S_BUFFER_ATOMIC_DEC_X2       : SM_Real_Atomics_gfx10 <0x6c>;

defm S_ATOMIC_SWAP                : SM_Real_Atomics_gfx10 <0x80>;
defm S_ATOMIC_CMPSWAP             : SM_Real_Atomics_gfx10 <0x81>;
defm S_ATOMIC_ADD                 : SM_Real_Atomics_gfx10 <0x82>;
defm S_ATOMIC_SUB                 : SM_Real_Atomics_gfx10 <0x83>;
defm S_ATOMIC_SMIN                : SM_Real_Atomics_gfx10 <0x84>;
defm S_ATOMIC_UMIN                : SM_Real_Atomics_gfx10 <0x85>;
defm S_ATOMIC_SMAX                : SM_Real_Atomics_gfx10 <0x86>;
defm S_ATOMIC_UMAX                : SM_Real_Atomics_gfx10 <0x87>;
defm S_ATOMIC_AND                 : SM_Real_Atomics_gfx10 <0x88>;
defm S_ATOMIC_OR                  : SM_Real_Atomics_gfx10 <0x89>;
defm S_ATOMIC_XOR                 : SM_Real_Atomics_gfx10 <0x8a>;
defm S_ATOMIC_INC                 : SM_Real_Atomics_gfx10 <0x8b>;
defm S_ATOMIC_DEC                 : SM_Real_Atomics_gfx10 <0x8c>;

defm S_ATOMIC_SWAP_X2             : SM_Real_Atomics_gfx10 <0xa0>;
defm S_ATOMIC_CMPSWAP_X2          : SM_Real_Atomics_gfx10 <0xa1>;
defm S_ATOMIC_ADD_X2              : SM_Real_Atomics_gfx10 <0xa2>;
defm S_ATOMIC_SUB_X2              : SM_Real_Atomics_gfx10 <0xa3>;
defm S_ATOMIC_SMIN_X2             : SM_Real_Atomics_gfx10 <0xa4>;
defm S_ATOMIC_UMIN_X2             : SM_Real_Atomics_gfx10 <0xa5>;
defm S_ATOMIC_SMAX_X2             : SM_Real_Atomics_gfx10 <0xa6>;
defm S_ATOMIC_UMAX_X2             : SM_Real_Atomics_gfx10 <0xa7>;
defm S_ATOMIC_AND_X2              : SM_Real_Atomics_gfx10 <0xa8>;
defm S_ATOMIC_OR_X2               : SM_Real_Atomics_gfx10 <0xa9>;
defm S_ATOMIC_XOR_X2              : SM_Real_Atomics_gfx10 <0xaa>;
defm S_ATOMIC_INC_X2              : SM_Real_Atomics_gfx10 <0xab>;
defm S_ATOMIC_DEC_X2              : SM_Real_Atomics_gfx10 <0xac>;

// One real discard per offset mode. The pseudos are cast to SM_Discard_Pseudo,
// matching SM_Real_Discard_vi, so that naming a non-discard pseudo is rejected
// by TableGen.
multiclass SM_Real_Discard_gfx10<bits<8> op> {
  defvar ps = NAME;
  def _IMM_gfx10      : SMEM_Real_gfx10 <op, !cast<SM_Discard_Pseudo>(ps#_IMM)>;
  def _SGPR_gfx10     : SMEM_Real_gfx10 <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>;
  def _SGPR_IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Discard_Pseudo>(ps#_SGPR_IMM)>;
}

defm S_DCACHE_DISCARD    : SM_Real_Discard_gfx10 <0x28>;
defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29>;

} // End SubtargetPredicate = HasScalarAtomics

// Generated lookup table over every SM_Real record, keyed by Opcode and
// exposing is_buffer; the generated lookup function is getSMEMOpcodeHelper.
def SMInfoTable : GenericTable {
  let FilterClass = "SM_Real";
  let CppTypeName = "SMInfo";
  let Fields = ["Opcode", "is_buffer"];

  let PrimaryKey = ["Opcode"];
  let PrimaryKeyName = "getSMEMOpcodeHelper";
}

//===----------------------------------------------------------------------===//
// GFX11.
//===----------------------------------------------------------------------===//

// GFX11-only real instruction built on the shared GFX10+ layout. DLC goes in
// Inst{13} and GLC in Inst{14}; each is encoded as 0 when the pseudo lacks
// the corresponding bit.
class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
    SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX11,
                            SGPR_NULL_gfx11plus> {
  let AssemblerPredicate = isGFX11Only;
  let DecoderNamespace = "GFX11";
  let Inst{13}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0);
  let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, 0);
}

// Convenience wrapper that looks the pseudo up by name.
class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName> :
    SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps), opName>;

// One real load per offset mode of the pseudo family `ps`. The assembly
// mnemonic is the lower-cased defm name, and a MnemonicAlias from the pseudo's
// own Mnemonic to that name is added under isGFX11Plus.
multiclass SM_Real_Loads_gfx11<bits<8> op, string ps> {
  defvar opName = !tolower(NAME);
  def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_IMM", opName>;
  def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR", opName>;
  def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps#"_SGPR_IMM", opName>;
  def : MnemonicAlias<!cast<SM_Pseudo>(ps#"_IMM").Mnemonic, opName>,
        Requires<[isGFX11Plus]>;
}

defm S_LOAD_B32  : SM_Real_Loads_gfx11<0x000, "S_LOAD_DWORD">;
defm S_LOAD_B64  : SM_Real_Loads_gfx11<0x001, "S_LOAD_DWORDX2">;
defm S_LOAD_B128 : SM_Real_Loads_gfx11<0x002, "S_LOAD_DWORDX4">;
defm S_LOAD_B256 : SM_Real_Loads_gfx11<0x003, "S_LOAD_DWORDX8">;
defm S_LOAD_B512 : SM_Real_Loads_gfx11<0x004, "S_LOAD_DWORDX16">;

defm S_BUFFER_LOAD_B32  : SM_Real_Loads_gfx11<0x008, "S_BUFFER_LOAD_DWORD">;
defm S_BUFFER_LOAD_B64  : SM_Real_Loads_gfx11<0x009, "S_BUFFER_LOAD_DWORDX2">;
defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx11<0x00a, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx11<0x00b, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx11<0x00c, "S_BUFFER_LOAD_DWORDX16">;

def S_GL1_INV_gfx11    : SMEM_Real_gfx11<0x020, S_GL1_INV>;
def S_DCACHE_INV_gfx11 : SMEM_Real_gfx11<0x021, S_DCACHE_INV>;

// Real instruction whose Inst{12-6} field encodes $sdata instead of $sdst.
// In this section it is used only for the probe instructions.
class SMEM_Real_Store_gfx11 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx11<op, ps> {
  // encoding
  bits<7> sdata;

  let sdst = ?;
  let Inst{12-6}  = !if(ps.has_sdst, sdata{6-0}, ?);
}

// One real probe per offset mode of the probe pseudo family named by NAME.
multiclass SM_Real_Probe_gfx11<bits<8> op> {
  defvar ps = NAME;
  def _IMM_gfx11  : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
  def _SGPR_gfx11 : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
  def _SGPR_IMM_gfx11
    : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
}

defm S_ATC_PROBE        : SM_Real_Probe_gfx11 <0x22>;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23>;

//===----------------------------------------------------------------------===//
// GFX12.
//===----------------------------------------------------------------------===//

// Encoding fields common to GFX12+ real instructions: a 6-bit opcode in
// Inst{18-13} and a 24-bit immediate offset in Inst{55-32}. As on GFX10/11,
// the soffset field takes sgpr_null's encoding when only an immediate offset
// is present, and unused offset fields stay undefined.
class SMEM_Real_gfx12Plus<bits<6> op, SM_Pseudo ps, string opName,
                          int subtarget, RegisterWithSubRegs sgpr_null> :
    SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 {

  let Inst{18-13} = op;
  let Inst{31-26} = 0x3d;

  let Inst{55-32} = !if(ps.has_offset, offset{23-0}, !if(ps.has_soffset, 0, ?));
  let Inst{63-57} = !if(ps.has_soffset, soffset{6-0},
                        !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?));
}

// GFX12 real instruction; adds the sbase and sdst fields.
class SMEM_Real_gfx12<bits<6> op, SM_Pseudo ps, string opName = ps.Mnemonic> :
    SMEM_Real_gfx12Plus<op, ps, opName, SIEncodingFamily.GFX12,
                        SGPR_NULL_gfx11plus> {
  let AssemblerPredicate = isGFX12Plus;
  let DecoderNamespace = "GFX12";

  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);
}

// Real instruction that encodes a 5-bit $sdata in Inst{10-6} in place of
// $sdst, with Inst{12-11} forced to zero. Used for the prefetch instructions
// and, below, for the GFX12 probes.
class SMEM_Real_Prefetch_gfx12<bits<6> op, SM_Pseudo ps> :
    SMEM_Real_gfx12<op, ps> {
  bits<7> sdata; // Only 5 bits of sdata are supported.

  let sdst = ?;
  let Inst{12-11} = 0; // Unused sdata bits.
  let Inst{10-6}  = !if(ps.has_sdst, sdata{4-0}, ?);
}

// GFX12 real load for the pseudo `ps # offsets.Variant`. The input operand
// list is rebuilt from the pseudo's BaseClass and the offset mode's operands,
// and two cpol sub-fields are placed in Inst{22-21} and Inst{24-23}.
class SMEM_Real_Load_gfx12<bits<6> op, string ps, string opName, OffsetMode offsets> :
    SMEM_Real_gfx12<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> {
  RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
  let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));

  let Inst{22-21} = cpol{4-3}; // scope
  let Inst{24-23} = cpol{1-0}; // th - only lower 2 bits are supported
}

// Real loads for the IMM and SGPR_IMM offset modes only; no plain-SGPR real is
// defined here. `ps` defaults to the defm name, and the mnemonic is the
// lower-cased defm name.
multiclass SM_Real_Loads_gfx12<bits<6> op, string ps = NAME> {
  defvar opName = !tolower(NAME);
  def _IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, IMM_Offset>;
  def _SGPR_IMM_gfx12 : SMEM_Real_Load_gfx12<op, ps, opName, SGPR_IMM_Offset>;
}

defm S_LOAD_B32  : SM_Real_Loads_gfx12<0x00, "S_LOAD_DWORD">;
defm S_LOAD_B64  : SM_Real_Loads_gfx12<0x01, "S_LOAD_DWORDX2">;
defm S_LOAD_B96  : SM_Real_Loads_gfx12<0x05, "S_LOAD_DWORDX3">;
defm S_LOAD_B128 : SM_Real_Loads_gfx12<0x02, "S_LOAD_DWORDX4">;
defm S_LOAD_B256 : SM_Real_Loads_gfx12<0x03, "S_LOAD_DWORDX8">;
defm S_LOAD_B512 : SM_Real_Loads_gfx12<0x04, "S_LOAD_DWORDX16">;

defm S_LOAD_I8   : SM_Real_Loads_gfx12<0x08>;
defm S_LOAD_U8   : SM_Real_Loads_gfx12<0x09>;
defm S_LOAD_I16  : SM_Real_Loads_gfx12<0x0a>;
defm S_LOAD_U16  : SM_Real_Loads_gfx12<0x0b>;

defm S_BUFFER_LOAD_B32  : SM_Real_Loads_gfx12<0x10, "S_BUFFER_LOAD_DWORD">;
defm S_BUFFER_LOAD_B64  : SM_Real_Loads_gfx12<0x11, "S_BUFFER_LOAD_DWORDX2">;
defm S_BUFFER_LOAD_B96  : SM_Real_Loads_gfx12<0x15, "S_BUFFER_LOAD_DWORDX3">;
defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx12<0x12, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx12<0x13, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx12<0x14, "S_BUFFER_LOAD_DWORDX16">;

defm S_BUFFER_LOAD_I8   : SM_Real_Loads_gfx12<0x18>;
defm S_BUFFER_LOAD_U8   : SM_Real_Loads_gfx12<0x19>;
defm S_BUFFER_LOAD_I16  : SM_Real_Loads_gfx12<0x1a>;
defm S_BUFFER_LOAD_U16  : SM_Real_Loads_gfx12<0x1b>;

def S_DCACHE_INV_gfx12 : SMEM_Real_gfx12<0x021, S_DCACHE_INV>;

def S_PREFETCH_INST_gfx12        : SMEM_Real_Prefetch_gfx12<0x24, S_PREFETCH_INST>;
def S_PREFETCH_INST_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x25, S_PREFETCH_INST_PC_REL>;
def S_PREFETCH_DATA_gfx12        : SMEM_Real_Prefetch_gfx12<0x26, S_PREFETCH_DATA>;
def S_BUFFER_PREFETCH_DATA_gfx12 : SMEM_Real_Prefetch_gfx12<0x27, S_BUFFER_PREFETCH_DATA>;
def S_PREFETCH_DATA_PC_REL_gfx12 : SMEM_Real_Prefetch_gfx12<0x28, S_PREFETCH_DATA_PC_REL>;

// GFX12 probes reuse the prefetch encoding (5-bit $sdata) and are defined for
// the IMM and SGPR_IMM offset modes only.
multiclass SMEM_Real_Probe_gfx12<bits<6> op> {
  defvar ps = NAME;
  def _IMM_gfx12      : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
  def _SGPR_IMM_gfx12 : SMEM_Real_Prefetch_gfx12<op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
}

defm S_ATC_PROBE        : SMEM_Real_Probe_gfx12<0x22>;
defm S_ATC_PROBE_BUFFER : SMEM_Real_Probe_gfx12<0x23>;