//===---- SMInstructions.td - Scalar Memory Instruction Definitions -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Immediate offset operand used by the SI (_IMM_si) load encodings below,
// which only place offset{7-0} into the instruction word.
def smrd_offset_8 : NamedOperandU32<"SMRDOffset8",
                                    NamedMatchClass<"SMRDOffset8">> {
  let OperandType = "OPERAND_IMMEDIATE";
}

// Immediate offset operand for the SMEM encodings. Encoding and decoding are
// delegated to the named C++ hooks.
class SMEMOffset : NamedOperandU32<"SMEMOffset",
                                   NamedMatchClass<"SMEMOffset">> {
  let OperandType = "OPERAND_IMMEDIATE";
  let EncoderMethod = "getSMEMOffsetEncoding";
  let DecoderMethod = "decodeSMEMOffset";
}

def smem_offset : SMEMOffset;

// Same operand, but printed through a dedicated hook. Used for the immediate
// part of the combined SGPR+IMM addressing mode (see SGPR_IMM_Offset).
def smem_offset_mod : SMEMOffset {
  let PrintMethod = "printSMEMOffsetMod";
}

//===----------------------------------------------------------------------===//
// Scalar Memory classes
//===----------------------------------------------------------------------===//

// Base class of every scalar-memory pseudo instruction.
//
// A pseudo has an empty asm string and SIEncodingFamily.NONE; the mnemonic and
// the operand string are kept in Mnemonic / AsmOperands so that SM_Real can
// rebuild the final asm string. The defaults describe a plain load (mayLoad,
// no store, no side effects); subclasses override what differs.
class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> :
  InstSI <outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {
  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let LGKM_CNT = 1;
  let SMRD = 1;
  let mayStore = 0;
  let mayLoad = 1;
  let hasSideEffects = 0;
  let UseNamedOperandTable = 1;
  let SchedRW = [WriteSMEM];

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Operand-presence flags. has_sbase, has_sdst, has_glc, has_offset and
  // has_soffset are read by the real-instruction classes to decide which
  // encoding fields are populated and which are left undefined (?).
  bits<1> has_sbase = 1;
  bits<1> has_sdst = 1;
  bit has_glc = 0;
  bit has_dlc = 0;
  bit has_offset = 0;
  bit has_soffset = 0;
  bit is_buffer = 0;
}

// Base class of every real (encodable) scalar-memory instruction.
//
// Takes its operand lists from the pseudo `ps`, builds the asm string as
// opName # ps.AsmOperands, copies the relevant flags from the pseudo and
// declares the operand fields the per-target encodings refer to.
class SM_Real <SM_Pseudo ps, string opName = ps.Mnemonic>
  : InstSI<ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands> {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  Instruction Opcode = !cast<Instruction>(NAME);

  // copy relevant pseudo op flags
  let LGKM_CNT             = ps.LGKM_CNT;
  let SMRD                 = ps.SMRD;
  let mayStore             = ps.mayStore;
  let mayLoad              = ps.mayLoad;
  let hasSideEffects       = ps.hasSideEffects;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW              = ps.SchedRW;
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;

  let TSFlags = ps.TSFlags;

  bit is_buffer = ps.is_buffer;

  // encoding
  bits<7>  sbase;
  bits<7>  sdst;
  bits<32> offset;
  bits<8>  soffset;
  bits<5>  cpol;
}

// Describes one addressing variant of a scalar-memory instruction:
//   hasOffset / hasSOffset - whether an immediate / SGPR offset is present,
//   variant                - suffix appended to the pseudo name,
//   ins                    - the offset operand(s) added to the input list,
//   asm                    - the asm text printed for those operand(s).
class OffsetMode<bit hasOffset, bit hasSOffset, string variant,
                 dag ins, string asm> {
  bit HasOffset = hasOffset;
  bit HasSOffset = hasSOffset;
  string Variant = variant;
  dag Ins = ins;
  string Asm = asm;
}

def IMM_Offset : OffsetMode<1, 0, "_IMM", (ins smem_offset:$offset), "$offset">;
def SGPR_Offset : OffsetMode<0, 1, "_SGPR", (ins SReg_32:$soffset), "$soffset">;
def SGPR_IMM_Offset : OffsetMode<1, 1, "_SGPR_IMM",
                                 (ins SReg_32:$soffset, smem_offset_mod:$offset),
                                 "$soffset$offset">;

// Probe pseudo: no result, an 8-bit immediate $sdata plus a base and an
// offset. Neither loads nor stores, but is marked as having side effects.
class SM_Probe_Pseudo <string opName, RegisterClass baseClass, OffsetMode offsets>
  : SM_Pseudo<opName, (outs),
              !con((ins i8imm:$sdata, baseClass:$sbase), offsets.Ins),
              " $sdata, $sbase, " # offsets.Asm> {
  let mayLoad = 0;
  let mayStore = 0;
  let has_glc = 0;
  let LGKM_CNT = 0;
  let ScalarStore = 0;
  let hasSideEffects = 1;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let PseudoInstr = opName # offsets.Variant;
}

// Load pseudo: $sdst <- [$sbase + offset], with a trailing cache-policy
// operand. BaseClass is recorded so real instructions can rebuild the
// operand list.
class SM_Load_Pseudo <string opName, RegisterClass baseClass,
                      RegisterClass dstClass, OffsetMode offsets>
  : SM_Pseudo<opName, (outs dstClass:$sdst),
              !con((ins baseClass:$sbase), offsets.Ins, (ins CPol:$cpol)),
              " $sdst, $sbase, " # offsets.Asm # "$cpol", []> {
  RegisterClass BaseClass = baseClass;
  let mayLoad = 1;
  let mayStore = 0;
  let has_glc = 1;
  let has_dlc = 1;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let PseudoInstr = opName # offsets.Variant;
}

// Store pseudo: [$sbase + offset] <- $sdata, with a trailing cache-policy
// operand. BaseClass / SrcClass are recorded for the real instructions.
class SM_Store_Pseudo <string opName, RegisterClass baseClass,
                       RegisterClass srcClass, OffsetMode offsets>
  : SM_Pseudo<opName, (outs), !con((ins srcClass:$sdata, baseClass:$sbase),
                                   offsets.Ins, (ins CPol:$cpol)),
              " $sdata, $sbase, " # offsets.Asm # "$cpol"> {
  RegisterClass BaseClass = baseClass;
  RegisterClass SrcClass = srcClass;
  let mayLoad = 0;
  let mayStore = 1;
  let has_glc = 1;
  let has_dlc = 1;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let ScalarStore = 1;
  let PseudoInstr = opName # offsets.Variant;
}

// Discard pseudo: takes only a 64-bit base and an offset, produces no result
// (has_sdst = 0) and is modelled purely through hasSideEffects.
class SM_Discard_Pseudo <string opName, OffsetMode offsets>
  : SM_Pseudo<opName, (outs), !con((ins SReg_64:$sbase), offsets.Ins),
              " $sbase, " # offsets.Asm> {
  let mayLoad = 0;
  let mayStore = 0;
  let has_glc = 0;
  let has_sdst = 0;
  let ScalarStore = 0;
  let hasSideEffects = 1;
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let PseudoInstr = opName # offsets.Variant;
}

// Instantiates the _IMM, _SGPR and _SGPR_IMM variants of a load.
multiclass SM_Pseudo_Loads<string opName,
                           RegisterClass baseClass,
                           RegisterClass dstClass> {
  def _IMM : SM_Load_Pseudo <opName, baseClass, dstClass, IMM_Offset>;
  def _SGPR : SM_Load_Pseudo <opName, baseClass, dstClass, SGPR_Offset>;
  def _SGPR_IMM : SM_Load_Pseudo <opName, baseClass, dstClass, SGPR_IMM_Offset>;
}

// Instantiates the _IMM, _SGPR and _SGPR_IMM variants of a store.
multiclass SM_Pseudo_Stores<string opName,
                            RegisterClass baseClass,
                            RegisterClass srcClass> {
  def _IMM : SM_Store_Pseudo <opName, baseClass, srcClass, IMM_Offset>;
  def _SGPR : SM_Store_Pseudo <opName, baseClass, srcClass, SGPR_Offset>;
  def _SGPR_IMM : SM_Store_Pseudo <opName, baseClass, srcClass, SGPR_IMM_Offset>;
}

// Instantiates the _IMM, _SGPR and _SGPR_IMM variants of a discard.
multiclass SM_Pseudo_Discards<string opName> {
  def _IMM : SM_Discard_Pseudo <opName, IMM_Offset>;
  def _SGPR : SM_Discard_Pseudo <opName, SGPR_Offset>;
  def _SGPR_IMM : SM_Discard_Pseudo <opName, SGPR_IMM_Offset>;
}

// Operand-less pseudo that produces a 64-bit value in $sdst and is selected
// from the intrinsic `node`. Has no base register.
class SM_Time_Pseudo<string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
  opName, (outs SReg_64_XEXEC:$sdst), (ins),
  " $sdst", [(set i64:$sdst, (node))]> {
  let hasSideEffects = 1;

  let mayStore = 0;
  let mayLoad = 0;
  let has_sbase = 0;
}

// Pseudo with no operands and no result, selected from the intrinsic `node`;
// modelled purely through hasSideEffects.
class SM_Inval_Pseudo <string opName, SDPatternOperator node = null_frag> : SM_Pseudo<
  opName, (outs), (ins), "", [(node)]> {
  let hasSideEffects = 1;
  let mayLoad = 0;
  let mayStore = 0;
  let has_sdst = 0;
  let has_sbase = 0;
}

// Instantiates the _IMM, _SGPR and _SGPR_IMM variants of a probe.
multiclass SM_Pseudo_Probe<string opName, RegisterClass baseClass> {
  def _IMM : SM_Probe_Pseudo <opName, baseClass, IMM_Offset>;
  def _SGPR : SM_Probe_Pseudo <opName, baseClass, SGPR_Offset>;
  def _SGPR_IMM : SM_Probe_Pseudo <opName, baseClass, SGPR_IMM_Offset>;
}

// Operand-less pseudo that produces a 32-bit value in $sdst and is selected
// from the intrinsic `node`. Has no base register.
class SM_WaveId_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
  opName, (outs SReg_32_XM0_XEXEC:$sdst), (ins),
  " $sdst", [(set i32:$sdst, (node))]> {
  let hasSideEffects = 1;
  let mayStore = 0;
  let mayLoad = 0;
  let has_sbase = 0;
}

//===----------------------------------------------------------------------===//
// Scalar Atomic Memory Classes
//===----------------------------------------------------------------------===//

// Common base of the scalar atomic pseudos. `isRet` selects the returning
// form; it is mirrored into `glc`, which the real encodings read to force the
// GLC bit and to choose between $sdst and $sdata.
class SM_Atomic_Pseudo <string opName,
                        dag outs, dag ins, string asmOps, bit isRet>
  : SM_Pseudo<opName, outs, ins, asmOps, []> {

  bit glc = isRet;

  let mayLoad = 1;
  let mayStore = 1;
  let has_glc = 1;
  let has_dlc = 1;
  let has_soffset = 1;

  // Should these be set?
  let ScalarStore = 1;
  let hasSideEffects = 1;
  let maybeAtomic = 1;

  let IsAtomicNoRet = !not(isRet);
  let IsAtomicRet = isRet;

  let AsmMatchConverter = "cvtSMEMAtomic";
}

// One concrete atomic pseudo for a given addressing variant and return mode.
//
// The returning form writes $sdst, ties it to $sdata ("$sdst = $sdata"),
// excludes $sdata from encoding and uses the CPol_GLC1 operand; the
// non-returning form has no outputs and prints $sdata.
class SM_Pseudo_Atomic<string opName,
                       RegisterClass baseClass,
                       RegisterClass dataClass,
                       OffsetMode offsets,
                       bit isRet,
                       string opNameWithSuffix =
                         opName # offsets.Variant # !if(isRet, "_RTN", ""),
                       Operand CPolTy = !if(isRet, CPol_GLC1, CPol)> :
  SM_Atomic_Pseudo<opName,
                   !if(isRet, (outs dataClass:$sdst), (outs)),
                   !con((ins dataClass:$sdata, baseClass:$sbase), offsets.Ins,
                        (ins CPolTy:$cpol)),
                   !if(isRet, " $sdst", " $sdata") #
                     ", $sbase, " # offsets.Asm # "$cpol",
                   isRet>,
  AtomicNoRet <opNameWithSuffix, isRet> {
  let has_offset = offsets.HasOffset;
  let has_soffset = offsets.HasSOffset;
  let PseudoInstr = opNameWithSuffix;

  let Constraints = !if(isRet, "$sdst = $sdata", "");
  let DisableEncoding = !if(isRet, "$sdata", "");
}

// Instantiates all six variants of an atomic: three addressing modes, each
// in non-returning and returning (_RTN) form.
multiclass SM_Pseudo_Atomics<string opName,
                             RegisterClass baseClass,
                             RegisterClass dataClass> {
  def _IMM          : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 0>;
  def _SGPR         : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 0>;
  def _SGPR_IMM     : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 0>;
  def _IMM_RTN      : SM_Pseudo_Atomic <opName, baseClass, dataClass, IMM_Offset, 1>;
  def _SGPR_RTN     : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_Offset, 1>;
  def _SGPR_IMM_RTN : SM_Pseudo_Atomic <opName, baseClass, dataClass, SGPR_IMM_Offset, 1>;
}

//===----------------------------------------------------------------------===//
// Scalar Memory Instructions
//===----------------------------------------------------------------------===//

// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
// SMRD instructions, because the SReg_32_XM0 register class does not include M0
// and writing to M0 from an SMRD instruction will hang the GPU.

// XXX - SMEM instructions do not allow exec for data operand, but
// does sdst for SMRD on SI/CI?
defm S_LOAD_DWORD    : SM_Pseudo_Loads <"s_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_DWORDX2  : SM_Pseudo_Loads <"s_load_dwordx2", SReg_64, SReg_64_XEXEC>;
defm S_LOAD_DWORDX4  : SM_Pseudo_Loads <"s_load_dwordx4", SReg_64, SReg_128>;
defm S_LOAD_DWORDX8  : SM_Pseudo_Loads <"s_load_dwordx8", SReg_64, SReg_256>;
defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <"s_load_dwordx16", SReg_64, SReg_512>;

let is_buffer = 1 in {
defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <
  "s_buffer_load_dword", SReg_128, SReg_32_XM0_XEXEC
>;

// FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
// SI/CI, but disallowed for SMEM on VI.
defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <
  "s_buffer_load_dwordx2", SReg_128, SReg_64_XEXEC
>;

defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <
  "s_buffer_load_dwordx4", SReg_128, SReg_128
>;

defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <
  "s_buffer_load_dwordx8", SReg_128, SReg_256
>;

defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <
  "s_buffer_load_dwordx16", SReg_128, SReg_512
>;
}

let SubtargetPredicate = HasScalarStores in {
defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>;
defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>;

let is_buffer = 1 in {
defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <
  "s_buffer_store_dword", SReg_128, SReg_32_XM0_XEXEC
>;

defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <
  "s_buffer_store_dwordx2", SReg_128, SReg_64_XEXEC
>;

defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <
  "s_buffer_store_dwordx4", SReg_128, SReg_128
>;
}
} // End SubtargetPredicate = HasScalarStores

// Note: the braceless `let ... in` below applies to S_MEMTIME only;
// S_DCACHE_INV is defined without that predicate.
let SubtargetPredicate = HasSMemTimeInst in
def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;

let SubtargetPredicate = isGFX7GFX8GFX9 in {
def S_DCACHE_INV_VOL : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
} // let SubtargetPredicate = isGFX7GFX8GFX9

let SubtargetPredicate = isGFX8Plus in {
let OtherPredicates = [HasScalarStores] in {
def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
} // End OtherPredicates = [HasScalarStores]

defm S_ATC_PROBE : SM_Pseudo_Probe <"s_atc_probe", SReg_64>;
let is_buffer = 1 in {
defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <"s_atc_probe_buffer", SReg_128>;
}
} // SubtargetPredicate = isGFX8Plus

let SubtargetPredicate = HasSMemRealTime in
def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;

let SubtargetPredicate = isGFX10Plus in
def S_GL1_INV : SM_Inval_Pseudo<"s_gl1_inv">;
let SubtargetPredicate = HasGetWaveIdInst in
def S_GET_WAVEID_IN_WORKGROUP : SM_WaveId_Pseudo <"s_get_waveid_in_workgroup", int_amdgcn_s_get_waveid_in_workgroup>;


let SubtargetPredicate = HasScalarFlatScratchInsts, Uses = [FLAT_SCR] in {
defm S_SCRATCH_LOAD_DWORD : SM_Pseudo_Loads <"s_scratch_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
defm S_SCRATCH_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_scratch_load_dwordx2", SReg_64, SReg_64_XEXEC>;
defm S_SCRATCH_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_scratch_load_dwordx4", SReg_64, SReg_128>;

defm S_SCRATCH_STORE_DWORD : SM_Pseudo_Stores <"s_scratch_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
defm S_SCRATCH_STORE_DWORDX2 : SM_Pseudo_Stores <"s_scratch_store_dwordx2", SReg_64, SReg_64_XEXEC>;
defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <"s_scratch_store_dwordx4", SReg_64, SReg_128>;
} // SubtargetPredicate = HasScalarFlatScratchInsts

let SubtargetPredicate = HasScalarAtomics in {

let is_buffer = 1 in {
defm S_BUFFER_ATOMIC_SWAP : SM_Pseudo_Atomics <"s_buffer_atomic_swap", SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <"s_buffer_atomic_cmpswap", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_ADD : SM_Pseudo_Atomics <"s_buffer_atomic_add", SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_SUB : SM_Pseudo_Atomics <"s_buffer_atomic_sub", SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_SMIN : SM_Pseudo_Atomics <"s_buffer_atomic_smin", SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_UMIN : SM_Pseudo_Atomics <"s_buffer_atomic_umin", SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_SMAX : SM_Pseudo_Atomics <"s_buffer_atomic_smax", SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_UMAX : SM_Pseudo_Atomics <"s_buffer_atomic_umax", SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_AND : SM_Pseudo_Atomics <"s_buffer_atomic_and", SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_OR : SM_Pseudo_Atomics <"s_buffer_atomic_or", SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_XOR : SM_Pseudo_Atomics <"s_buffer_atomic_xor", SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_INC : SM_Pseudo_Atomics <"s_buffer_atomic_inc", SReg_128, SReg_32_XM0_XEXEC>;
defm S_BUFFER_ATOMIC_DEC : SM_Pseudo_Atomics <"s_buffer_atomic_dec", SReg_128, SReg_32_XM0_XEXEC>;

defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_swap_x2", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_cmpswap_x2", SReg_128, SReg_128>;
defm S_BUFFER_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_add_x2", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_sub_x2", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_smin_x2", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_umin_x2", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_smax_x2", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_umax_x2", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_AND_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_and_x2", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_OR_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_or_x2", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_xor_x2", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_INC_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_inc_x2", SReg_128, SReg_64_XEXEC>;
defm S_BUFFER_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <"s_buffer_atomic_dec_x2", SReg_128, SReg_64_XEXEC>;
}

defm S_ATOMIC_SWAP : SM_Pseudo_Atomics <"s_atomic_swap", SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <"s_atomic_cmpswap", SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_ADD : SM_Pseudo_Atomics <"s_atomic_add", SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_SUB : SM_Pseudo_Atomics <"s_atomic_sub", SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_SMIN : SM_Pseudo_Atomics <"s_atomic_smin", SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_UMIN : SM_Pseudo_Atomics <"s_atomic_umin", SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_SMAX : SM_Pseudo_Atomics <"s_atomic_smax", SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_UMAX : SM_Pseudo_Atomics <"s_atomic_umax", SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_AND : SM_Pseudo_Atomics <"s_atomic_and", SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_OR : SM_Pseudo_Atomics <"s_atomic_or", SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_XOR : SM_Pseudo_Atomics <"s_atomic_xor", SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_INC : SM_Pseudo_Atomics <"s_atomic_inc", SReg_64, SReg_32_XM0_XEXEC>;
defm S_ATOMIC_DEC : SM_Pseudo_Atomics <"s_atomic_dec", SReg_64, SReg_32_XM0_XEXEC>;

defm S_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <"s_atomic_swap_x2", SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <"s_atomic_cmpswap_x2", SReg_64, SReg_128>;
defm S_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <"s_atomic_add_x2", SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <"s_atomic_sub_x2", SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <"s_atomic_smin_x2", SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <"s_atomic_umin_x2", SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <"s_atomic_smax_x2", SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <"s_atomic_umax_x2", SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_AND_X2 : SM_Pseudo_Atomics <"s_atomic_and_x2", SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_OR_X2 : SM_Pseudo_Atomics <"s_atomic_or_x2", SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <"s_atomic_xor_x2", SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_INC_X2 : SM_Pseudo_Atomics <"s_atomic_inc_x2", SReg_64, SReg_64_XEXEC>;
defm S_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <"s_atomic_dec_x2", SReg_64, SReg_64_XEXEC>;

} // let SubtargetPredicate = HasScalarAtomics

let SubtargetPredicate = HasScalarAtomics in {
defm S_DCACHE_DISCARD : SM_Pseudo_Discards <"s_dcache_discard">;
defm S_DCACHE_DISCARD_X2 : SM_Pseudo_Discards <"s_dcache_discard_x2">;
}

//===----------------------------------------------------------------------===//
// Targets
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// SI
//===----------------------------------------------------------------------===//

// 32-bit SMRD encoding, assembled under the isGFX6GFX7 predicate.
//   Inst{7-0}   immediate offset{7-0}, or the soffset register, or undefined
//   Inst{8}     set when the low byte is an immediate (ps.has_offset)
//   Inst{14-9}  sbase{6-1}
//   Inst{21-15} sdst
//   Inst{26-22} opcode
//   Inst{31-27} fixed encoding tag 0x18
class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
  : SM_Real<ps>
  , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
  , Enc32 {

  let AssemblerPredicate = isGFX6GFX7;
  let DecoderNamespace = "GFX6GFX7";

  let Inst{7-0}   = !if(ps.has_offset, offset{7-0}, !if(ps.has_soffset, soffset, ?));
  let Inst{8}     = ps.has_offset;
  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{26-22} = op;
  let Inst{31-27} = 0x18; //encoding
}

// Real SI loads for the _IMM and _SGPR pseudos named by `ps`. The operand
// list is overridden so the immediate form uses smrd_offset_8 instead of the
// pseudo's smem_offset.
multiclass SM_Real_Loads_si<bits<5> op, string ps,
                            SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
                            SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {

  def _IMM_si : SMRD_Real_si <op, immPs> {
    let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, CPol:$cpol);
  }

  def _SGPR_si : SMRD_Real_si <op, sgprPs> {
    let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol);
  }

}

defm S_LOAD_DWORD           : SM_Real_Loads_si <0x00, "S_LOAD_DWORD">;
defm S_LOAD_DWORDX2         : SM_Real_Loads_si <0x01, "S_LOAD_DWORDX2">;
defm S_LOAD_DWORDX4         : SM_Real_Loads_si <0x02, "S_LOAD_DWORDX4">;
defm S_LOAD_DWORDX8         : SM_Real_Loads_si <0x03, "S_LOAD_DWORDX8">;
defm S_LOAD_DWORDX16        : SM_Real_Loads_si <0x04, "S_LOAD_DWORDX16">;
defm S_BUFFER_LOAD_DWORD    : SM_Real_Loads_si <0x08, "S_BUFFER_LOAD_DWORD">;
defm S_BUFFER_LOAD_DWORDX2  : SM_Real_Loads_si <0x09, "S_BUFFER_LOAD_DWORDX2">;
defm S_BUFFER_LOAD_DWORDX4  : SM_Real_Loads_si <0x0a, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_DWORDX8  : SM_Real_Loads_si <0x0b, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si <0x0c, "S_BUFFER_LOAD_DWORDX16">;

def S_MEMTIME_si    : SMRD_Real_si <0x1e, S_MEMTIME>;
def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>;


//===----------------------------------------------------------------------===//
// VI and GFX9.
//===----------------------------------------------------------------------===//

// 64-bit SMEM encoding shared by VI and GFX9.
//
// IsGFX9SpecificEncoding selects the GFX9-only form: it switches the
// assembler predicate to isGFX9Only and enables the separate soffset field in
// Inst{63-57}. Fields the pseudo does not have are left undefined (?).
class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
  : SM_Real<ps>
  , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>
  , Enc64 {
  field bit IsGFX9SpecificEncoding = false;
  let AssemblerPredicate = !if(IsGFX9SpecificEncoding, isGFX9Only, isGFX8GFX9);
  let DecoderNamespace = "GFX8";

  let Inst{5-0}   = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{12-6}  = !if(ps.has_sdst, sdst{6-0}, ?);

  // Note that for GFX9 instructions with immediate offsets, soffset_en
  // must be defined, whereas in GFX8 it's undefined in all cases,
  // meaning GFX9 is not perfectly backward-compatible with GFX8, despite
  // documentation suggesting otherwise.
  field bit SOffsetEn = !if(IsGFX9SpecificEncoding,
    !if(ps.has_offset, ps.has_soffset, !if(ps.has_soffset, 0, ?)),
    ?);
  let Inst{14} = SOffsetEn;

  let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?);

  // imm
  // TODO: Shall not be defined if the instruction has no offset nor
  // soffset.
  let Inst{17} = ps.has_offset;

  let Inst{25-18} = op;
  let Inst{31-26} = 0x30; //encoding

  // VI supports 20-bit unsigned offsets while GFX9+ supports 21-bit signed.
  // Offset value is corrected accordingly when offset is encoded/decoded.
  // TODO: Forbid non-M0 register offsets for GFX8 stores and atomics.
  field bits<21> Offset;
  // The low 7 bits hold either the immediate or, when there is no immediate,
  // the soffset register; the upper bits are only defined for an immediate.
  let Offset{6-0} = !if(ps.has_offset, offset{6-0},
                                       !if(ps.has_soffset, soffset{6-0}, ?));
  let Offset{20-7} = !if(ps.has_offset, offset{20-7}, ?);
  let Inst{52-32} = Offset;

  // soffset
  let Inst{63-57} = !if(!and(IsGFX9SpecificEncoding, ps.has_soffset),
                        soffset{6-0}, ?);
}

// Real VI/GFX9 load for the pseudo named ps # offsets.Variant. The operand
// list is rebuilt from the pseudo's BaseClass and the addressing variant.
class SMEM_Real_Load_vi<bits<8> op, string ps, OffsetMode offsets>
    : SMEM_Real_vi<op, !cast<SM_Pseudo>(ps # offsets.Variant)> {
  RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass;
  let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol));
}

// The alternative GFX9 SGPR encoding using soffset to encode the
// offset register. Not available in assembler and goes to the GFX9
// encoding family to avoid conflicts with the primary SGPR variant.
class SMEM_Real_SGPR_alt_gfx9 {
  bit IsGFX9SpecificEncoding = true;
  bit SOffsetEn = 1;
  bit Offset = ?;
  int Subtarget = SIEncodingFamily.GFX9;
  string AsmVariantName = "NonParsable";
}

// Real loads: VI _IMM and _SGPR, the alternative GFX9 SGPR encoding, and the
// GFX9-only SGPR+IMM form.
multiclass SM_Real_Loads_vi<bits<8> op, string ps> {
  def _IMM_vi : SMEM_Real_Load_vi <op, ps, IMM_Offset>;
  def _SGPR_vi : SMEM_Real_Load_vi <op, ps, SGPR_Offset>;
  def _SGPR_alt_gfx9 : SMEM_Real_Load_vi <op, ps, SGPR_Offset>,
                       SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9 : SMEM_Real_Load_vi <op, ps, SGPR_IMM_Offset>;
}

// Base for real instructions that take $sdata instead of producing $sdst:
// sdst is left undefined and Inst{12-6} carries sdata.
class SMEM_Real_Store_Base_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> {
  // encoding
  bits<7> sdata;

  let sdst = ?;
  let Inst{12-6}  = !if(ps.has_sdst, sdata{6-0}, ?);
}

// Real VI/GFX9 store for the pseudo named ps # offsets.Variant. The operand
// list is rebuilt from the pseudo's SrcClass / BaseClass and the variant.
class SMEM_Real_Store_vi <bits<8> op, string ps, OffsetMode offsets>
    : SMEM_Real_Store_Base_vi <op, !cast<SM_Pseudo>(ps # offsets.Variant)> {
  RegisterClass SrcClass = !cast<SM_Store_Pseudo>(ps # offsets.Variant).SrcClass;
  RegisterClass BaseClass = !cast<SM_Store_Pseudo>(ps # offsets.Variant).BaseClass;
  let InOperandList = !con((ins SrcClass:$sdata, BaseClass:$sbase),
                           offsets.Ins, (ins CPol:$cpol));
}

// Real stores: same four variants as SM_Real_Loads_vi.
multiclass SM_Real_Stores_vi<bits<8> op, string ps> {
  def _IMM_vi : SMEM_Real_Store_vi <op, ps, IMM_Offset>;
  def _SGPR_vi : SMEM_Real_Store_vi <op, ps, SGPR_Offset>;
  def _SGPR_alt_gfx9 : SMEM_Real_Store_vi <op, ps, SGPR_Offset>,
                       SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9 : SMEM_Real_Store_vi <op, ps, SGPR_IMM_Offset>;
}

// Real probes: same four variants, built on the store base because the probe
// pseudos carry $sdata rather than $sdst.
multiclass SM_Real_Probe_vi<bits<8> op, string ps> {
  def _IMM_vi  : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>;
  def _SGPR_vi : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>;
  def _SGPR_alt_gfx9
    : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9
    : SMEM_Real_Store_Base_vi <op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>;
}

defm S_LOAD_DWORD           : SM_Real_Loads_vi <0x00, "S_LOAD_DWORD">;
defm S_LOAD_DWORDX2         : SM_Real_Loads_vi <0x01, "S_LOAD_DWORDX2">;
defm S_LOAD_DWORDX4         : SM_Real_Loads_vi <0x02, "S_LOAD_DWORDX4">;
defm S_LOAD_DWORDX8         : SM_Real_Loads_vi <0x03, "S_LOAD_DWORDX8">;
defm S_LOAD_DWORDX16        : SM_Real_Loads_vi <0x04, "S_LOAD_DWORDX16">;
defm S_BUFFER_LOAD_DWORD    : SM_Real_Loads_vi <0x08, "S_BUFFER_LOAD_DWORD">;
defm S_BUFFER_LOAD_DWORDX2  : SM_Real_Loads_vi <0x09, "S_BUFFER_LOAD_DWORDX2">;
defm S_BUFFER_LOAD_DWORDX4  : SM_Real_Loads_vi <0x0a, "S_BUFFER_LOAD_DWORDX4">;
defm S_BUFFER_LOAD_DWORDX8  : SM_Real_Loads_vi <0x0b, "S_BUFFER_LOAD_DWORDX8">;
defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c, "S_BUFFER_LOAD_DWORDX16">;

defm S_STORE_DWORD : SM_Real_Stores_vi <0x10, "S_STORE_DWORD">;
defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11, "S_STORE_DWORDX2">;
defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12, "S_STORE_DWORDX4">;

defm S_BUFFER_STORE_DWORD    : SM_Real_Stores_vi <0x18, "S_BUFFER_STORE_DWORD">;
defm S_BUFFER_STORE_DWORDX2  : SM_Real_Stores_vi <0x19, "S_BUFFER_STORE_DWORDX2">;
defm S_BUFFER_STORE_DWORDX4  : SM_Real_Stores_vi <0x1a, "S_BUFFER_STORE_DWORDX4">;

// These instructions use same encoding
def S_DCACHE_INV_vi         : SMEM_Real_vi <0x20, S_DCACHE_INV>;
def S_DCACHE_WB_vi          : SMEM_Real_vi <0x21, S_DCACHE_WB>;
def S_DCACHE_INV_VOL_vi     : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>;
def S_DCACHE_WB_VOL_vi      : SMEM_Real_vi <0x23, S_DCACHE_WB_VOL>;
def S_MEMTIME_vi            : SMEM_Real_vi <0x24, S_MEMTIME>;
def S_MEMREALTIME_vi        : SMEM_Real_vi <0x25, S_MEMREALTIME>;

defm S_SCRATCH_LOAD_DWORD    : SM_Real_Loads_vi <0x05, "S_SCRATCH_LOAD_DWORD">;
defm S_SCRATCH_LOAD_DWORDX2  : SM_Real_Loads_vi <0x06, "S_SCRATCH_LOAD_DWORDX2">;
defm S_SCRATCH_LOAD_DWORDX4  : SM_Real_Loads_vi <0x07, "S_SCRATCH_LOAD_DWORDX4">;

defm S_SCRATCH_STORE_DWORD   : SM_Real_Stores_vi <0x15, "S_SCRATCH_STORE_DWORD">;
defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_vi <0x16, "S_SCRATCH_STORE_DWORDX2">;
defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_vi <0x17, "S_SCRATCH_STORE_DWORDX4">;

defm S_ATC_PROBE        : SM_Real_Probe_vi <0x26, "S_ATC_PROBE">;
defm S_ATC_PROBE_BUFFER : SM_Real_Probe_vi <0x27, "S_ATC_PROBE_BUFFER">;

//===----------------------------------------------------------------------===//
// GFX9
//===----------------------------------------------------------------------===//

// Real encoding of a scalar atomic. The GLC bit of cpol is forced to the
// pseudo's `glc` (set for returning atomics), and Inst{12-6} carries sdst
// for the returning form and sdata otherwise. Constraints / DisableEncoding
// are copied from the pseudo.
class SMEM_Atomic_Real_vi <bits<8> op, SM_Atomic_Pseudo ps>
  : SMEM_Real_vi <op, ps>,
    AtomicNoRet <!subst("_RTN","",NAME), ps.glc> {

  bits<7> sdata;

  let Constraints = ps.Constraints;
  let DisableEncoding = ps.DisableEncoding;

  let cpol{CPolBit.GLC} = ps.glc;
  let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0});
}

// Real atomics: the four encoding variants (VI _IMM, VI _SGPR, alternative
// GFX9 SGPR, GFX9-only SGPR+IMM), each in non-returning and _RTN form.
multiclass SM_Real_Atomics_vi<bits<8> op, string ps> {
  def _IMM_vi       : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>;
  def _SGPR_vi      : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>;
  def _SGPR_alt_gfx9
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>;
  def _IMM_RTN_vi   : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>;
  def _SGPR_RTN_vi  : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>;
  def _SGPR_RTN_alt_gfx9
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>,
      SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_RTN_gfx9
    : SMEM_Atomic_Real_vi <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>;
}

defm S_BUFFER_ATOMIC_SWAP         : SM_Real_Atomics_vi <0x40, "S_BUFFER_ATOMIC_SWAP">;
defm S_BUFFER_ATOMIC_CMPSWAP      : SM_Real_Atomics_vi <0x41, "S_BUFFER_ATOMIC_CMPSWAP">;
defm S_BUFFER_ATOMIC_ADD          : SM_Real_Atomics_vi <0x42, "S_BUFFER_ATOMIC_ADD">;
defm S_BUFFER_ATOMIC_SUB          : SM_Real_Atomics_vi <0x43, "S_BUFFER_ATOMIC_SUB">;
defm S_BUFFER_ATOMIC_SMIN         : SM_Real_Atomics_vi <0x44, "S_BUFFER_ATOMIC_SMIN">;
defm S_BUFFER_ATOMIC_UMIN         : SM_Real_Atomics_vi <0x45, "S_BUFFER_ATOMIC_UMIN">;
defm S_BUFFER_ATOMIC_SMAX         : SM_Real_Atomics_vi <0x46, "S_BUFFER_ATOMIC_SMAX">;
defm S_BUFFER_ATOMIC_UMAX         : SM_Real_Atomics_vi <0x47, "S_BUFFER_ATOMIC_UMAX">;
defm S_BUFFER_ATOMIC_AND          : SM_Real_Atomics_vi <0x48, "S_BUFFER_ATOMIC_AND">;
defm S_BUFFER_ATOMIC_OR           : SM_Real_Atomics_vi <0x49, "S_BUFFER_ATOMIC_OR">;
defm S_BUFFER_ATOMIC_XOR          : SM_Real_Atomics_vi <0x4a, "S_BUFFER_ATOMIC_XOR">;
defm S_BUFFER_ATOMIC_INC          : SM_Real_Atomics_vi <0x4b, "S_BUFFER_ATOMIC_INC">;
defm S_BUFFER_ATOMIC_DEC          : SM_Real_Atomics_vi <0x4c, "S_BUFFER_ATOMIC_DEC">;

defm S_BUFFER_ATOMIC_SWAP_X2      : SM_Real_Atomics_vi <0x60, "S_BUFFER_ATOMIC_SWAP_X2">;
defm S_BUFFER_ATOMIC_CMPSWAP_X2   : SM_Real_Atomics_vi <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">;
defm S_BUFFER_ATOMIC_ADD_X2       : SM_Real_Atomics_vi <0x62, "S_BUFFER_ATOMIC_ADD_X2">;
defm S_BUFFER_ATOMIC_SUB_X2       : SM_Real_Atomics_vi <0x63, "S_BUFFER_ATOMIC_SUB_X2">;
defm S_BUFFER_ATOMIC_SMIN_X2      : SM_Real_Atomics_vi <0x64, "S_BUFFER_ATOMIC_SMIN_X2">;
defm S_BUFFER_ATOMIC_UMIN_X2      : SM_Real_Atomics_vi <0x65, "S_BUFFER_ATOMIC_UMIN_X2">;
defm S_BUFFER_ATOMIC_SMAX_X2      : SM_Real_Atomics_vi <0x66, "S_BUFFER_ATOMIC_SMAX_X2">;
defm S_BUFFER_ATOMIC_UMAX_X2      : SM_Real_Atomics_vi <0x67, "S_BUFFER_ATOMIC_UMAX_X2">;
defm S_BUFFER_ATOMIC_AND_X2       : SM_Real_Atomics_vi <0x68, "S_BUFFER_ATOMIC_AND_X2">;
defm S_BUFFER_ATOMIC_OR_X2        : SM_Real_Atomics_vi <0x69, "S_BUFFER_ATOMIC_OR_X2">;
defm S_BUFFER_ATOMIC_XOR_X2       : SM_Real_Atomics_vi <0x6a, "S_BUFFER_ATOMIC_XOR_X2">;
defm S_BUFFER_ATOMIC_INC_X2       : SM_Real_Atomics_vi <0x6b, "S_BUFFER_ATOMIC_INC_X2">;
defm S_BUFFER_ATOMIC_DEC_X2       : SM_Real_Atomics_vi <0x6c, "S_BUFFER_ATOMIC_DEC_X2">;

defm S_ATOMIC_SWAP                : SM_Real_Atomics_vi <0x80, "S_ATOMIC_SWAP">;
defm S_ATOMIC_CMPSWAP             : SM_Real_Atomics_vi <0x81, "S_ATOMIC_CMPSWAP">;
defm S_ATOMIC_ADD                 : SM_Real_Atomics_vi <0x82, "S_ATOMIC_ADD">;
defm S_ATOMIC_SUB                 : SM_Real_Atomics_vi <0x83, "S_ATOMIC_SUB">;
defm S_ATOMIC_SMIN                : SM_Real_Atomics_vi <0x84, "S_ATOMIC_SMIN">;
defm S_ATOMIC_UMIN                : SM_Real_Atomics_vi <0x85, "S_ATOMIC_UMIN">;
defm S_ATOMIC_SMAX                : SM_Real_Atomics_vi <0x86, "S_ATOMIC_SMAX">;
defm S_ATOMIC_UMAX                : SM_Real_Atomics_vi <0x87, "S_ATOMIC_UMAX">;
defm S_ATOMIC_AND                 : SM_Real_Atomics_vi <0x88, "S_ATOMIC_AND">;
defm S_ATOMIC_OR                  : SM_Real_Atomics_vi <0x89, "S_ATOMIC_OR">;
defm S_ATOMIC_XOR                 : SM_Real_Atomics_vi <0x8a, "S_ATOMIC_XOR">;
defm S_ATOMIC_INC                 : SM_Real_Atomics_vi <0x8b, "S_ATOMIC_INC">;
defm S_ATOMIC_DEC                 : SM_Real_Atomics_vi <0x8c, "S_ATOMIC_DEC">;

defm S_ATOMIC_SWAP_X2             : SM_Real_Atomics_vi <0xa0, "S_ATOMIC_SWAP_X2">;
defm S_ATOMIC_CMPSWAP_X2          : SM_Real_Atomics_vi <0xa1, "S_ATOMIC_CMPSWAP_X2">;
defm S_ATOMIC_ADD_X2              : SM_Real_Atomics_vi <0xa2, "S_ATOMIC_ADD_X2">;
defm S_ATOMIC_SUB_X2              : SM_Real_Atomics_vi <0xa3, "S_ATOMIC_SUB_X2">;
defm S_ATOMIC_SMIN_X2             : SM_Real_Atomics_vi <0xa4, "S_ATOMIC_SMIN_X2">;
defm S_ATOMIC_UMIN_X2             : SM_Real_Atomics_vi <0xa5, "S_ATOMIC_UMIN_X2">;
defm S_ATOMIC_SMAX_X2             : SM_Real_Atomics_vi <0xa6, "S_ATOMIC_SMAX_X2">;
defm S_ATOMIC_UMAX_X2             : SM_Real_Atomics_vi <0xa7, "S_ATOMIC_UMAX_X2">;
defm S_ATOMIC_AND_X2              : SM_Real_Atomics_vi <0xa8, "S_ATOMIC_AND_X2">;
defm S_ATOMIC_OR_X2               : SM_Real_Atomics_vi <0xa9, "S_ATOMIC_OR_X2">;
defm S_ATOMIC_XOR_X2              : SM_Real_Atomics_vi <0xaa, "S_ATOMIC_XOR_X2">;
defm S_ATOMIC_INC_X2              : SM_Real_Atomics_vi <0xab, "S_ATOMIC_INC_X2">;
defm S_ATOMIC_DEC_X2              : SM_Real_Atomics_vi <0xac, "S_ATOMIC_DEC_X2">;

// Real discards: same four encoding variants as the loads.
multiclass SM_Real_Discard_vi<bits<8> op, string ps> {
  def _IMM_vi  : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_IMM)>;
  def _SGPR_vi : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>;
  def _SGPR_alt_gfx9 : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR)>,
                       SMEM_Real_SGPR_alt_gfx9;
  let IsGFX9SpecificEncoding = true in
  def _SGPR_IMM_gfx9 : SMEM_Real_vi <op, !cast<SM_Discard_Pseudo>(ps#_SGPR_IMM)>;
}

defm S_DCACHE_DISCARD    : SM_Real_Discard_vi <0x28, "S_DCACHE_DISCARD">;
defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_vi <0x29, "S_DCACHE_DISCARD_X2">;

//===----------------------------------------------------------------------===//
// CI
//===----------------------------------------------------------------------===//

// 32-bit literal offset operand used by the CI-only load encoding below.
def smrd_literal_offset : NamedOperandU32<"SMRDLiteralOffset",
                                          NamedMatchClass<"SMRDLiteralOffset">> {
  let OperandType = "OPERAND_IMMEDIATE";
}

// CI-only (isGFX7Only) 64-bit load encoding: the first dword is the SMRD
// layout with Inst{7-0} fixed to 0xff and Inst{8} cleared, and the second
// dword carries the full 32-bit offset.
class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
  SM_Real<ps>,
  Enc64 {

  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace = "GFX7";
  let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, CPol:$cpol);

  let Inst{7-0}   = 0xff;
  let Inst{8}     = 0;
  let Inst{14-9}  = sbase{6-1};
  let Inst{21-15} = sdst{6-0};
  let Inst{26-22} = op;
  let Inst{31-27} = 0x18; //encoding
  let Inst{63-32} = offset{31-0};
}

def S_LOAD_DWORD_IMM_ci           : SMRD_Real_Load_IMM_ci <0x00, S_LOAD_DWORD_IMM>;
def S_LOAD_DWORDX2_IMM_ci         : SMRD_Real_Load_IMM_ci <0x01, S_LOAD_DWORDX2_IMM>;
def S_LOAD_DWORDX4_IMM_ci         : SMRD_Real_Load_IMM_ci <0x02, S_LOAD_DWORDX4_IMM>;
def S_LOAD_DWORDX8_IMM_ci         : SMRD_Real_Load_IMM_ci <0x03, S_LOAD_DWORDX8_IMM>;
def S_LOAD_DWORDX16_IMM_ci        : SMRD_Real_Load_IMM_ci <0x04, S_LOAD_DWORDX16_IMM>;
def S_BUFFER_LOAD_DWORD_IMM_ci    : SMRD_Real_Load_IMM_ci <0x08, S_BUFFER_LOAD_DWORD_IMM>;
def S_BUFFER_LOAD_DWORDX2_IMM_ci  : SMRD_Real_Load_IMM_ci <0x09, S_BUFFER_LOAD_DWORDX2_IMM>;
def S_BUFFER_LOAD_DWORDX4_IMM_ci  : SMRD_Real_Load_IMM_ci <0x0a, S_BUFFER_LOAD_DWORDX4_IMM>;
def S_BUFFER_LOAD_DWORDX8_IMM_ci  : SMRD_Real_Load_IMM_ci <0x0b, S_BUFFER_LOAD_DWORDX8_IMM>;
def S_BUFFER_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x0c, S_BUFFER_LOAD_DWORDX16_IMM>;

// 32-bit SMRD encoding restricted to CI (isGFX7Only); same bit layout as
// SMRD_Real_si and registered in the SI encoding family.
class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
  : SM_Real<ps>
  , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
  , Enc32 {

  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace = "GFX7";

  let Inst{7-0}   = !if(ps.has_offset, offset{7-0}, !if(ps.has_soffset, soffset, ?));
  let Inst{8}     = ps.has_offset;
  let Inst{14-9}  = !if(ps.has_sbase, sbase{6-1}, ?);
  let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
  let Inst{26-22} = op;
  let Inst{31-27} = 0x18; //encoding
}

def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;

//===----------------------------------------------------------------------===//
// Scalar Memory Patterns
//===----------------------------------------------------------------------===//

def smrd_load : PatFrag <(ops node:$ptr), (load
node:$ptr), [{ return isUniformLoad(N);}]> { 815 let GISelPredicateCode = [{ 816 if (!MI.hasOneMemOperand()) 817 return false; 818 if (!isInstrUniform(MI)) 819 return false; 820 821 // FIXME: We should probably be caching this. 822 SmallVector<GEPInfo, 4> AddrInfo; 823 getAddrModeInfo(MI, MRI, AddrInfo); 824 825 if (hasVgprParts(AddrInfo)) 826 return false; 827 return true; 828 }]; 829} 830 831def SMRDImm : ComplexPattern<iPTR, 2, "SelectSMRDImm">; 832def SMRDImm32 : ComplexPattern<iPTR, 2, "SelectSMRDImm32">; 833def SMRDSgpr : ComplexPattern<iPTR, 2, "SelectSMRDSgpr">; 834def SMRDSgprImm : ComplexPattern<iPTR, 3, "SelectSMRDSgprImm">; 835def SMRDBufferImm : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm">; 836def SMRDBufferImm32 : ComplexPattern<iPTR, 1, "SelectSMRDBufferImm32">; 837def SMRDBufferSgprImm : ComplexPattern<iPTR, 2, "SelectSMRDBufferSgprImm">; 838 839multiclass SMRD_Pattern <string Instr, ValueType vt> { 840 841 // 1. IMM offset 842 def : GCNPat < 843 (smrd_load (SMRDImm i64:$sbase, i32:$offset)), 844 (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0)) 845 >; 846 847 // 2. 32-bit IMM offset on CI 848 def : GCNPat < 849 (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)), 850 (vt (!cast<InstSI>(Instr#"_IMM_ci") $sbase, $offset, 0))> { 851 let OtherPredicates = [isGFX7Only]; 852 } 853 854 // 3. SGPR offset 855 def : GCNPat < 856 (smrd_load (SMRDSgpr i64:$sbase, i32:$soffset)), 857 (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $soffset, 0)) 858 >; 859 860 // 4. SGPR+IMM offset 861 def : GCNPat < 862 (smrd_load (SMRDSgprImm i64:$sbase, i32:$soffset, i32:$offset)), 863 (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") $sbase, $soffset, $offset, 0))> { 864 let OtherPredicates = [isGFX9Plus]; 865 } 866 867 // 5. No offset 868 def : GCNPat < 869 (vt (smrd_load (i64 SReg_64:$sbase))), 870 (vt (!cast<SM_Pseudo>(Instr#"_IMM") i64:$sbase, 0, 0)) 871 >; 872} 873 874multiclass SMLoad_Pattern <string Instr, ValueType vt> { 875 // 1. 
Offset as an immediate 876 def : GCNPat < 877 (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm i32:$offset), timm:$cachepolicy), 878 (vt (!cast<SM_Pseudo>(Instr#"_IMM") SReg_128:$sbase, i32imm:$offset, (extract_cpol $cachepolicy)))> { 879 let AddedComplexity = 2; 880 } 881 882 // 2. 32-bit IMM offset on CI 883 def : GCNPat < 884 (vt (SIsbuffer_load v4i32:$sbase, (SMRDBufferImm32 i32:$offset), timm:$cachepolicy)), 885 (!cast<InstSI>(Instr#"_IMM_ci") SReg_128:$sbase, smrd_literal_offset:$offset, 886 (extract_cpol $cachepolicy))> { 887 let OtherPredicates = [isGFX7Only]; 888 let AddedComplexity = 1; 889 } 890 891 // 3. Offset loaded in an 32bit SGPR 892 def : GCNPat < 893 (SIsbuffer_load v4i32:$sbase, i32:$soffset, timm:$cachepolicy), 894 (vt (!cast<SM_Pseudo>(Instr#"_SGPR") SReg_128:$sbase, SReg_32:$soffset, (extract_cpol $cachepolicy))) 895 >; 896 897 // 4. Offset as an 32-bit SGPR + immediate 898 def : GCNPat < 899 (SIsbuffer_load v4i32:$sbase, (SMRDBufferSgprImm i32:$soffset, i32:$offset), 900 timm:$cachepolicy), 901 (vt (!cast<SM_Pseudo>(Instr#"_SGPR_IMM") SReg_128:$sbase, SReg_32:$soffset, i32imm:$offset, 902 (extract_cpol $cachepolicy)))> { 903 let OtherPredicates = [isGFX9Plus]; 904 } 905} 906 907// Global and constant loads can be selected to either MUBUF or SMRD 908// instructions, but SMRD instructions are faster so we want the instruction 909// selector to prefer those. 
let AddedComplexity = 100 in {

// Plain scalar loads: one SMRD_Pattern instantiation per value type of the
// destination register class.
foreach vt = Reg32Types.types in {
defm : SMRD_Pattern <"S_LOAD_DWORD", vt>;
}

foreach vt = SReg_64.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX2", vt>;
}

foreach vt = SReg_128.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX4", vt>;
}

foreach vt = SReg_256.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX8", vt>;
}

foreach vt = SReg_512.RegTypes in {
defm : SMRD_Pattern <"S_LOAD_DWORDX16", vt>;
}

// Scalar buffer loads, integer result types.
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD",     i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2",   v2i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4",   v4i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8",   v8i32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16",  v16i32>;

// Scalar buffer loads, floating-point result types.
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORD",     f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX2",   v2f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX4",   v4f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX8",   v8f32>;
defm : SMLoad_Pattern <"S_BUFFER_LOAD_DWORDX16",  v16f32>;
} // End let AddedComplexity = 100

// readcyclecounter via S_MEMTIME on subtargets that have it.
let OtherPredicates = [HasSMemTimeInst] in {
def : GCNPat <
  (i64 (readcyclecounter)),
  (S_MEMTIME)
>;
} // let OtherPredicates = [HasSMemTimeInst]

// readcyclecounter via the SHADER_CYCLES hardware register: the S_GETREG_B32
// result forms sub0 of the 64-bit value and sub1 is a constant 0.
let OtherPredicates = [HasShaderCyclesRegister] in {
def : GCNPat <
  (i64 (readcyclecounter)),
  (REG_SEQUENCE SReg_64,
    (S_GETREG_B32 getHwRegImm<HWREG.SHADER_CYCLES, 0, -12>.ret), sub0,
    (S_MOV_B32 (i32 0)), sub1)> {
  // Prefer this to s_memtime because it has lower and more predictable latency.
  let AddedComplexity = 1;
}
} // let OtherPredicates = [HasShaderCyclesRegister]

//===----------------------------------------------------------------------===//
// GFX10.
965//===----------------------------------------------------------------------===// 966 967class SMEM_Real_10Plus_common<bits<8> op, SM_Pseudo ps, string opName, 968 int subtarget, RegisterWithSubRegs sgpr_null> : 969 SM_Real<ps, opName>, SIMCInstr<ps.PseudoInstr, subtarget>, Enc64 { 970 let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?); 971 let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?); 972 let Inst{25-18} = op; 973 let Inst{31-26} = 0x3d; 974 // There are SMEM instructions that do not employ any of the offset 975 // fields, in which case we need them to remain undefined. 976 let Inst{52-32} = !if(ps.has_offset, offset{20-0}, !if(ps.has_soffset, 0, ?)); 977 let Inst{63-57} = !if(ps.has_soffset, soffset{6-0}, 978 !if(ps.has_offset, sgpr_null.HWEncoding{6-0}, ?)); 979} 980 981class SMEM_Real_gfx10<bits<8> op, SM_Pseudo ps> 982 : SMEM_Real_10Plus_common<op, ps, ps.Mnemonic, SIEncodingFamily.GFX10, 983 SGPR_NULL_gfxpre11> { 984 let AssemblerPredicate = isGFX10Only; 985 let DecoderNamespace = "GFX10"; 986 let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ?); 987 let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ?); 988} 989 990class SMEM_Real_Load_gfx10<bits<8> op, string ps, OffsetMode offsets> 991 : SMEM_Real_gfx10<op, !cast<SM_Pseudo>(ps # offsets.Variant)> { 992 RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass; 993 let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol)); 994} 995 996multiclass SM_Real_Loads_gfx10<bits<8> op, string ps> { 997 def _IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps, IMM_Offset>; 998 def _SGPR_gfx10 : SMEM_Real_Load_gfx10<op, ps, SGPR_Offset>; 999 def _SGPR_IMM_gfx10 : SMEM_Real_Load_gfx10<op, ps, SGPR_IMM_Offset>; 1000} 1001 1002class SMEM_Real_Store_gfx10<bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx10<op, ps> { 1003 bits<7> sdata; 1004 1005 let sdst = ?; 1006 let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?); 1007} 1008 1009multiclass SM_Real_Stores_gfx10<bits<8> op, string ps, 1010 
SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM), 1011 SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> { 1012 def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, immPs> { 1013 let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smem_offset:$offset, CPol:$cpol); 1014 } 1015 1016 def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, sgprPs> { 1017 let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$soffset, CPol:$cpol); 1018 } 1019 1020 def _SGPR_IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Store_Pseudo>(ps#_SGPR_IMM)> { 1021 let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, 1022 SReg_32:$soffset, smem_offset_mod:$offset, CPol:$cpol); 1023 } 1024} 1025 1026defm S_LOAD_DWORD : SM_Real_Loads_gfx10<0x000, "S_LOAD_DWORD">; 1027defm S_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x001, "S_LOAD_DWORDX2">; 1028defm S_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x002, "S_LOAD_DWORDX4">; 1029defm S_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x003, "S_LOAD_DWORDX8">; 1030defm S_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x004, "S_LOAD_DWORDX16">; 1031 1032let SubtargetPredicate = HasScalarFlatScratchInsts in { 1033defm S_SCRATCH_LOAD_DWORD : SM_Real_Loads_gfx10<0x005, "S_SCRATCH_LOAD_DWORD">; 1034defm S_SCRATCH_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x006, "S_SCRATCH_LOAD_DWORDX2">; 1035defm S_SCRATCH_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x007, "S_SCRATCH_LOAD_DWORDX4">; 1036} // End SubtargetPredicate = HasScalarFlatScratchInsts 1037 1038defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_gfx10<0x008, "S_BUFFER_LOAD_DWORD">; 1039defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_gfx10<0x009, "S_BUFFER_LOAD_DWORDX2">; 1040defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_gfx10<0x00a, "S_BUFFER_LOAD_DWORDX4">; 1041defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_gfx10<0x00b, "S_BUFFER_LOAD_DWORDX8">; 1042defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_gfx10<0x00c, "S_BUFFER_LOAD_DWORDX16">; 1043 1044let SubtargetPredicate = HasScalarStores in { 1045defm S_STORE_DWORD : 
SM_Real_Stores_gfx10<0x010, "S_STORE_DWORD">; 1046defm S_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x011, "S_STORE_DWORDX2">; 1047defm S_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x012, "S_STORE_DWORDX4">; 1048let OtherPredicates = [HasScalarFlatScratchInsts] in { 1049defm S_SCRATCH_STORE_DWORD : SM_Real_Stores_gfx10<0x015, "S_SCRATCH_STORE_DWORD">; 1050defm S_SCRATCH_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x016, "S_SCRATCH_STORE_DWORDX2">; 1051defm S_SCRATCH_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x017, "S_SCRATCH_STORE_DWORDX4">; 1052} // End OtherPredicates = [HasScalarFlatScratchInsts] 1053defm S_BUFFER_STORE_DWORD : SM_Real_Stores_gfx10<0x018, "S_BUFFER_STORE_DWORD">; 1054defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_gfx10<0x019, "S_BUFFER_STORE_DWORDX2">; 1055defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_gfx10<0x01a, "S_BUFFER_STORE_DWORDX4">; 1056} // End SubtargetPredicate = HasScalarStores 1057 1058def S_MEMREALTIME_gfx10 : SMEM_Real_gfx10<0x025, S_MEMREALTIME>; 1059def S_MEMTIME_gfx10 : SMEM_Real_gfx10<0x024, S_MEMTIME>; 1060def S_GL1_INV_gfx10 : SMEM_Real_gfx10<0x01f, S_GL1_INV>; 1061def S_GET_WAVEID_IN_WORKGROUP_gfx10 : SMEM_Real_gfx10<0x02a, S_GET_WAVEID_IN_WORKGROUP>; 1062def S_DCACHE_INV_gfx10 : SMEM_Real_gfx10<0x020, S_DCACHE_INV>; 1063 1064let SubtargetPredicate = HasScalarStores in { 1065def S_DCACHE_WB_gfx10 : SMEM_Real_gfx10<0x021, S_DCACHE_WB>; 1066} // End SubtargetPredicate = HasScalarStores 1067 1068multiclass SM_Real_Probe_gfx10<bits<8> op, string ps> { 1069 def _IMM_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>; 1070 def _SGPR_gfx10 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>; 1071 def _SGPR_IMM_gfx10 1072 : SMEM_Real_Store_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR_IMM)>; 1073} 1074 1075defm S_ATC_PROBE : SM_Real_Probe_gfx10 <0x26, "S_ATC_PROBE">; 1076defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx10 <0x27, "S_ATC_PROBE_BUFFER">; 1077 1078class SMEM_Atomic_Real_gfx10 <bits<8> op, SM_Atomic_Pseudo ps> 1079 : SMEM_Real_gfx10 <op, ps>, 
1080 AtomicNoRet <!subst("_RTN","",NAME), ps.glc> { 1081 1082 bits<7> sdata; 1083 1084 let Constraints = ps.Constraints; 1085 let DisableEncoding = ps.DisableEncoding; 1086 1087 let cpol{CPolBit.GLC} = ps.glc; 1088 1089 let Inst{14} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0); 1090 let Inst{12-6} = !if(ps.glc, sdst{6-0}, sdata{6-0}); 1091} 1092 1093multiclass SM_Real_Atomics_gfx10<bits<8> op, string ps> { 1094 def _IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM)>; 1095 def _SGPR_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR)>; 1096 def _SGPR_IMM_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM)>; 1097 def _IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_IMM_RTN)>; 1098 def _SGPR_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_RTN)>; 1099 def _SGPR_IMM_RTN_gfx10 : SMEM_Atomic_Real_gfx10 <op, !cast<SM_Atomic_Pseudo>(ps#_SGPR_IMM_RTN)>; 1100} 1101 1102let SubtargetPredicate = HasScalarAtomics in { 1103 1104defm S_BUFFER_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x40, "S_BUFFER_ATOMIC_SWAP">; 1105defm S_BUFFER_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x41, "S_BUFFER_ATOMIC_CMPSWAP">; 1106defm S_BUFFER_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x42, "S_BUFFER_ATOMIC_ADD">; 1107defm S_BUFFER_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x43, "S_BUFFER_ATOMIC_SUB">; 1108defm S_BUFFER_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x44, "S_BUFFER_ATOMIC_SMIN">; 1109defm S_BUFFER_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x45, "S_BUFFER_ATOMIC_UMIN">; 1110defm S_BUFFER_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x46, "S_BUFFER_ATOMIC_SMAX">; 1111defm S_BUFFER_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x47, "S_BUFFER_ATOMIC_UMAX">; 1112defm S_BUFFER_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x48, "S_BUFFER_ATOMIC_AND">; 1113defm S_BUFFER_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x49, "S_BUFFER_ATOMIC_OR">; 1114defm S_BUFFER_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x4a, "S_BUFFER_ATOMIC_XOR">; 1115defm S_BUFFER_ATOMIC_INC 
: SM_Real_Atomics_gfx10 <0x4b, "S_BUFFER_ATOMIC_INC">; 1116defm S_BUFFER_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x4c, "S_BUFFER_ATOMIC_DEC">; 1117 1118defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0x60, "S_BUFFER_ATOMIC_SWAP_X2">; 1119defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0x61, "S_BUFFER_ATOMIC_CMPSWAP_X2">; 1120defm S_BUFFER_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0x62, "S_BUFFER_ATOMIC_ADD_X2">; 1121defm S_BUFFER_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0x63, "S_BUFFER_ATOMIC_SUB_X2">; 1122defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0x64, "S_BUFFER_ATOMIC_SMIN_X2">; 1123defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0x65, "S_BUFFER_ATOMIC_UMIN_X2">; 1124defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0x66, "S_BUFFER_ATOMIC_SMAX_X2">; 1125defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0x67, "S_BUFFER_ATOMIC_UMAX_X2">; 1126defm S_BUFFER_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0x68, "S_BUFFER_ATOMIC_AND_X2">; 1127defm S_BUFFER_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0x69, "S_BUFFER_ATOMIC_OR_X2">; 1128defm S_BUFFER_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0x6a, "S_BUFFER_ATOMIC_XOR_X2">; 1129defm S_BUFFER_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0x6b, "S_BUFFER_ATOMIC_INC_X2">; 1130defm S_BUFFER_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0x6c, "S_BUFFER_ATOMIC_DEC_X2">; 1131 1132defm S_ATOMIC_SWAP : SM_Real_Atomics_gfx10 <0x80, "S_ATOMIC_SWAP">; 1133defm S_ATOMIC_CMPSWAP : SM_Real_Atomics_gfx10 <0x81, "S_ATOMIC_CMPSWAP">; 1134defm S_ATOMIC_ADD : SM_Real_Atomics_gfx10 <0x82, "S_ATOMIC_ADD">; 1135defm S_ATOMIC_SUB : SM_Real_Atomics_gfx10 <0x83, "S_ATOMIC_SUB">; 1136defm S_ATOMIC_SMIN : SM_Real_Atomics_gfx10 <0x84, "S_ATOMIC_SMIN">; 1137defm S_ATOMIC_UMIN : SM_Real_Atomics_gfx10 <0x85, "S_ATOMIC_UMIN">; 1138defm S_ATOMIC_SMAX : SM_Real_Atomics_gfx10 <0x86, "S_ATOMIC_SMAX">; 1139defm S_ATOMIC_UMAX : SM_Real_Atomics_gfx10 <0x87, "S_ATOMIC_UMAX">; 1140defm S_ATOMIC_AND : SM_Real_Atomics_gfx10 <0x88, "S_ATOMIC_AND">; 1141defm 
S_ATOMIC_OR : SM_Real_Atomics_gfx10 <0x89, "S_ATOMIC_OR">; 1142defm S_ATOMIC_XOR : SM_Real_Atomics_gfx10 <0x8a, "S_ATOMIC_XOR">; 1143defm S_ATOMIC_INC : SM_Real_Atomics_gfx10 <0x8b, "S_ATOMIC_INC">; 1144defm S_ATOMIC_DEC : SM_Real_Atomics_gfx10 <0x8c, "S_ATOMIC_DEC">; 1145 1146defm S_ATOMIC_SWAP_X2 : SM_Real_Atomics_gfx10 <0xa0, "S_ATOMIC_SWAP_X2">; 1147defm S_ATOMIC_CMPSWAP_X2 : SM_Real_Atomics_gfx10 <0xa1, "S_ATOMIC_CMPSWAP_X2">; 1148defm S_ATOMIC_ADD_X2 : SM_Real_Atomics_gfx10 <0xa2, "S_ATOMIC_ADD_X2">; 1149defm S_ATOMIC_SUB_X2 : SM_Real_Atomics_gfx10 <0xa3, "S_ATOMIC_SUB_X2">; 1150defm S_ATOMIC_SMIN_X2 : SM_Real_Atomics_gfx10 <0xa4, "S_ATOMIC_SMIN_X2">; 1151defm S_ATOMIC_UMIN_X2 : SM_Real_Atomics_gfx10 <0xa5, "S_ATOMIC_UMIN_X2">; 1152defm S_ATOMIC_SMAX_X2 : SM_Real_Atomics_gfx10 <0xa6, "S_ATOMIC_SMAX_X2">; 1153defm S_ATOMIC_UMAX_X2 : SM_Real_Atomics_gfx10 <0xa7, "S_ATOMIC_UMAX_X2">; 1154defm S_ATOMIC_AND_X2 : SM_Real_Atomics_gfx10 <0xa8, "S_ATOMIC_AND_X2">; 1155defm S_ATOMIC_OR_X2 : SM_Real_Atomics_gfx10 <0xa9, "S_ATOMIC_OR_X2">; 1156defm S_ATOMIC_XOR_X2 : SM_Real_Atomics_gfx10 <0xaa, "S_ATOMIC_XOR_X2">; 1157defm S_ATOMIC_INC_X2 : SM_Real_Atomics_gfx10 <0xab, "S_ATOMIC_INC_X2">; 1158defm S_ATOMIC_DEC_X2 : SM_Real_Atomics_gfx10 <0xac, "S_ATOMIC_DEC_X2">; 1159 1160multiclass SM_Real_Discard_gfx10<bits<8> op, string ps> { 1161 def _IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_IMM)>; 1162 def _SGPR_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR)>; 1163 def _SGPR_IMM_gfx10 : SMEM_Real_gfx10 <op, !cast<SM_Pseudo>(ps#_SGPR_IMM)>; 1164} 1165 1166defm S_DCACHE_DISCARD : SM_Real_Discard_gfx10 <0x28, "S_DCACHE_DISCARD">; 1167defm S_DCACHE_DISCARD_X2 : SM_Real_Discard_gfx10 <0x29, "S_DCACHE_DISCARD_X2">; 1168 1169} // End SubtargetPredicate = HasScalarAtomics 1170 1171def SMInfoTable : GenericTable { 1172 let FilterClass = "SM_Real"; 1173 let CppTypeName = "SMInfo"; 1174 let Fields = ["Opcode", "is_buffer"]; 1175 1176 let PrimaryKey = ["Opcode"]; 1177 let 
PrimaryKeyName = "getSMEMOpcodeHelper"; 1178} 1179 1180//===----------------------------------------------------------------------===// 1181// GFX11. 1182//===----------------------------------------------------------------------===// 1183 1184class SMEM_Real_gfx11<bits<8> op, SM_Pseudo ps, string opName = ps.Mnemonic> : 1185 SMEM_Real_10Plus_common<op, ps, opName, SIEncodingFamily.GFX11, 1186 SGPR_NULL_gfx11plus> { 1187 let AssemblerPredicate = isGFX11Plus; 1188 let DecoderNamespace = "GFX11"; 1189 let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, 0); 1190 let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, 0); 1191} 1192 1193class SMEM_Real_Load_gfx11<bits<8> op, string ps, string opName, OffsetMode offsets> : 1194 SMEM_Real_gfx11<op, !cast<SM_Pseudo>(ps # offsets.Variant), opName> { 1195 RegisterClass BaseClass = !cast<SM_Load_Pseudo>(ps # offsets.Variant).BaseClass; 1196 let InOperandList = !con((ins BaseClass:$sbase), offsets.Ins, (ins CPol:$cpol)); 1197} 1198 1199multiclass SM_Real_Loads_gfx11<bits<8> op, string ps, string opName> { 1200 def _IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, IMM_Offset>; 1201 def _SGPR_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, SGPR_Offset>; 1202 def _SGPR_IMM_gfx11 : SMEM_Real_Load_gfx11<op, ps, opName, SGPR_IMM_Offset>; 1203 def : MnemonicAlias<!cast<SM_Pseudo>(ps#"_IMM").Mnemonic, opName>, 1204 Requires<[isGFX11Plus]>; 1205} 1206 1207defm S_LOAD_B32 : SM_Real_Loads_gfx11<0x000, "S_LOAD_DWORD", "s_load_b32">; 1208defm S_LOAD_B64 : SM_Real_Loads_gfx11<0x001, "S_LOAD_DWORDX2", "s_load_b64">; 1209defm S_LOAD_B128 : SM_Real_Loads_gfx11<0x002, "S_LOAD_DWORDX4", "s_load_b128">; 1210defm S_LOAD_B256 : SM_Real_Loads_gfx11<0x003, "S_LOAD_DWORDX8", "s_load_b256">; 1211defm S_LOAD_B512 : SM_Real_Loads_gfx11<0x004, "S_LOAD_DWORDX16", "s_load_b512">; 1212 1213defm S_BUFFER_LOAD_B32 : SM_Real_Loads_gfx11<0x008, "S_BUFFER_LOAD_DWORD", "s_buffer_load_b32">; 1214defm S_BUFFER_LOAD_B64 : SM_Real_Loads_gfx11<0x009, "S_BUFFER_LOAD_DWORDX2", 
"s_buffer_load_b64">; 1215defm S_BUFFER_LOAD_B128 : SM_Real_Loads_gfx11<0x00a, "S_BUFFER_LOAD_DWORDX4", "s_buffer_load_b128">; 1216defm S_BUFFER_LOAD_B256 : SM_Real_Loads_gfx11<0x00b, "S_BUFFER_LOAD_DWORDX8", "s_buffer_load_b256">; 1217defm S_BUFFER_LOAD_B512 : SM_Real_Loads_gfx11<0x00c, "S_BUFFER_LOAD_DWORDX16", "s_buffer_load_b512">; 1218 1219def S_GL1_INV_gfx11 : SMEM_Real_gfx11<0x020, S_GL1_INV>; 1220def S_DCACHE_INV_gfx11 : SMEM_Real_gfx11<0x021, S_DCACHE_INV>; 1221 1222class SMEM_Real_Store_gfx11 <bits<8> op, SM_Pseudo ps> : SMEM_Real_gfx11<op, ps> { 1223 // encoding 1224 bits<7> sdata; 1225 1226 let sdst = ?; 1227 let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?); 1228} 1229 1230multiclass SM_Real_Probe_gfx11<bits<8> op, string ps> { 1231 def _IMM_gfx11 : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_IMM)>; 1232 def _SGPR_gfx11 : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR)>; 1233 def _SGPR_IMM_gfx11 1234 : SMEM_Real_Store_gfx11 <op, !cast<SM_Probe_Pseudo>(ps#_SGPR_IMM)>; 1235} 1236 1237defm S_ATC_PROBE : SM_Real_Probe_gfx11 <0x22, "S_ATC_PROBE">; 1238defm S_ATC_PROBE_BUFFER : SM_Real_Probe_gfx11 <0x23, "S_ATC_PROBE_BUFFER">; 1239