//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Address-matching complex patterns. Each one matches an i64 address and
// yields three operands; the patterns in this file bind them as
// ($vaddr, $offset, $slc).
def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;

def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;

//===----------------------------------------------------------------------===//
// FLAT classes
//===----------------------------------------------------------------------===//

// Common base of every FLAT / GLOBAL / SCRATCH pseudo instruction.
//   opName  - mnemonic; stored in Mnemonic and passed to SIMCInstr.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - operand part of the assembly string; stored in AsmOperands.
//   pattern - optional selection pattern(s).
// The has_* / *Value / enabled_saddr fields are read by FLAT_Real when it
// builds the encoding.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern = []> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let FLAT = 1;

  let UseNamedOperandTable = 1;
  let hasSideEffects = 0;
  let SchedRW = [WriteVMEM];

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selectors; both zero means a plain flat-segment instruction.
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // "Having" saddr and "enabling" saddr are tracked separately because
  // saddr is only valid for scratch and global instructions. Before gfx9
  // these bits were reserved, so the original flat segment instructions
  // should not necessarily get the "disabled" value written into them.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;
  bits<1> has_vaddr = 1;

  bits<1> has_data = 1;
  bits<1> has_glc = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc = 1;
  bits<1> dlcValue = 0;

  // Global and scratch forms each have their own predicate; everything else
  // only needs the flat address space.
  let SubtargetPredicate =
    !if(is_flat_global, HasFlatGlobalInsts,
        !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a
  // Buffer instruction, so they increment both VM_CNT and LGKM_CNT
  // and are not considered done until both have been decremented.
  let VM_CNT = 1;
  let LGKM_CNT = !if(!or(is_flat_global, is_flat_scratch), 0, 1);

  let IsNonFlatSeg = !if(!or(is_flat_global, is_flat_scratch), 1, 0);
}

// Encodable (64-bit) form of a FLAT_Pseudo.
//   op - 7-bit opcode, placed in Inst{24-18}.
//   ps - the pseudo whose operand lists, asm string and flags are reused.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  Enc64 {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // Copy the relevant flags over from the pseudo.
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;

  // Encoding fields.
  bits<8> vaddr;
  bits<8> vdata;
  bits<7> saddr;
  bits<8> vdst;

  bits<1> slc;
  bits<1> glc;
  bits<1> dlc;

  // Only valid on gfx9
  bits<1> lds = 0; // XXX - What does this actually do?

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                    !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. Highest bit ignored for flat and treated as 12-bit
  // unsigned for flat accesses.
  bits<13> offset;
  bits<1> nv = 0; // XXX - What does this actually do?

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Only valid on GFX9+
  let Inst{12-0}  = offset;
  let Inst{13}    = lds;
  let Inst{15-14} = seg;

  // glc is either a real operand or the constant chosen by the pseudo.
  let Inst{16}    = !if(ps.has_glc, glc, ps.glcValue);
  let Inst{17}    = slc;
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata, ?);
  // With saddr: the register when enabled, otherwise 0x7f; without saddr: 0.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  // 54-48 is reserved.
  let Inst{55}    = nv; // nv on GFX9+, TFE before.
  let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}

// Mixin recording whether a def is the saddr variant (IsSaddr) and the name
// shared by the saddr / non-saddr pair (SaddrOp).
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  bit IsSaddr = is_saddr;
  string SaddrOp = Name;
}

// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).

// Load pseudo for the flat and global segments.
//   HasTiedOutput - appends a $vdst_in input tied to $vdst.
//   HasSaddr      - the asm string has an saddr slot ("off" when disabled).
//   EnableSaddr   - an SReg_64 $saddr operand is actually present.
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
                        bit HasTiedOutput = 0,
                        bit HasSaddr = 0, bit EnableSaddr = 0> :
  FLAT_Pseudo<
    opName,
    (outs regClass:$vdst),
    !con(
      !con(
        !con((ins VReg_64:$vaddr),
             !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
        (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
      !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
    " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
  let has_data = 0;
  let mayLoad = 1;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
  let maybeAtomic = 1;

  // The tied input is not encoded separately.
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Store pseudo for the flat and global segments; HasSaddr / EnableSaddr have
// the same meaning as in FLAT_Load_Pseudo.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
                         bit HasSaddr = 0, bit EnableSaddr = 0> :
  FLAT_Pseudo<
    opName,
    (outs),
    !con(
      !con((ins VReg_64:$vaddr, vdataClass:$vdata),
           !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
      (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
    " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
  let mayLoad = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
  let maybeAtomic = 1;
}

// Emits the vaddr-only form and the _SADDR form of a global load.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let is_flat_global = 1 in {
    def ""     : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
                 GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
                 GlobalSaddrTable<1, opName>;
  }
}

// Emits the vaddr-only form and the _SADDR form of a global store.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1 in {
    def ""     : FLAT_Store_Pseudo<opName, regClass, 1>,
                 GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
                 GlobalSaddrTable<1, opName>;
  }
}

// Scratch load pseudo. Exactly one address operand is present: a 32-bit
// $saddr when EnableSaddr is set, otherwise a 32-bit $vaddr; the other slot
// is printed as "off".
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
                                bit EnableSaddr = 0> :
  FLAT_Pseudo<
    opName,
    (outs regClass:$vdst),
    !if(EnableSaddr,
        (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
        (ins VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
    " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {
  let has_data = 0;
  let mayLoad = 1;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
  let maybeAtomic = 1;
}

// Scratch store pseudo; addressing follows FLAT_Scratch_Load_Pseudo.
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0> :
  FLAT_Pseudo<
    opName,
    (outs),
    !if(EnableSaddr,
        (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
        (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
    " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
  let mayLoad = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
  let maybeAtomic = 1;
}

// Emits the $vaddr form and the _SADDR form of a scratch load.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def ""     : FLAT_Scratch_Load_Pseudo<opName, regClass>;
    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, 1>;
  }
}

// Emits the $vaddr form and the _SADDR form of a scratch store.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def ""     : FLAT_Scratch_Store_Pseudo<opName, regClass>;
    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>;
  }
}

// Atomic that does not return the previous value: no $vdst, and glc / dlc
// are fixed to 0 rather than being operands.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                              string asm, list<dag> pattern = []> :
  FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  let mayLoad = 1;
  let mayStore = 1;
  let has_glc = 0;
  let glcValue = 0;
  let has_dlc = 0;
  let dlcValue = 0;
  let has_vdst = 0;
  let maybeAtomic = 1;
}

// Returning atomic: same as the no-return form, but with a $vdst and glc
// fixed to 1.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []> :
  FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let hasPostISelHook = 1;
  let has_vdst = 1;
  let glcValue = 1;
  let dlcValue = 0;
  let PseudoInstr = NAME # "_RTN";
}

// Flat-segment atomic: emits the no-return def and the _RTN def. Only the
// _RTN def carries a selection pattern (built from `atomic`).
//   vdst_rc / vt      - register class and type of the returned value.
//   data_vt / data_rc - type and register class of $vdata (default: same as
//                       the result).
//   isFP              - copied into FPAtomic.
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = getIsFP<data_vt>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo <opName,
      (outs),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
      " $vaddr, $vdata$offset$slc">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
  }

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
      (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
      " $vdst, $vaddr, $vdata$offset glc$slc",
      [(set vt:$vdst,
        (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
    GlobalSaddrTable<0, opName#"_rtn">,
    AtomicNoRet <opName, 1> {
    let FPAtomic = isFP;
  }
}

// Global atomic, no-return forms only: the "off" (vaddr) def and the _SADDR
// def. Neither carries a selection pattern.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = getIsFP<data_vt>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo <opName,
      (outs),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
      " $vaddr, $vdata, off$offset$slc">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let has_saddr = 1;
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
  }

  def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
      (outs),
      (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
      " $vaddr, $vdata, $saddr$offset$slc">,
    GlobalSaddrTable<1, opName>,
    AtomicNoRet <opName#"_saddr", 0> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR";
    let FPAtomic = isFP;
  }
}

// Global atomic, returning forms only: _RTN (with a pattern built from
// `atomic` and FLATSignedAtomic) and _SADDR_RTN (no pattern).
multiclass FLAT_Global_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = getIsFP<data_vt>.ret> {

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
      (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
      " $vdst, $vaddr, $vdata, off$offset glc$slc",
      [(set vt:$vdst,
        (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
    GlobalSaddrTable<0, opName#"_rtn">,
    AtomicNoRet <opName, 1> {
    let has_saddr = 1;
    let FPAtomic = isFP;
  }

  def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
      (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
      " $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
    GlobalSaddrTable<1, opName#"_rtn">,
    AtomicNoRet <opName#"_saddr", 1> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR_RTN";
    let FPAtomic = isFP;
  }
}

// Global atomic with both the no-return and the returning forms.
multiclass FLAT_Global_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> :
    FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>,
    FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>;

// Restricts a two-operand atomic node to accesses whose address space is
// AMDGPUAS::FLAT_ADDRESS.
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_op node:$ptr, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
>;

def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
def atomic_swap_flat     : flat_binary_atomic_op<atomic_swap>;
def atomic_add_flat      : flat_binary_atomic_op<atomic_load_add>;
def atomic_and_flat      : flat_binary_atomic_op<atomic_load_and>;
def atomic_max_flat      : flat_binary_atomic_op<atomic_load_max>;
def atomic_min_flat      : flat_binary_atomic_op<atomic_load_min>;
def atomic_or_flat       : flat_binary_atomic_op<atomic_load_or>;
def atomic_sub_flat      : flat_binary_atomic_op<atomic_load_sub>;
def atomic_umax_flat     : flat_binary_atomic_op<atomic_load_umax>;
def atomic_umin_flat     : flat_binary_atomic_op<atomic_load_umin>;
def atomic_xor_flat      : flat_binary_atomic_op<atomic_load_xor>;
def atomic_inc_flat      : flat_binary_atomic_op<SIatomic_inc>;
def atomic_dec_flat      : flat_binary_atomic_op<SIatomic_dec>;



//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//

// Flat-segment loads.
def FLAT_LOAD_UBYTE   : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE   : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT  : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT  : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD   : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;

// Flat-segment stores.
def FLAT_STORE_BYTE    : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT   : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD   : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;

// d16 forms. The loads pass HasTiedOutput = 1, so $vdst is tied to $vdst_in.
let SubtargetPredicate = HasD16LoadStore in {
def FLAT_LOAD_UBYTE_D16    : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>;
def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16    : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16    : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;

def FLAT_STORE_BYTE_D16_HI  : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>;
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
}

// Flat-segment atomics. The cmpswap forms take a $vdata twice as wide as
// the returned value.
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
                                VGPR_32, i32, atomic_cmp_swap_flat,
                                v2i32, VReg_64>;

defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
                                VReg_64, i64, atomic_cmp_swap_flat,
                                v2i64, VReg_128>;

defm FLAT_ATOMIC_SWAP       : FLAT_Atomic_Pseudo <"flat_atomic_swap",
                                VGPR_32, i32, atomic_swap_flat>;

defm FLAT_ATOMIC_SWAP_X2    : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
                                VReg_64, i64, atomic_swap_flat>;

defm FLAT_ATOMIC_ADD        : FLAT_Atomic_Pseudo <"flat_atomic_add",
                                VGPR_32, i32, atomic_add_flat>;

defm FLAT_ATOMIC_SUB        : FLAT_Atomic_Pseudo <"flat_atomic_sub",
                                VGPR_32, i32, atomic_sub_flat>;

defm FLAT_ATOMIC_SMIN       : FLAT_Atomic_Pseudo <"flat_atomic_smin",
                                VGPR_32, i32, atomic_min_flat>;

defm FLAT_ATOMIC_UMIN       : FLAT_Atomic_Pseudo <"flat_atomic_umin",
                                VGPR_32, i32, atomic_umin_flat>;

defm FLAT_ATOMIC_SMAX       : FLAT_Atomic_Pseudo <"flat_atomic_smax",
                                VGPR_32, i32, atomic_max_flat>;

defm FLAT_ATOMIC_UMAX       : FLAT_Atomic_Pseudo <"flat_atomic_umax",
                                VGPR_32, i32, atomic_umax_flat>;

defm FLAT_ATOMIC_AND        : FLAT_Atomic_Pseudo <"flat_atomic_and",
                                VGPR_32, i32, atomic_and_flat>;

defm FLAT_ATOMIC_OR         : FLAT_Atomic_Pseudo <"flat_atomic_or",
                                VGPR_32, i32, atomic_or_flat>;

defm FLAT_ATOMIC_XOR        : FLAT_Atomic_Pseudo <"flat_atomic_xor",
                                VGPR_32, i32, atomic_xor_flat>;

defm FLAT_ATOMIC_INC        : FLAT_Atomic_Pseudo <"flat_atomic_inc",
                                VGPR_32, i32, atomic_inc_flat>;

defm FLAT_ATOMIC_DEC        : FLAT_Atomic_Pseudo <"flat_atomic_dec",
                                VGPR_32, i32, atomic_dec_flat>;

defm FLAT_ATOMIC_ADD_X2     : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
                                VReg_64, i64, atomic_add_flat>;

defm FLAT_ATOMIC_SUB_X2     : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
                                VReg_64, i64, atomic_sub_flat>;

defm FLAT_ATOMIC_SMIN_X2    : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
                                VReg_64, i64, atomic_min_flat>;

defm FLAT_ATOMIC_UMIN_X2    : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
                                VReg_64, i64, atomic_umin_flat>;

defm FLAT_ATOMIC_SMAX_X2    : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
                                VReg_64, i64, atomic_max_flat>;

defm FLAT_ATOMIC_UMAX_X2    : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
                                VReg_64, i64, atomic_umax_flat>;

defm FLAT_ATOMIC_AND_X2     : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
                                VReg_64, i64, atomic_and_flat>;

defm FLAT_ATOMIC_OR_X2      : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
                                VReg_64, i64, atomic_or_flat>;

defm FLAT_ATOMIC_XOR_X2     : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
                                VReg_64, i64, atomic_xor_flat>;

defm FLAT_ATOMIC_INC_X2     : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
                                VReg_64, i64, atomic_inc_flat>;

defm FLAT_ATOMIC_DEC_X2     : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
                                VReg_64, i64, atomic_dec_flat>;

// GFX7-, GFX10-only flat instructions.
let SubtargetPredicate = isGFX7GFX10 in {

// Floating-point flat atomics. None of them carries a selection pattern
// (null_frag). The cmpswap forms take a $vdata twice as wide as the result.
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
                                VGPR_32, f32, null_frag, v2f32, VReg_64>;

defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
                                VReg_64, f64, null_frag, v2f64, VReg_128>;

defm FLAT_ATOMIC_FMIN        : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
                                VGPR_32, f32>;

defm FLAT_ATOMIC_FMAX        : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
                                VGPR_32, f32>;

defm FLAT_ATOMIC_FMIN_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
                                VReg_64, f64>;

defm FLAT_ATOMIC_FMAX_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
                                VReg_64, f64>;

} // End SubtargetPredicate = isGFX7GFX10

let SubtargetPredicate = HasFlatGlobalInsts in {
// Global loads. Each defm yields the vaddr-only def and the _SADDR def.
defm GLOBAL_LOAD_UBYTE    : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE    : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT   : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
defm GLOBAL_LOAD_SSHORT   : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
defm GLOBAL_LOAD_DWORD    : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
defm GLOBAL_LOAD_DWORDX2  : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
defm GLOBAL_LOAD_DWORDX3  : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
defm GLOBAL_LOAD_DWORDX4  : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;

// d16 loads pass HasTiedInput = 1, so $vdst is tied to a $vdst_in input.
defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;

// Global stores.
defm GLOBAL_STORE_BYTE    : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT   : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
defm GLOBAL_STORE_DWORD   : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;

defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;

// The global atomic multiclasses do not set is_flat_global themselves, so it
// is set here for everything in this scope.
let is_flat_global = 1 in {
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
                               VGPR_32, i32, AMDGPUatomic_cmp_swap_global,
                               v2i32, VReg_64>;

defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
                                  VReg_64, i64, AMDGPUatomic_cmp_swap_global,
                                  v2i64, VReg_128>;

defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
                             VGPR_32, i32, atomic_swap_global>;

defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2",
                                VReg_64, i64, atomic_swap_global>;

defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add",
                           VGPR_32, i32, atomic_add_global>;

defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub",
                           VGPR_32, i32, atomic_sub_global>;

defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin",
                            VGPR_32, i32, atomic_min_global>;

defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin",
                            VGPR_32, i32, atomic_umin_global>;

defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax",
                            VGPR_32, i32, atomic_max_global>;

defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax",
                            VGPR_32, i32, atomic_umax_global>;

defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and",
                           VGPR_32, i32, atomic_and_global>;

defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or",
                          VGPR_32, i32, atomic_or_global>;

defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor",
                           VGPR_32, i32, atomic_xor_global>;

defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc",
                           VGPR_32, i32, atomic_inc_global>;

defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec",
                           VGPR_32, i32, atomic_dec_global>;

defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2",
                              VReg_64, i64, atomic_add_global>;

defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2",
                              VReg_64, i64, atomic_sub_global>;

defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2",
                               VReg_64, i64, atomic_min_global>;

defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2",
                               VReg_64, i64, atomic_umin_global>;

defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2",
                               VReg_64, i64, atomic_max_global>;

defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2",
                               VReg_64, i64, atomic_umax_global>;

defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2",
                              VReg_64, i64, atomic_and_global>;

defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2",
                             VReg_64, i64, atomic_or_global>;

defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2",
                              VReg_64, i64, atomic_xor_global>;

defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
                              VReg_64, i64, atomic_inc_global>;

defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
                              VReg_64, i64, atomic_dec_global>;
} // End is_flat_global = 1

} // End SubtargetPredicate = HasFlatGlobalInsts


let SubtargetPredicate = HasFlatScratchInsts in {
// Scratch loads. Each defm yields the $vaddr def and the _SADDR def.
defm SCRATCH_LOAD_UBYTE    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE    : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT   : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT   : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD    : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4  : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;

// NOTE(review): unlike the FLAT_LOAD_*_D16 and GLOBAL_LOAD_*_D16 defs, which
// request a tied $vdst_in operand, FLAT_Scratch_Load_Pseudo has no
// tied-output parameter, so these d16 loads have no tied input. Confirm
// whether that is intended before relying on them in codegen.
defm SCRATCH_LOAD_UBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32>;
defm SCRATCH_LOAD_SBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32>;
defm SCRATCH_LOAD_SHORT_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32>;

// Scratch stores.
defm SCRATCH_STORE_BYTE    : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT   : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
defm SCRATCH_STORE_DWORD   : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;

defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;

} // End SubtargetPredicate = HasFlatScratchInsts

let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
  // The cmpswap forms need a $vdata twice as wide as the returned value,
  // matching FLAT_ATOMIC_FCMPSWAP[_X2] and GLOBAL_ATOMIC_CMPSWAP[_X2].
  // Without the explicit data_vt / data_rc the data operand would default
  // to the result's type and register class.
  defm GLOBAL_ATOMIC_FCMPSWAP :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32,
                              null_frag, v2f32, VReg_64>;
  defm GLOBAL_ATOMIC_FMIN :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMAX :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64,
                              null_frag, v2f64, VReg_128>;
  defm GLOBAL_ATOMIC_FMIN_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMAX_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1

let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {

// Only the no-return forms are defined for the FP add atomics.
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
  "global_atomic_add_f32", VGPR_32, f32, atomic_add_global
>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
  "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_add_global
>;

} // End SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1

//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//

// Selection pattern helpers for flat and global memory operations. The
// address complex patterns supply $vaddr, $offset and $slc.
// Plain load: the address is matched by FLATOffset; glc and dlc are emitted
// as 0.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> :
  GCNPat <
    (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
    (inst $vaddr, $offset, 0, 0, $slc)
  >;

// d16 load: the extra $in operand of the node is forwarded as the last
// instruction operand.
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> :
  GCNPat <
    (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
    (inst $vaddr, $offset, 0, 0, $slc, $in)
  >;

// Same as FlatLoadPat_D16, but the address is matched by FLATOffsetSigned.
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> :
  GCNPat <
    (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
    (inst $vaddr, $offset, 0, 0, $slc, $in)
  >;

// Atomic load: the address is matched by FLATAtomic.
class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> :
  GCNPat <
    (vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
    (inst $vaddr, $offset, 0, 0, $slc)
  >;

// Plain load with the address matched by FLATOffsetSigned.
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> :
  GCNPat <
    (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
    (inst $vaddr, $offset, 0, 0, $slc)
  >;

// Plain store: the node takes the data first, then the address. rc is the
// register class applied to $data in the output.
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                    RegisterClass rc = VGPR_32> :
  GCNPat <
    (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
    (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
  >;

// Plain store with the address matched by FLATOffsetSigned.
class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                          RegisterClass rc = VGPR_32> :
  GCNPat <
    (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
    (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
  >;

class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                          RegisterClass rc = VGPR_32> :
  GCNPat <
    // An atomic store follows the atomic binop convention, so the address
    // comes first.
    (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
    (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
  >;

class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                                RegisterClass rc = VGPR_32> :
  GCNPat <
    // An atomic store follows the atomic binop convention, so the address
    // comes first.
    (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
    (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
  >;

// Returning atomic read-modify-write; data_vt may differ from the result
// type vt.
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                     ValueType data_vt = vt> :
  GCNPat <
    (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
    (inst $vaddr, $data, $offset, $slc)
  >;

// Atomic read-modify-write whose result is not used.
class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> :
  GCNPat <
    (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
    (inst $vaddr, $data, $offset, $slc)
  >;

// Returning atomic with the address matched by FLATSignedAtomic.
class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                           ValueType data_vt = vt> :
  GCNPat <
    (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
    (inst $vaddr, $data, $offset, $slc)
  >;

let OtherPredicates = [HasFlatAddressSpace] in {

// Extending and plain loads.
def : FlatLoadPat <FLAT_LOAD_UBYTE,   extloadi8_flat,   i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE,   zextloadi8_flat,  i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE,   sextloadi8_flat,  i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE,   extloadi8_flat,   i16>;
def : FlatLoadPat <FLAT_LOAD_UBYTE,   zextloadi8_flat,  i16>;
def : FlatLoadPat <FLAT_LOAD_SBYTE,   sextloadi8_flat,  i16>;
def : FlatLoadPat <FLAT_LOAD_USHORT,  extloadi16_flat,  i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT,  zextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT,  load_flat,        i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT,  sextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORD,   load_flat,        i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat,        v2i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat,        v3i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat,        v4i32>;

// Atomic loads.
def : FlatLoadAtomicPat <FLAT_LOAD_DWORD,   atomic_load_32_flat, i32>;
def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;

// Truncating and plain stores.
def : FlatStorePat <FLAT_STORE_BYTE,    truncstorei8_flat,  i32>;
def : FlatStorePat <FLAT_STORE_SHORT,   truncstorei16_flat, i32>;
def : FlatStorePat <FLAT_STORE_DWORD,   store_flat,         i32>;
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat,         v2i32, VReg_64>;
def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat,         v3i32, VReg_96>;
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat,         v4i32, VReg_128>;

// Atomic stores.
def : FlatStoreAtomicPat <FLAT_STORE_DWORD,   atomic_store_flat_32, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>;

// 32-bit returning atomics, selected from the *_global fragments onto the
// FLAT instructions.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN,     atomic_add_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN,     atomic_sub_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN,     atomic_inc_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN,     atomic_dec_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN,     atomic_and_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN,    atomic_max_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN,    atomic_umax_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN,    atomic_min_global,  i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN,    atomic_umin_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN,      atomic_or_global,   i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN,    atomic_swap_global, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN,     atomic_xor_global,  i32>;

// 64-bit returning atomics.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN,     atomic_add_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN,     atomic_sub_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN,     atomic_inc_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN,     atomic_dec_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN,     atomic_and_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN,    atomic_max_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN,    atomic_umax_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN,    atomic_min_global,  i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN,    atomic_umin_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN,      atomic_or_global,   i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN,    atomic_swap_global, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN,     atomic_xor_global,  i64>;

// 16-bit value stores.
def : FlatStorePat <FLAT_STORE_BYTE,  truncstorei8_flat, i16>;
def : FlatStorePat <FLAT_STORE_SHORT, store_flat,        i16>;

// NOTE(review): this inner `let` assigns OtherPredicates again for the
// records it encloses, so it presumably replaces rather than extends the
// outer [HasFlatAddressSpace] list. Confirm that is intended.
let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI,  truncstorei8_hi16_flat,  i32>;

def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat,   v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat,   v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat,         v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat,         v2f16>;

def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat,   v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat,   v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat,         v2i16>;
def
: FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>; 846} 847 848} // End OtherPredicates = [HasFlatAddressSpace] 849 850def atomic_fadd_global : global_binary_atomic_op_frag<SIglobal_atomic_fadd>; 851def atomic_pk_fadd_global : global_binary_atomic_op_frag<SIglobal_atomic_pk_fadd>; 852 853let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in { 854 855def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>; 856def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>; 857def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>; 858def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>; 859def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>; 860def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>; 861def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, extloadi16_global, i32>; 862def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>; 863def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>; 864def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>; 865 866def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, i32>; 867def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, v2i32>; 868def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>; 869def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, v4i32>; 870 871def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>; 872def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>; 873 874def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>; 875def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>; 876def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>; 877def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>; 878def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, i32, VGPR_32>; 879def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, 
store_global, v2i32, VReg_64>; 880def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>; 881def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32, VReg_128>; 882 883let OtherPredicates = [D16PreservesUnusedBits] in { 884def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>; 885def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>; 886 887def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>; 888def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>; 889def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>; 890def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>; 891def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>; 892def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>; 893 894def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>; 895def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>; 896def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>; 897def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>; 898def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>; 899def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>; 900} 901 902def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, store_atomic_global, i32>; 903def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, store_atomic_global, i64, VReg_64>; 904 905def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_add_global, i32>; 906def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_sub_global, i32>; 907def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN, atomic_inc_global, i32>; 908def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN, 
atomic_dec_global, i32>; 909def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN, atomic_and_global, i32>; 910def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN, atomic_max_global, i32>; 911def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN, atomic_umax_global, i32>; 912def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_min_global, i32>; 913def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_umin_global, i32>; 914def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_or_global, i32>; 915def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global, i32>; 916def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>; 917def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_xor_global, i32>; 918 919def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>; 920def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>; 921def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>; 922def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>; 923def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN, atomic_and_global, i64>; 924def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>; 925def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>; 926def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>; 927def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>; 928def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_or_global, i64>; 929def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>; 930def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>; 931def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>; 932 933def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global, f32>; 934def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global, 
v2f16>; 935 936} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 937 938 939//===----------------------------------------------------------------------===// 940// Target 941//===----------------------------------------------------------------------===// 942 943//===----------------------------------------------------------------------===// 944// CI 945//===----------------------------------------------------------------------===// 946 947class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> : 948 FLAT_Real <op, ps>, 949 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> { 950 let AssemblerPredicate = isGFX7Only; 951 let DecoderNamespace="GFX7"; 952} 953 954def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>; 955def FLAT_LOAD_SBYTE_ci : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>; 956def FLAT_LOAD_USHORT_ci : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>; 957def FLAT_LOAD_SSHORT_ci : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>; 958def FLAT_LOAD_DWORD_ci : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>; 959def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>; 960def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>; 961def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>; 962 963def FLAT_STORE_BYTE_ci : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>; 964def FLAT_STORE_SHORT_ci : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>; 965def FLAT_STORE_DWORD_ci : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>; 966def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>; 967def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>; 968def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>; 969 970multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> { 971 def _ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 972 def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 973} 974 975defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>; 976defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, 
FLAT_ATOMIC_CMPSWAP>; 977defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>; 978defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>; 979defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>; 980defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>; 981defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>; 982defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>; 983defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>; 984defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>; 985defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>; 986defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>; 987defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>; 988defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>; 989defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>; 990defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>; 991defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>; 992defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>; 993defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>; 994defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>; 995defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>; 996defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>; 997defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>; 998defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>; 999defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>; 1000defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>; 1001 1002// CI Only flat instructions 1003defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>; 1004defm FLAT_ATOMIC_FMIN : 
FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>; 1005defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>; 1006defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>; 1007defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>; 1008defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>; 1009 1010 1011//===----------------------------------------------------------------------===// 1012// VI 1013//===----------------------------------------------------------------------===// 1014 1015class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> : 1016 FLAT_Real <op, ps>, 1017 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> { 1018 let AssemblerPredicate = isGFX8GFX9; 1019 let DecoderNamespace = "GFX8"; 1020} 1021 1022multiclass FLAT_Real_AllAddr_vi<bits<7> op> { 1023 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)>; 1024 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1025} 1026 1027def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; 1028def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>; 1029def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>; 1030def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>; 1031def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>; 1032def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>; 1033def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>; 1034def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>; 1035 1036def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>; 1037def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>; 1038def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>; 1039def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; 1040def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>; 1041def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>; 1042def 
FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>; 1043def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>; 1044 1045def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>; 1046def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; 1047def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>; 1048def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; 1049def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>; 1050def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; 1051 1052multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> { 1053 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 1054 def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 1055} 1056 1057multiclass FLAT_Global_Real_Atomics_vi<bits<7> op> : 1058 FLAT_Real_AllAddr_vi<op> { 1059 def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 1060 def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 1061} 1062 1063 1064defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>; 1065defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>; 1066defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>; 1067defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>; 1068defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>; 1069defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>; 1070defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>; 1071defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>; 1072defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>; 1073defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>; 1074defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>; 1075defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>; 
// VI real encodings, continued: remaining FLAT atomics.
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;

// GLOBAL loads: each defm yields a `_vi` and a `_SADDR_vi` real.
defm GLOBAL_LOAD_UBYTE   : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE   : FLAT_Real_AllAddr_vi <0x11>;
defm GLOBAL_LOAD_USHORT  : FLAT_Real_AllAddr_vi <0x12>;
defm GLOBAL_LOAD_SSHORT  : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD   : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;

// GLOBAL D16 loads.
defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_vi <0x20>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_vi <0x22>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Real_AllAddr_vi <0x24>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;

// GLOBAL stores.
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;


// GLOBAL atomics: each defm yields `_vi`, `_SADDR_vi`, `_RTN_vi` and
// `_SADDR_RTN_vi` reals.
defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
defm GLOBAL_ATOMIC_ADD        : FLAT_Global_Real_Atomics_vi <0x42>;
defm GLOBAL_ATOMIC_SUB        : FLAT_Global_Real_Atomics_vi <0x43>;
defm GLOBAL_ATOMIC_SMIN       : FLAT_Global_Real_Atomics_vi <0x44>;
defm GLOBAL_ATOMIC_UMIN       : FLAT_Global_Real_Atomics_vi <0x45>;
defm GLOBAL_ATOMIC_SMAX       : FLAT_Global_Real_Atomics_vi <0x46>;
defm GLOBAL_ATOMIC_UMAX       : FLAT_Global_Real_Atomics_vi <0x47>;
defm GLOBAL_ATOMIC_AND        : FLAT_Global_Real_Atomics_vi <0x48>;
defm GLOBAL_ATOMIC_OR         : FLAT_Global_Real_Atomics_vi <0x49>;
defm GLOBAL_ATOMIC_XOR        : FLAT_Global_Real_Atomics_vi <0x4a>;
defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi <0x4b>;
defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi <0x4c>;
defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi <0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi <0x62>;
defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi <0x63>;
defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi <0x64>;
defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi <0x65>;
defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi <0x66>;
defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi <0x67>;
defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi <0x68>;
defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi <0x69>;
defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;

// SCRATCH loads and stores: each defm yields a `_vi` and a `_SADDR_vi` real.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_vi <0x10>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_vi <0x11>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_vi <0x12>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_vi <0x13>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_vi <0x14>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_vi <0x15>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_vi <0x16>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_vi <0x17>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_vi <0x20>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x21>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_vi <0x22>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x23>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_vi <0x24>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_vi <0x25>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;


//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Real (encoded) FLAT instruction for the GFX10 encoding family; assembler
// predicate isGFX10Plus, decoder namespace "GFX10".
class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
    FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
  let AssemblerPredicate = isGFX10Plus;
  let DecoderNamespace = "GFX10";

  // 12-bit field built from offset bit 12 followed by offset bits 10-0;
  // offset bit 11 is not encoded.
  let Inst{11-0} = {offset{12}, offset{10-0}};
  // dlc operand when the pseudo has one, otherwise the pseudo's fixed value.
  let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue);
  // saddr operand only when the pseudo both has and enables saddr; 0x7d in
  // every other case.
  // NOTE(review): 0x7d is presumably the "no saddr" encoding - confirm.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
  let Inst{55} = 0;
}


// One multiclass per pseudo-name suffix. Each defines a single real whose
// pseudo is looked up by name as NAME plus that suffix.

// Suffix: none.
multiclass FLAT_Real_Base_gfx10<bits<7> op> {
  def _gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
}

// Suffix: "_RTN".
multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
  def _RTN_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
}

// Suffix: "_SADDR".
multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
  def _SADDR_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}

// Suffix: "_SADDR_RTN".
multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
  def _SADDR_RTN_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}


// Combinations of the single-suffix multiclasses above.

// Base + SADDR.
multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
  FLAT_Real_Base_gfx10<op>,
  FLAT_Real_SADDR_gfx10<op>;

// Base + RTN.
multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
  FLAT_Real_Base_gfx10<op>,
  FLAT_Real_RTN_gfx10<op>;

// Base + SADDR + RTN + SADDR_RTN.
multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
  FLAT_Real_AllAddr_gfx10<op>,
  FLAT_Real_RTN_gfx10<op>,
  FLAT_Real_SADDR_RTN_gfx10<op>;


// ENC_FLAT.
// Loads, stores and D16 loads: one `_gfx10` real each.
defm FLAT_LOAD_UBYTE         : FLAT_Real_Base_gfx10<0x008>;
defm FLAT_LOAD_SBYTE         : FLAT_Real_Base_gfx10<0x009>;
defm FLAT_LOAD_USHORT        : FLAT_Real_Base_gfx10<0x00a>;
defm FLAT_LOAD_SSHORT        : FLAT_Real_Base_gfx10<0x00b>;
defm FLAT_LOAD_DWORD         : FLAT_Real_Base_gfx10<0x00c>;
defm FLAT_LOAD_DWORDX2       : FLAT_Real_Base_gfx10<0x00d>;
defm FLAT_LOAD_DWORDX4       : FLAT_Real_Base_gfx10<0x00e>;
defm FLAT_LOAD_DWORDX3       : FLAT_Real_Base_gfx10<0x00f>;
defm FLAT_STORE_BYTE         : FLAT_Real_Base_gfx10<0x018>;
defm FLAT_STORE_BYTE_D16_HI  : FLAT_Real_Base_gfx10<0x019>;
defm FLAT_STORE_SHORT        : FLAT_Real_Base_gfx10<0x01a>;
defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>;
defm FLAT_STORE_DWORD        : FLAT_Real_Base_gfx10<0x01c>;
defm FLAT_STORE_DWORDX2      : FLAT_Real_Base_gfx10<0x01d>;
defm FLAT_STORE_DWORDX4      : FLAT_Real_Base_gfx10<0x01e>;
defm FLAT_STORE_DWORDX3      : FLAT_Real_Base_gfx10<0x01f>;
defm FLAT_LOAD_UBYTE_D16     : FLAT_Real_Base_gfx10<0x020>;
defm FLAT_LOAD_UBYTE_D16_HI  : FLAT_Real_Base_gfx10<0x021>;
defm FLAT_LOAD_SBYTE_D16     : FLAT_Real_Base_gfx10<0x022>;
defm FLAT_LOAD_SBYTE_D16_HI  : FLAT_Real_Base_gfx10<0x023>;
defm FLAT_LOAD_SHORT_D16     : FLAT_Real_Base_gfx10<0x024>;
defm FLAT_LOAD_SHORT_D16_HI  : FLAT_Real_Base_gfx10<0x025>;
// Atomics: a `_gfx10` and a `_RTN_gfx10` real each.
defm FLAT_ATOMIC_SWAP        : FLAT_Real_Atomics_gfx10<0x030>;
defm FLAT_ATOMIC_CMPSWAP     : FLAT_Real_Atomics_gfx10<0x031>;
defm FLAT_ATOMIC_ADD         : FLAT_Real_Atomics_gfx10<0x032>;
defm FLAT_ATOMIC_SUB         : FLAT_Real_Atomics_gfx10<0x033>;
defm FLAT_ATOMIC_SMIN        : FLAT_Real_Atomics_gfx10<0x035>;
defm FLAT_ATOMIC_UMIN        : FLAT_Real_Atomics_gfx10<0x036>;
defm FLAT_ATOMIC_SMAX        : FLAT_Real_Atomics_gfx10<0x037>;
defm FLAT_ATOMIC_UMAX        : FLAT_Real_Atomics_gfx10<0x038>;
defm FLAT_ATOMIC_AND         : FLAT_Real_Atomics_gfx10<0x039>;
defm FLAT_ATOMIC_OR          : FLAT_Real_Atomics_gfx10<0x03a>;
defm FLAT_ATOMIC_XOR         : FLAT_Real_Atomics_gfx10<0x03b>;
defm FLAT_ATOMIC_INC         : FLAT_Real_Atomics_gfx10<0x03c>;
defm FLAT_ATOMIC_DEC         : FLAT_Real_Atomics_gfx10<0x03d>;
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Real_Atomics_gfx10<0x03e>;
defm FLAT_ATOMIC_FMIN        : FLAT_Real_Atomics_gfx10<0x03f>;
defm FLAT_ATOMIC_FMAX        : FLAT_Real_Atomics_gfx10<0x040>;
defm FLAT_ATOMIC_SWAP_X2     : FLAT_Real_Atomics_gfx10<0x050>;
defm FLAT_ATOMIC_CMPSWAP_X2  : FLAT_Real_Atomics_gfx10<0x051>;
defm FLAT_ATOMIC_ADD_X2      : FLAT_Real_Atomics_gfx10<0x052>;
defm FLAT_ATOMIC_SUB_X2      : FLAT_Real_Atomics_gfx10<0x053>;
defm FLAT_ATOMIC_SMIN_X2     : FLAT_Real_Atomics_gfx10<0x055>;
defm FLAT_ATOMIC_UMIN_X2     : FLAT_Real_Atomics_gfx10<0x056>;
defm FLAT_ATOMIC_SMAX_X2     : FLAT_Real_Atomics_gfx10<0x057>;
defm FLAT_ATOMIC_UMAX_X2     : FLAT_Real_Atomics_gfx10<0x058>;
defm FLAT_ATOMIC_AND_X2      : FLAT_Real_Atomics_gfx10<0x059>;
defm FLAT_ATOMIC_OR_X2       : FLAT_Real_Atomics_gfx10<0x05a>;
defm FLAT_ATOMIC_XOR_X2      : FLAT_Real_Atomics_gfx10<0x05b>;
defm FLAT_ATOMIC_INC_X2      : FLAT_Real_Atomics_gfx10<0x05c>;
defm FLAT_ATOMIC_DEC_X2      : FLAT_Real_Atomics_gfx10<0x05d>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_gfx10<0x05f>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_gfx10<0x060>;


// ENC_FLAT_GLBL.
// Loads, stores and D16 loads: a `_gfx10` and a `_SADDR_gfx10` real each.
defm GLOBAL_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
defm GLOBAL_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
defm GLOBAL_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
defm GLOBAL_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
defm GLOBAL_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
defm GLOBAL_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
defm GLOBAL_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
defm GLOBAL_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
defm GLOBAL_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
defm GLOBAL_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
defm GLOBAL_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
defm GLOBAL_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
defm GLOBAL_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
defm GLOBAL_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;
// Atomics: `_gfx10`, `_SADDR_gfx10`, `_RTN_gfx10` and `_SADDR_RTN_gfx10`
// reals each.
defm GLOBAL_ATOMIC_SWAP        : FLAT_Real_GlblAtomics_gfx10<0x030>;
defm GLOBAL_ATOMIC_CMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x031>;
defm GLOBAL_ATOMIC_ADD         : FLAT_Real_GlblAtomics_gfx10<0x032>;
defm GLOBAL_ATOMIC_SUB         : FLAT_Real_GlblAtomics_gfx10<0x033>;
defm GLOBAL_ATOMIC_SMIN        : FLAT_Real_GlblAtomics_gfx10<0x035>;
defm GLOBAL_ATOMIC_UMIN        : FLAT_Real_GlblAtomics_gfx10<0x036>;
defm GLOBAL_ATOMIC_SMAX        : FLAT_Real_GlblAtomics_gfx10<0x037>;
defm GLOBAL_ATOMIC_UMAX        : FLAT_Real_GlblAtomics_gfx10<0x038>;
defm GLOBAL_ATOMIC_AND         : FLAT_Real_GlblAtomics_gfx10<0x039>;
defm GLOBAL_ATOMIC_OR          : FLAT_Real_GlblAtomics_gfx10<0x03a>;
defm GLOBAL_ATOMIC_XOR         : FLAT_Real_GlblAtomics_gfx10<0x03b>;
defm GLOBAL_ATOMIC_INC         : FLAT_Real_GlblAtomics_gfx10<0x03c>;
defm GLOBAL_ATOMIC_DEC         : FLAT_Real_GlblAtomics_gfx10<0x03d>;
defm GLOBAL_ATOMIC_FCMPSWAP    : FLAT_Real_GlblAtomics_gfx10<0x03e>;
defm GLOBAL_ATOMIC_FMIN        : FLAT_Real_GlblAtomics_gfx10<0x03f>;
defm GLOBAL_ATOMIC_FMAX        : FLAT_Real_GlblAtomics_gfx10<0x040>;
defm GLOBAL_ATOMIC_SWAP_X2     : FLAT_Real_GlblAtomics_gfx10<0x050>;
defm GLOBAL_ATOMIC_CMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x051>;
defm GLOBAL_ATOMIC_ADD_X2      : FLAT_Real_GlblAtomics_gfx10<0x052>;
defm GLOBAL_ATOMIC_SUB_X2      : FLAT_Real_GlblAtomics_gfx10<0x053>;
defm GLOBAL_ATOMIC_SMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x055>;
defm GLOBAL_ATOMIC_UMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x056>;
defm GLOBAL_ATOMIC_SMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x057>;
defm GLOBAL_ATOMIC_UMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x058>;
defm GLOBAL_ATOMIC_AND_X2      : FLAT_Real_GlblAtomics_gfx10<0x059>;
defm GLOBAL_ATOMIC_OR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05a>;
defm GLOBAL_ATOMIC_XOR_X2      : FLAT_Real_GlblAtomics_gfx10<0x05b>;
defm GLOBAL_ATOMIC_INC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05c>;
defm GLOBAL_ATOMIC_DEC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x060>;


// ENC_FLAT_SCRATCH.
// Loads, stores and D16 loads: a `_gfx10` and a `_SADDR_gfx10` real each.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;

// Floating-point add atomics. These use the VI real multiclass (`_vi` and
// `_SADDR_vi` reals) with SubtargetPredicate set to HasAtomicFaddInsts.
let SubtargetPredicate = HasAtomicFaddInsts in {

defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Real_AllAddr_vi <0x04d>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Real_AllAddr_vi <0x04e>;

} // End SubtargetPredicate = HasAtomicFaddInsts