//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Addressing complex patterns. Each one matches an i64 address and yields
// three operands, which the patterns in this file bind as
// (vaddr, offset, slc).
def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;

def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;

//===----------------------------------------------------------------------===//
// FLAT classes
//===----------------------------------------------------------------------===//

// Common base of every FLAT, global and scratch pseudo instruction.
// It records the mnemonic and asm operand string separately and carries the
// has_* / enabled_* / *Value flags that FLAT_Real reads when it lays out the
// encoding.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let FLAT = 1;

  let UseNamedOperandTable = 1;
  let hasSideEffects = 0;
  let SchedRW = [WriteVMEM];

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selectors; both zero means the plain flat segment.
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // We need to distinguish having saddr and enabling saddr because
  // saddr is only valid for scratch and global instructions. Pre-gfx9
  // these bits were reserved, so we also don't necessarily want to
  // set these bits to the disabled value for the original flat
  // segment instructions.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;
  bits<1> has_vaddr = 1;

  bits<1> has_data = 1;
  // When has_glc / has_dlc is 0 the bit is not an operand and the fixed
  // glcValue / dlcValue is used instead.
  bits<1> has_glc = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc = 1;
  bits<1> dlcValue = 0;

  // The segment decides which subtarget feature the instruction requires.
  let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
    !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a
  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
  // and are not considered done until both have been decremented.
  // Global and scratch forms only set VM_CNT.
  let VM_CNT = 1;
  let LGKM_CNT = !if(!or(is_flat_global, is_flat_scratch), 0, 1);

  let IsNonFlatSeg = !if(!or(is_flat_global, is_flat_scratch), 1, 0);
}

// Encoded form of a FLAT_Pseudo. Operand lists and the asm string are taken
// from `ps`; `op` is the 7-bit opcode placed in Inst{24-18}.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  Enc64 {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter = ps.AsmMatchConverter;
  let TSFlags = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;

  // encoding fields
  bits<8> vaddr;
  bits<8> vdata;
  bits<7> saddr;
  bits<8> vdst;

  bits<1> slc;
  bits<1> glc;
  // Declared here but not placed in Inst by this class.
  bits<1> dlc;

  // Only valid on gfx9
  bits<1> lds = 0; // XXX - What does this actually do?

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                  !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. Highest bit ignored for flat and treated as 12-bit
  // unsigned for flat accesses.
  bits<13> offset;
  bits<1> nv = 0; // XXX - What does this actually do?

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Only valid on GFX9+
  let Inst{12-0} = offset;
  let Inst{13} = lds;
  let Inst{15-14} = seg;

  // glc is either an operand or the constant chosen by the pseudo.
  let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
  let Inst{17} = slc;
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata, ?);
  // saddr: the register when enabled, 0x7f ("off") when the instruction has
  // an saddr field that is disabled, 0 when it has no saddr field at all.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  // 54-48 (the saddr field above) was reserved before gfx9.
  let Inst{55} = nv; // nv on GFX9+, TFE before.
  let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}

// Mixin that tags a def with IsSaddr (whether it is the saddr form) and
// SaddrOp (a name string; the saddr and non-saddr forms of one instruction
// pass the same string).
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  bit IsSaddr = is_saddr;
  string SaddrOp = Name;
}

// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).

// Load pseudo. Operands are $vaddr, optional $saddr (EnableSaddr),
// $offset/$glc/$slc/$dlc, and an optional tied $vdst_in (HasTiedOutput).
// HasSaddr only controls whether ", off" / ", $saddr" is printed and whether
// the saddr field is encoded.
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(
    !con(
      !con((ins VReg_64:$vaddr),
        !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
          (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
          !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
  let has_data = 0;
  let mayLoad = 1;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
  let maybeAtomic = 1;

  // The tied input is constrained to $vdst and excluded from the encoding.
  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Store pseudo. Operands are $vaddr, $vdata, optional $saddr (EnableSaddr)
// and $offset/$glc/$slc/$dlc; there is no $vdst.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    !con((ins VReg_64:$vaddr, vdataClass:$vdata),
      !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
        (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
  let mayLoad = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
  let maybeAtomic = 1;
}

// Defines the "off" form and the _SADDR form of a global load.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let is_flat_global = 1 in {
    def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
      GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
      GlobalSaddrTable<1, opName>;
  }
}

// Defines the "off" form and the _SADDR form of a global store.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1 in {
    def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
      GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
      GlobalSaddrTable<1, opName>;
  }
}

// Scratch load pseudo. Exactly one address register is used: a 32-bit $saddr
// when EnableSaddr is set (printed as "off, $saddr"), otherwise a 32-bit
// $vaddr (printed as "$vaddr, off").
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
  bit EnableSaddr = 0>: FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !if(EnableSaddr,
      (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
      (ins VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {
  let has_data = 0;
  let mayLoad = 1;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
  let maybeAtomic = 1;
}

// Scratch store pseudo; same single-address-register scheme as the scratch
// load, with $vdata first in the operand list.
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !if(EnableSaddr,
    (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
    (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
  let mayLoad = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
  let maybeAtomic = 1;
}

// Defines the $vaddr form and the _SADDR form of a scratch load.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def "" : FLAT_Scratch_Load_Pseudo<opName, regClass>;
    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, 1>;
  }
}

// Defines the $vaddr form and the _SADDR form of a scratch store.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>;
    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>;
  }
}

// Atomic that does not return the old value: no $vdst, and glc/dlc are not
// operands (both fixed to 0).
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                               string asm, list<dag> pattern = []> :
  FLAT_Pseudo<opName, outs, ins, asm, pattern> {
    let mayLoad = 1;
    let mayStore = 1;
    let has_glc = 0;
    let glcValue = 0;
    let has_dlc = 0;
    let dlcValue = 0;
    let has_vdst = 0;
    let maybeAtomic = 1;
}

// Returning atomic: re-enables $vdst and fixes glc to 1 (the asm strings of
// the returning forms print a literal " glc").
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let hasPostISelHook = 1;
  let has_vdst = 1;
  let glcValue = 1;
  let dlcValue = 0;
  let PseudoInstr = NAME # "_RTN";
}

// Flat-segment atomic: defines the no-return form and the _RTN form.
//   vdst_rc / vt      - register class and type of the returned value
//   atomic            - selection fragment used by the _RTN pattern
//   data_vt / data_rc - type and register class of $vdata; they default to
//                       the result's, and cmpswap-style users pass wider ones
//   isFP              - forwarded to FPAtomic
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {
  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
    " $vaddr, $vdata$offset$slc">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
  }

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_rc:$vdst),
    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
    " $vdst, $vaddr, $vdata$offset glc$slc",
    [(set vt:$vdst,
      (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
       GlobalSaddrTable<0, opName#"_rtn">,
       AtomicNoRet <opName, 1>{
    let FPAtomic = isFP;
  }
}

// Global atomic, no-return forms only: the "off" form and the _SADDR form.
// Parameters are as for FLAT_Atomic_Pseudo; `atomic` is accepted but not
// referenced by either def.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
    " $vaddr, $vdata, off$offset$slc">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let has_saddr = 1;
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
  }

  def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
    " $vaddr, $vdata, $saddr$offset$slc">,
    GlobalSaddrTable<1, opName>,
    AtomicNoRet <opName#"_saddr", 0> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR";
    let FPAtomic = isFP;
  }
}

// Global atomic, returning forms only: _RTN (with the selection pattern) and
// _SADDR_RTN (no pattern).
multiclass FLAT_Global_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
    " $vdst, $vaddr, $vdata, off$offset glc$slc",
    [(set vt:$vdst,
      (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
      GlobalSaddrTable<0, opName#"_rtn">,
      AtomicNoRet <opName, 1> {
    let has_saddr = 1;
    let FPAtomic = isFP;
  }

  def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
    " $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
    GlobalSaddrTable<1, opName#"_rtn">,
    AtomicNoRet <opName#"_saddr", 1> {
     let has_saddr = 1;
     let enabled_saddr = 1;
     let PseudoInstr = NAME#"_SADDR_RTN";
     let FPAtomic = isFP;
  }
}

// Global atomic with all four forms (no-return and returning, each with and
// without saddr).
multiclass FLAT_Global_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> :
    FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>,
    FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic, data_vt, data_rc>;

// Restricts a two-operand atomic node to memory operations whose address
// space is AMDGPUAS::FLAT_ADDRESS.
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
  (ops node:$ptr, node:$value),
  (atomic_op node:$ptr, node:$value),
  [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
>;

def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;
def atomic_inc_flat : flat_binary_atomic_op<SIatomic_inc>;
def atomic_dec_flat : flat_binary_atomic_op<SIatomic_dec>;



//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//

def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;

def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;

// The D16 loads pass HasTiedOutput = 1, so each takes a $vdst_in operand
// tied to $vdst.
let SubtargetPredicate = HasD16LoadStore in {
def FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>;
def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;

def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>;
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
}

// The cmpswap forms carry two values in $vdata, hence the data type and
// register class twice as wide as the result.
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
                                VGPR_32, i32, atomic_cmp_swap_flat,
                                v2i32, VReg_64>;

defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
                                VReg_64, i64, atomic_cmp_swap_flat,
                                v2i64, VReg_128>;

defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
                                VGPR_32, i32, atomic_swap_flat>;

defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
                                VReg_64, i64, atomic_swap_flat>;

defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
                                VGPR_32, i32, atomic_add_flat>;

defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
                                VGPR_32, i32, atomic_sub_flat>;

defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
                                VGPR_32, i32, atomic_min_flat>;

defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
                                VGPR_32, i32, atomic_umin_flat>;

defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
                                VGPR_32, i32, atomic_max_flat>;

defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
                                VGPR_32, i32, atomic_umax_flat>;

defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
                                VGPR_32, i32, atomic_and_flat>;

defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
                                VGPR_32, i32, atomic_or_flat>;

defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
                                VGPR_32, i32, atomic_xor_flat>;

defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
                                VGPR_32, i32, atomic_inc_flat>;

defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
                                VGPR_32, i32, atomic_dec_flat>;

defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
                                VReg_64, i64, atomic_add_flat>;

defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
                                VReg_64, i64, atomic_sub_flat>;

defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
                                VReg_64, i64, atomic_min_flat>;

defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
                                VReg_64, i64, atomic_umin_flat>;

defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
                                VReg_64, i64, atomic_max_flat>;

defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
                                VReg_64, i64, atomic_umax_flat>;

defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
                                VReg_64, i64, atomic_and_flat>;

defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
                                VReg_64, i64, atomic_or_flat>;

defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
                                VReg_64, i64, atomic_xor_flat>;

defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
                                VReg_64, i64, atomic_inc_flat>;

defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
                                VReg_64, i64, atomic_dec_flat>;

// GFX7-, GFX10-only flat instructions.
let SubtargetPredicate = isGFX7GFX10 in {

// No selection fragment is attached to these floating-point atomics
// (null_frag, explicitly or by default). The fcmpswap forms carry the compare
// and the swap value in $vdata, so their data type and register class are
// twice as wide as the result.
defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
                                VGPR_32, f32, null_frag, v2f32, VReg_64>;

defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
                                VReg_64, f64, null_frag, v2f64, VReg_128>;

defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
                                VGPR_32, f32>;

defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
                                VGPR_32, f32>;

defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
                                VReg_64, f64>;

defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
                                VReg_64, f64>;

} // End SubtargetPredicate = isGFX7GFX10

// Global-segment loads, stores and atomics. Every defm below produces the
// "off" form and the _SADDR form.
let SubtargetPredicate = HasFlatGlobalInsts in {
defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
defm GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
defm GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;

// D16 loads pass HasTiedInput = 1, giving them a $vdst_in tied to $vdst.
defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;

defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;

defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;

// FLAT_Global_Atomic_Pseudo does not set is_flat_global itself, so it is
// applied to the whole group here.
let is_flat_global = 1 in {
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
                               VGPR_32, i32, AMDGPUatomic_cmp_swap_global,
                               v2i32, VReg_64>;

defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
                                  VReg_64, i64, AMDGPUatomic_cmp_swap_global,
                                  v2i64, VReg_128>;

defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
                             VGPR_32, i32, atomic_swap_global_32>;

defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2",
                                VReg_64, i64, atomic_swap_global_64>;

defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add",
                           VGPR_32, i32, atomic_load_add_global_32>;

defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub",
                           VGPR_32, i32, atomic_load_sub_global_32>;

defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin",
                            VGPR_32, i32, atomic_load_min_global_32>;

defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin",
                            VGPR_32, i32, atomic_load_umin_global_32>;

defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax",
                            VGPR_32, i32, atomic_load_max_global_32>;

defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax",
                            VGPR_32, i32, atomic_load_umax_global_32>;

defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and",
                           VGPR_32, i32, atomic_load_and_global_32>;

defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or",
                          VGPR_32, i32, atomic_load_or_global_32>;

defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor",
                           VGPR_32, i32, atomic_load_xor_global_32>;

defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc",
                           VGPR_32, i32, atomic_inc_global_32>;

defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec",
                           VGPR_32, i32, atomic_dec_global_32>;

defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2",
                              VReg_64, i64, atomic_load_add_global_64>;

defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2",
                              VReg_64, i64, atomic_load_sub_global_64>;

defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2",
                               VReg_64, i64, atomic_load_min_global_64>;

defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2",
                               VReg_64, i64, atomic_load_umin_global_64>;

defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2",
                               VReg_64, i64, atomic_load_max_global_64>;

defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2",
                               VReg_64, i64, atomic_load_umax_global_64>;

defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2",
                              VReg_64, i64, atomic_load_and_global_64>;

defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2",
                             VReg_64, i64, atomic_load_or_global_64>;

defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2",
                              VReg_64, i64, atomic_load_xor_global_64>;

defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
                              VReg_64, i64, atomic_inc_global_64>;

defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
                              VReg_64, i64, atomic_dec_global_64>;
} // End is_flat_global = 1

} // End SubtargetPredicate = HasFlatGlobalInsts


// Scratch-segment loads and stores. Every defm below produces the $vaddr form
// and the _SADDR form.
let SubtargetPredicate = HasFlatScratchInsts in {
defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;

// NOTE(review): unlike the flat and global D16 loads, these have no $vdst_in
// tied to $vdst (FLAT_Scratch_Load_Pseudo offers no such option). Confirm
// that is intended before attaching selection patterns to them.
defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32>;
defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32>;
defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32>;

defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;

defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;

} // End SubtargetPredicate = HasFlatScratchInsts

let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
  // A compare-swap carries both the compare and the swap value in $vdata, so
  // the data type and register class must be twice as wide as the result.
  // This matches FLAT_ATOMIC_FCMPSWAP* and GLOBAL_ATOMIC_CMPSWAP*; with the
  // defaults $vdata would be only as wide as $vdst.
  defm GLOBAL_ATOMIC_FCMPSWAP :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32,
                              null_frag, v2f32, VReg_64>;
  defm GLOBAL_ATOMIC_FMIN :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMAX :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64,
                              null_frag, v2f64, VReg_128>;
  defm GLOBAL_ATOMIC_FMIN_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMAX_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1

let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {

// Only the no-return forms are defined for these two.
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
  "global_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret
>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
  "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret
>;

} // End SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1

//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//

// Selection pattern classes for FLAT loads, stores and atomics.
// Output operands of a GCNPat are positional. The load and store pseudos in
// this file take their trailing immediates in the order
//   $offset, $glc, $slc, $dlc
// so the load/store patterns below emit "$offset, 0, $slc, 0": glc = 0, slc
// as selected by the complex pattern, dlc = 0. Writing "0, 0, $slc" would put
// the selected slc value into the dlc operand and force slc to 0.
// The atomic pseudos take only "$offset, $slc".

// Load through the unsigned-offset FLATOffset addressing pattern.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, $slc, 0)
>;

// D16 load: `node` takes the previous register value as a second operand,
// which is passed on as the pseudo's tied $vdst_in.
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
  (inst $vaddr, $offset, 0, $slc, 0, $in)
>;

// D16 load through the signed-offset addressing pattern.
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
  (inst $vaddr, $offset, 0, $slc, 0, $in)
>;

// Atomic load, addressed through FLATAtomic.
class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, $slc, 0)
>;

// Load through the signed-offset addressing pattern.
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, $slc, 0)
>;

// Store through FLATOffset; `rc` is the register class given to $data in the
// output.
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
  (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
  (inst $vaddr, rc:$data, $offset, 0, $slc, 0)
>;

// Store through the signed-offset addressing pattern.
class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
  (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
  (inst $vaddr, rc:$data, $offset, 0, $slc, 0)
>;

class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
  // atomic store follows atomic binop convention so the address comes
  // first.
  (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
  (inst $vaddr, rc:$data, $offset, 0, $slc, 0)
>;

class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
  // atomic store follows atomic binop convention so the address comes
  // first.
  (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
  (inst $vaddr, rc:$data, $offset, 0, $slc, 0)
>;

// Returning atomic; data_vt may differ from the result type vt.
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                     ValueType data_vt = vt> : GCNPat <
  (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
  (inst $vaddr, $data, $offset, $slc)
>;

// Atomic whose result is not used.
class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
  (inst $vaddr, $data, $offset, $slc)
>;

// Returning atomic through the signed-offset addressing pattern.
class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                     ValueType data_vt = vt> : GCNPat <
  (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
  (inst $vaddr, $data, $offset, $slc)
>;

let OtherPredicates = [HasFlatAddressSpace] in {

def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, v4i32>;

783def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>; 784def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>; 785 786def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>; 787def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>; 788 789foreach vt = Reg32Types.types in { 790def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>; 791def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>; 792} 793 794foreach vt = VReg_64.RegTypes in { 795def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt, VReg_64>; 796def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>; 797} 798 799def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>; 800def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, v4i32, VReg_128>; 801 802def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>; 803def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>; 804 805def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>; 806def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>; 807def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global_32, i32>; 808def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>; 809def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>; 810def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>; 811def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>; 812def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>; 813def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>; 814def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>; 815def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>; 816def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>; 817def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>; 818 819def : FlatAtomicPat 
<FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>; 820def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>; 821def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>; 822def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>; 823def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>; 824def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>; 825def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>; 826def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>; 827def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>; 828def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>; 829def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>; 830def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>; 831def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>; 832 833def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>; 834def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>; 835 836let OtherPredicates = [D16PreservesUnusedBits] in { 837def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>; 838def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>; 839 840def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>; 841def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>; 842def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>; 843def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>; 844def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>; 845def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>; 846 847def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>; 848def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, 
az_extloadi8_d16_lo_flat, v2f16>; 849def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>; 850def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>; 851def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>; 852def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>; 853} 854 855} // End OtherPredicates = [HasFlatAddressSpace] 856 857let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in { 858 859def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>; 860def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>; 861def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>; 862def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>; 863def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>; 864def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>; 865def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, extloadi16_global, i32>; 866def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>; 867def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>; 868def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>; 869 870foreach vt = Reg32Types.types in { 871def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, vt>; 872def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, vt, VGPR_32>; 873} 874 875foreach vt = VReg_64.RegTypes in { 876def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, vt>; 877def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, vt, VReg_64>; 878} 879 880def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>; 881def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, v4i32>; 882 883def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>; 884def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>; 885 886def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>; 887def : FlatStoreSignedPat 
<GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>; 888def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>; 889def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>; 890def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>; 891def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, v4i32, VReg_128>; 892 893let OtherPredicates = [D16PreservesUnusedBits] in { 894def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>; 895def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>; 896 897def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>; 898def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>; 899def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>; 900def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>; 901def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>; 902def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>; 903 904def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>; 905def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>; 906def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>; 907def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>; 908def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>; 909def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>; 910} 911 912def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, store_atomic_global, i32>; 913def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, store_atomic_global, i64, VReg_64>; 914 915def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>; 916def : 
FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>; 917def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN, atomic_inc_global_32, i32>; 918def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>; 919def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>; 920def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>; 921def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>; 922def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>; 923def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>; 924def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>; 925def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>; 926def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>; 927def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>; 928 929def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>; 930def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>; 931def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>; 932def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>; 933def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>; 934def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>; 935def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>; 936def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>; 937def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>; 938def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>; 939def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, 
i64>; 940def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>; 941def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>; 942 943def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>; 944def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global_noret, v2f16>; 945 946} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 947 948 949//===----------------------------------------------------------------------===// 950// Target 951//===----------------------------------------------------------------------===// 952 953//===----------------------------------------------------------------------===// 954// CI 955//===----------------------------------------------------------------------===// 956 957class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> : 958 FLAT_Real <op, ps>, 959 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> { 960 let AssemblerPredicate = isGFX7Only; 961 let DecoderNamespace="GFX7"; 962} 963 964def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>; 965def FLAT_LOAD_SBYTE_ci : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>; 966def FLAT_LOAD_USHORT_ci : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>; 967def FLAT_LOAD_SSHORT_ci : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>; 968def FLAT_LOAD_DWORD_ci : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>; 969def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>; 970def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>; 971def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>; 972 973def FLAT_STORE_BYTE_ci : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>; 974def FLAT_STORE_SHORT_ci : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>; 975def FLAT_STORE_DWORD_ci : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>; 976def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>; 977def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>; 978def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, 
FLAT_STORE_DWORDX3>; 979 980multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> { 981 def _ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 982 def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 983} 984 985defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>; 986defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>; 987defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>; 988defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>; 989defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>; 990defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>; 991defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>; 992defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>; 993defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>; 994defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>; 995defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>; 996defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>; 997defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>; 998defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>; 999defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>; 1000defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>; 1001defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>; 1002defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>; 1003defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>; 1004defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>; 1005defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>; 1006defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>; 1007defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a, 
FLAT_ATOMIC_OR_X2>; 1008defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>; 1009defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>; 1010defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>; 1011 1012// CI Only flat instructions 1013defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>; 1014defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>; 1015defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>; 1016defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>; 1017defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>; 1018defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>; 1019 1020 1021//===----------------------------------------------------------------------===// 1022// VI 1023//===----------------------------------------------------------------------===// 1024 1025class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> : 1026 FLAT_Real <op, ps>, 1027 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> { 1028 let AssemblerPredicate = isGFX8GFX9; 1029 let DecoderNamespace = "GFX8"; 1030} 1031 1032multiclass FLAT_Real_AllAddr_vi<bits<7> op> { 1033 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)>; 1034 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1035} 1036 1037def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; 1038def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>; 1039def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>; 1040def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>; 1041def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>; 1042def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>; 1043def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>; 1044def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>; 1045 1046def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, 
FLAT_STORE_BYTE>; 1047def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>; 1048def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>; 1049def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; 1050def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>; 1051def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>; 1052def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>; 1053def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>; 1054 1055def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>; 1056def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; 1057def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>; 1058def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; 1059def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>; 1060def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; 1061 1062multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> { 1063 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 1064 def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 1065} 1066 1067multiclass FLAT_Global_Real_Atomics_vi<bits<7> op> : 1068 FLAT_Real_AllAddr_vi<op> { 1069 def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 1070 def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 1071} 1072 1073 1074defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>; 1075defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>; 1076defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>; 1077defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>; 1078defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>; 1079defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>; 1080defm FLAT_ATOMIC_SMAX : 
FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>; 1081defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>; 1082defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>; 1083defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>; 1084defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>; 1085defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>; 1086defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>; 1087defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>; 1088defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>; 1089defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>; 1090defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>; 1091defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>; 1092defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>; 1093defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>; 1094defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>; 1095defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>; 1096defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>; 1097defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>; 1098defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>; 1099defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>; 1100 1101defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>; 1102defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>; 1103defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>; 1104defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>; 1105defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>; 1106defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>; 1107defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>; 1108defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi 
<0x17>; 1109 1110defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>; 1111defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>; 1112defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>; 1113defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>; 1114defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>; 1115defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>; 1116 1117defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>; 1118defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>; 1119defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>; 1120defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>; 1121defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>; 1122defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; 1123defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; 1124defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; 1125 1126 1127defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; 1128defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>; 1129defm GLOBAL_ATOMIC_ADD : FLAT_Global_Real_Atomics_vi <0x42>; 1130defm GLOBAL_ATOMIC_SUB : FLAT_Global_Real_Atomics_vi <0x43>; 1131defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Real_Atomics_vi <0x44>; 1132defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Real_Atomics_vi <0x45>; 1133defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Real_Atomics_vi <0x46>; 1134defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Real_Atomics_vi <0x47>; 1135defm GLOBAL_ATOMIC_AND : FLAT_Global_Real_Atomics_vi <0x48>; 1136defm GLOBAL_ATOMIC_OR : FLAT_Global_Real_Atomics_vi <0x49>; 1137defm GLOBAL_ATOMIC_XOR : FLAT_Global_Real_Atomics_vi <0x4a>; 1138defm GLOBAL_ATOMIC_INC : FLAT_Global_Real_Atomics_vi <0x4b>; 1139defm GLOBAL_ATOMIC_DEC : FLAT_Global_Real_Atomics_vi <0x4c>; 1140defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Real_Atomics_vi <0x60>; 1141defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>; 1142defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Real_Atomics_vi <0x62>; 1143defm GLOBAL_ATOMIC_SUB_X2 : 
FLAT_Global_Real_Atomics_vi <0x63>; 1144defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Real_Atomics_vi <0x64>; 1145defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Real_Atomics_vi <0x65>; 1146defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Real_Atomics_vi <0x66>; 1147defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Real_Atomics_vi <0x67>; 1148defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Real_Atomics_vi <0x68>; 1149defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Real_Atomics_vi <0x69>; 1150defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Real_Atomics_vi <0x6a>; 1151defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>; 1152defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>; 1153 1154defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>; 1155defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>; 1156defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>; 1157defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>; 1158defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>; 1159defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>; 1160defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>; 1161defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>; 1162defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>; 1163defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>; 1164defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>; 1165defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>; 1166defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>; 1167defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>; 1168defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>; 1169defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>; 1170defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>; 1171defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>; 1172defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>; 1173defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; 1174defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; 1175defm SCRATCH_STORE_DWORDX4 : 
FLAT_Real_AllAddr_vi <0x1f>; 1176 1177 1178//===----------------------------------------------------------------------===// 1179// GFX10. 1180//===----------------------------------------------------------------------===// 1181 1182class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> : 1183 FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> { 1184 let AssemblerPredicate = isGFX10Plus; 1185 let DecoderNamespace = "GFX10"; 1186 1187 let Inst{11-0} = offset{11-0}; 1188 let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue); 1189 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d); 1190 let Inst{55} = 0; 1191} 1192 1193 1194multiclass FLAT_Real_Base_gfx10<bits<7> op> { 1195 def _gfx10 : 1196 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>; 1197} 1198 1199multiclass FLAT_Real_RTN_gfx10<bits<7> op> { 1200 def _RTN_gfx10 : 1201 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 1202} 1203 1204multiclass FLAT_Real_SADDR_gfx10<bits<7> op> { 1205 def _SADDR_gfx10 : 1206 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1207} 1208 1209multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> { 1210 def _SADDR_RTN_gfx10 : 1211 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 1212} 1213 1214 1215multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> : 1216 FLAT_Real_Base_gfx10<op>, 1217 FLAT_Real_SADDR_gfx10<op>; 1218 1219multiclass FLAT_Real_Atomics_gfx10<bits<7> op> : 1220 FLAT_Real_Base_gfx10<op>, 1221 FLAT_Real_RTN_gfx10<op>; 1222 1223multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> : 1224 FLAT_Real_AllAddr_gfx10<op>, 1225 FLAT_Real_RTN_gfx10<op>, 1226 FLAT_Real_SADDR_RTN_gfx10<op>; 1227 1228 1229// ENC_FLAT. 
// GFX10 real encodings for the FLAT_* pseudos. Loads and stores instantiate
// FLAT_Real_Base_gfx10; atomics instantiate FLAT_Real_Atomics_gfx10.
defm FLAT_LOAD_UBYTE            : FLAT_Real_Base_gfx10<0x008>;
defm FLAT_LOAD_SBYTE            : FLAT_Real_Base_gfx10<0x009>;
defm FLAT_LOAD_USHORT           : FLAT_Real_Base_gfx10<0x00a>;
defm FLAT_LOAD_SSHORT           : FLAT_Real_Base_gfx10<0x00b>;
defm FLAT_LOAD_DWORD            : FLAT_Real_Base_gfx10<0x00c>;
defm FLAT_LOAD_DWORDX2          : FLAT_Real_Base_gfx10<0x00d>;
defm FLAT_LOAD_DWORDX4          : FLAT_Real_Base_gfx10<0x00e>;
defm FLAT_LOAD_DWORDX3          : FLAT_Real_Base_gfx10<0x00f>;
defm FLAT_STORE_BYTE            : FLAT_Real_Base_gfx10<0x018>;
defm FLAT_STORE_BYTE_D16_HI     : FLAT_Real_Base_gfx10<0x019>;
defm FLAT_STORE_SHORT           : FLAT_Real_Base_gfx10<0x01a>;
defm FLAT_STORE_SHORT_D16_HI    : FLAT_Real_Base_gfx10<0x01b>;
defm FLAT_STORE_DWORD           : FLAT_Real_Base_gfx10<0x01c>;
defm FLAT_STORE_DWORDX2         : FLAT_Real_Base_gfx10<0x01d>;
defm FLAT_STORE_DWORDX4         : FLAT_Real_Base_gfx10<0x01e>;
defm FLAT_STORE_DWORDX3         : FLAT_Real_Base_gfx10<0x01f>;
defm FLAT_LOAD_UBYTE_D16        : FLAT_Real_Base_gfx10<0x020>;
defm FLAT_LOAD_UBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x021>;
defm FLAT_LOAD_SBYTE_D16        : FLAT_Real_Base_gfx10<0x022>;
defm FLAT_LOAD_SBYTE_D16_HI     : FLAT_Real_Base_gfx10<0x023>;
defm FLAT_LOAD_SHORT_D16        : FLAT_Real_Base_gfx10<0x024>;
defm FLAT_LOAD_SHORT_D16_HI     : FLAT_Real_Base_gfx10<0x025>;
defm FLAT_ATOMIC_SWAP           : FLAT_Real_Atomics_gfx10<0x030>;
defm FLAT_ATOMIC_CMPSWAP        : FLAT_Real_Atomics_gfx10<0x031>;
defm FLAT_ATOMIC_ADD            : FLAT_Real_Atomics_gfx10<0x032>;
defm FLAT_ATOMIC_SUB            : FLAT_Real_Atomics_gfx10<0x033>;
defm FLAT_ATOMIC_SMIN           : FLAT_Real_Atomics_gfx10<0x035>;
defm FLAT_ATOMIC_UMIN           : FLAT_Real_Atomics_gfx10<0x036>;
defm FLAT_ATOMIC_SMAX           : FLAT_Real_Atomics_gfx10<0x037>;
defm FLAT_ATOMIC_UMAX           : FLAT_Real_Atomics_gfx10<0x038>;
defm FLAT_ATOMIC_AND            : FLAT_Real_Atomics_gfx10<0x039>;
defm FLAT_ATOMIC_OR             : FLAT_Real_Atomics_gfx10<0x03a>;
defm FLAT_ATOMIC_XOR            : FLAT_Real_Atomics_gfx10<0x03b>;
defm FLAT_ATOMIC_INC            : FLAT_Real_Atomics_gfx10<0x03c>;
defm FLAT_ATOMIC_DEC            : FLAT_Real_Atomics_gfx10<0x03d>;
defm FLAT_ATOMIC_FCMPSWAP       : FLAT_Real_Atomics_gfx10<0x03e>;
defm FLAT_ATOMIC_FMIN           : FLAT_Real_Atomics_gfx10<0x03f>;
defm FLAT_ATOMIC_FMAX           : FLAT_Real_Atomics_gfx10<0x040>;
defm FLAT_ATOMIC_SWAP_X2        : FLAT_Real_Atomics_gfx10<0x050>;
defm FLAT_ATOMIC_CMPSWAP_X2     : FLAT_Real_Atomics_gfx10<0x051>;
defm FLAT_ATOMIC_ADD_X2         : FLAT_Real_Atomics_gfx10<0x052>;
defm FLAT_ATOMIC_SUB_X2         : FLAT_Real_Atomics_gfx10<0x053>;
defm FLAT_ATOMIC_SMIN_X2        : FLAT_Real_Atomics_gfx10<0x055>;
defm FLAT_ATOMIC_UMIN_X2        : FLAT_Real_Atomics_gfx10<0x056>;
defm FLAT_ATOMIC_SMAX_X2        : FLAT_Real_Atomics_gfx10<0x057>;
defm FLAT_ATOMIC_UMAX_X2        : FLAT_Real_Atomics_gfx10<0x058>;
defm FLAT_ATOMIC_AND_X2         : FLAT_Real_Atomics_gfx10<0x059>;
defm FLAT_ATOMIC_OR_X2          : FLAT_Real_Atomics_gfx10<0x05a>;
defm FLAT_ATOMIC_XOR_X2         : FLAT_Real_Atomics_gfx10<0x05b>;
defm FLAT_ATOMIC_INC_X2         : FLAT_Real_Atomics_gfx10<0x05c>;
defm FLAT_ATOMIC_DEC_X2         : FLAT_Real_Atomics_gfx10<0x05d>;
defm FLAT_ATOMIC_FCMPSWAP_X2    : FLAT_Real_Atomics_gfx10<0x05e>;
defm FLAT_ATOMIC_FMIN_X2        : FLAT_Real_Atomics_gfx10<0x05f>;
defm FLAT_ATOMIC_FMAX_X2        : FLAT_Real_Atomics_gfx10<0x060>;


// ENC_FLAT_GLBL.
// GFX10 real encodings for the GLOBAL_* pseudos. Loads and stores instantiate
// FLAT_Real_AllAddr_gfx10; atomics instantiate FLAT_Real_GlblAtomics_gfx10.
defm GLOBAL_LOAD_UBYTE          : FLAT_Real_AllAddr_gfx10<0x008>;
defm GLOBAL_LOAD_SBYTE          : FLAT_Real_AllAddr_gfx10<0x009>;
defm GLOBAL_LOAD_USHORT         : FLAT_Real_AllAddr_gfx10<0x00a>;
defm GLOBAL_LOAD_SSHORT         : FLAT_Real_AllAddr_gfx10<0x00b>;
defm GLOBAL_LOAD_DWORD          : FLAT_Real_AllAddr_gfx10<0x00c>;
defm GLOBAL_LOAD_DWORDX2        : FLAT_Real_AllAddr_gfx10<0x00d>;
defm GLOBAL_LOAD_DWORDX4        : FLAT_Real_AllAddr_gfx10<0x00e>;
defm GLOBAL_LOAD_DWORDX3        : FLAT_Real_AllAddr_gfx10<0x00f>;
defm GLOBAL_STORE_BYTE          : FLAT_Real_AllAddr_gfx10<0x018>;
defm GLOBAL_STORE_BYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x019>;
defm GLOBAL_STORE_SHORT         : FLAT_Real_AllAddr_gfx10<0x01a>;
defm GLOBAL_STORE_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x01b>;
defm GLOBAL_STORE_DWORD         : FLAT_Real_AllAddr_gfx10<0x01c>;
defm GLOBAL_STORE_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x01d>;
defm GLOBAL_STORE_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x01e>;
defm GLOBAL_STORE_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x01f>;
defm GLOBAL_LOAD_UBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x020>;
defm GLOBAL_LOAD_UBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x021>;
defm GLOBAL_LOAD_SBYTE_D16      : FLAT_Real_AllAddr_gfx10<0x022>;
defm GLOBAL_LOAD_SBYTE_D16_HI   : FLAT_Real_AllAddr_gfx10<0x023>;
defm GLOBAL_LOAD_SHORT_D16      : FLAT_Real_AllAddr_gfx10<0x024>;
defm GLOBAL_LOAD_SHORT_D16_HI   : FLAT_Real_AllAddr_gfx10<0x025>;
defm GLOBAL_ATOMIC_SWAP         : FLAT_Real_GlblAtomics_gfx10<0x030>;
defm GLOBAL_ATOMIC_CMPSWAP      : FLAT_Real_GlblAtomics_gfx10<0x031>;
defm GLOBAL_ATOMIC_ADD          : FLAT_Real_GlblAtomics_gfx10<0x032>;
defm GLOBAL_ATOMIC_SUB          : FLAT_Real_GlblAtomics_gfx10<0x033>;
defm GLOBAL_ATOMIC_SMIN         : FLAT_Real_GlblAtomics_gfx10<0x035>;
defm GLOBAL_ATOMIC_UMIN         : FLAT_Real_GlblAtomics_gfx10<0x036>;
defm GLOBAL_ATOMIC_SMAX         : FLAT_Real_GlblAtomics_gfx10<0x037>;
defm GLOBAL_ATOMIC_UMAX         : FLAT_Real_GlblAtomics_gfx10<0x038>;
defm GLOBAL_ATOMIC_AND          : FLAT_Real_GlblAtomics_gfx10<0x039>;
defm GLOBAL_ATOMIC_OR           : FLAT_Real_GlblAtomics_gfx10<0x03a>;
defm GLOBAL_ATOMIC_XOR          : FLAT_Real_GlblAtomics_gfx10<0x03b>;
defm GLOBAL_ATOMIC_INC          : FLAT_Real_GlblAtomics_gfx10<0x03c>;
defm GLOBAL_ATOMIC_DEC          : FLAT_Real_GlblAtomics_gfx10<0x03d>;
defm GLOBAL_ATOMIC_FCMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x03e>;
defm GLOBAL_ATOMIC_FMIN         : FLAT_Real_GlblAtomics_gfx10<0x03f>;
defm GLOBAL_ATOMIC_FMAX         : FLAT_Real_GlblAtomics_gfx10<0x040>;
defm GLOBAL_ATOMIC_SWAP_X2      : FLAT_Real_GlblAtomics_gfx10<0x050>;
defm GLOBAL_ATOMIC_CMPSWAP_X2   : FLAT_Real_GlblAtomics_gfx10<0x051>;
defm GLOBAL_ATOMIC_ADD_X2       : FLAT_Real_GlblAtomics_gfx10<0x052>;
defm GLOBAL_ATOMIC_SUB_X2       : FLAT_Real_GlblAtomics_gfx10<0x053>;
defm GLOBAL_ATOMIC_SMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x055>;
defm GLOBAL_ATOMIC_UMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x056>;
defm GLOBAL_ATOMIC_SMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x057>;
defm GLOBAL_ATOMIC_UMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x058>;
defm GLOBAL_ATOMIC_AND_X2       : FLAT_Real_GlblAtomics_gfx10<0x059>;
defm GLOBAL_ATOMIC_OR_X2        : FLAT_Real_GlblAtomics_gfx10<0x05a>;
defm GLOBAL_ATOMIC_XOR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05b>;
defm GLOBAL_ATOMIC_INC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05c>;
defm GLOBAL_ATOMIC_DEC_X2       : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2      : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2      : FLAT_Real_GlblAtomics_gfx10<0x060>;


// ENC_FLAT_SCRATCH.
// GFX10 real encodings for the SCRATCH_* pseudos; every entry instantiates
// FLAT_Real_AllAddr_gfx10.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;

// The two global floating-point add atomics use the VI real-encoding
// multiclass (FLAT_Real_AllAddr_vi), and have their SubtargetPredicate set to
// HasAtomicFaddInsts by the enclosing `let`.
let SubtargetPredicate = HasAtomicFaddInsts in {

defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Real_AllAddr_vi <0x04d>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Real_AllAddr_vi <0x04e>;

} // End SubtargetPredicate = HasAtomicFaddInsts