//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Address-selection complex patterns. Each yields three results (the patterns
// in this file bind them as $vaddr, $offset and $slc) and is implemented by
// the named C++ selector.
def FLATAtomic : ComplexPattern<i64, 3, "SelectFlatAtomic", [], [SDNPWantRoot], -10>;
def FLATOffset : ComplexPattern<i64, 3, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;

def FLATOffsetSigned : ComplexPattern<i64, 3, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
def FLATSignedAtomic : ComplexPattern<i64, 3, "SelectFlatAtomicSigned", [], [SDNPWantRoot], -10>;

//===----------------------------------------------------------------------===//
// FLAT classes
//===----------------------------------------------------------------------===//

// Common base of every FLAT-family pseudo instruction (flat, global and
// scratch segments).
//
//   opName  - mnemonic; also used as the SIMCInstr key.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - operand part of the asm string (appended to the mnemonic by
//             FLAT_Real).
//   pattern - optional selection pattern.
//
// The has_* / enabled_* / *Value fields are not encoded here; FLAT_Real reads
// them to decide which encoding fields are driven by operands.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let FLAT = 1;

  let UseNamedOperandTable = 1;
  let hasSideEffects = 0;
  let SchedRW = [WriteVMEM];

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selectors; at most one is expected to be set by a derived record.
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // We need to distinguish having saddr and enabling saddr because
  // saddr is only valid for scratch and global instructions. Pre-gfx9
  // these bits were reserved, so we also don't necessarily want to
  // set these bits to the disabled value for the original flat
  // segment instructions.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;
  bits<1> has_vaddr = 1;

  bits<1> has_data = 1;
  // When has_glc is 0, FLAT_Real encodes glcValue instead of the glc operand.
  bits<1> has_glc = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc = 1;
  bits<1> dlcValue = 0;

  // Global wins over scratch; plain flat is the fallback.
  let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
    !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a
  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
  // and are not considered done until both have been decremented.
  // Global and scratch forms clear LGKM_CNT below.
  let VM_CNT = 1;
  let LGKM_CNT = !if(!or(is_flat_global, is_flat_scratch), 0, 1);

  let IsNonFlatSeg = !if(!or(is_flat_global, is_flat_scratch), 1, 0);
}

// Encoded (real) counterpart of a FLAT_Pseudo.
//
//   op - 7-bit opcode placed in Inst{24-18}.
//   ps - the pseudo whose operand lists, asm string and flags are copied.
//
// Fields whose has_* flag is clear in the pseudo are left unset (?) or given
// the fixed value noted on the corresponding Inst assignment.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  Enc64 {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  // copy relevant pseudo op flags
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter = ps.AsmMatchConverter;
  let TSFlags = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;

  // encoding fields
  bits<8> vaddr;
  bits<8> vdata;
  bits<7> saddr;
  bits<8> vdst;

  bits<1> slc;
  bits<1> glc;
  bits<1> dlc;

  // Only valid on gfx9
  bits<1> lds = 0; // XXX - What does this actually do?

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                  !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. Highest bit ignored for flat and treated as 12-bit
  // unsigned for flat accesses.
  bits<13> offset;
  bits<1> nv = 0; // XXX - What does this actually do?

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Only valid on GFX9+
  let Inst{12-0} = offset;
  let Inst{13} = lds;
  let Inst{15-14} = seg;

  let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
  let Inst{17} = slc;
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata, ?);

  // Bits 54-48 hold saddr. Per the note in FLAT_Pseudo these bits were
  // reserved pre-gfx9, so they are 0 when the pseudo has no saddr field and
  // 0x7f (the disabled value) when it has one that is not enabled.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  let Inst{55} = nv; // nv on GFX9+, TFE before.
  let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}

// Mixin that records whether a record is the saddr form (IsSaddr) and the
// name (SaddrOp) shared between the saddr and non-saddr forms.
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  bit IsSaddr = is_saddr;
  string SaddrOp = Name;
}

// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).

// Load pseudo for the flat and global segments.
//
//   regClass      - register class of $vdst.
//   HasTiedOutput - adds a $vdst_in input tied to $vdst (and excluded from
//                   the encoding).
//   HasSaddr      - the asm string has an saddr slot (", off" if not enabled).
//   EnableSaddr   - adds the SReg_64 $saddr operand.
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(
    !con(
      !con((ins VReg_64:$vaddr),
           !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
      (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
    !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
  let has_data = 0;
  let mayLoad = 1;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
  let maybeAtomic = 1;

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Store pseudo for the flat and global segments; HasSaddr / EnableSaddr have
// the same meaning as in FLAT_Load_Pseudo. There is no $vdst.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    !con((ins VReg_64:$vaddr, vdataClass:$vdata),
         !if(EnableSaddr, (ins SReg_64:$saddr), (ins))),
    (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
  let mayLoad = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
  let maybeAtomic = 1;
}

// Defines the "off" (NAME) and saddr (NAME_SADDR) forms of a global load.
// HasTiedInput is forwarded as FLAT_Load_Pseudo's HasTiedOutput argument.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
      GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
      GlobalSaddrTable<1, opName>;
  }
}

// Global load addressed by $saddr only: there is no $vaddr operand
// (has_vaddr = 0) and saddr is always enabled.
// NOTE(review): HasSignedOffset is not referenced in this class body.
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0, bit HasSignedOffset = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con((ins SReg_64:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
       !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
  " $vdst, $saddr$offset$glc$slc$dlc"> {
  let is_flat_global = 1;
  let has_data = 0;
  let mayLoad = 1;
  let has_vaddr = 0;
  let has_saddr = 1;
  let enabled_saddr = 1;
  let maybeAtomic = 1;

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Defines the "off" (NAME) and saddr (NAME_SADDR) forms of a global store.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
      GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
      GlobalSaddrTable<1, opName>;
  }
}

// Global store addressed by $saddr only (no $vaddr, no $vdst).
// NOTE(review): HasSignedOffset is not referenced in this class body.
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSignedOffset = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    (ins vdataClass:$vdata, SReg_64:$saddr),
    (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " $vdata, $saddr$offset$glc$slc$dlc"> {
  let is_flat_global = 1;
  let mayLoad = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_vaddr = 0;
  let has_saddr = 1;
  let enabled_saddr = 1;
  let maybeAtomic = 1;
}

// Scratch load. The address comes from exactly one of $saddr (EnableSaddr = 1,
// SReg_32_XEXEC_HI) or $vaddr (EnableSaddr = 0, VGPR_32); the unused slot is
// printed as "off".
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
  bit EnableSaddr = 0>: FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !if(EnableSaddr,
      (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
      (ins VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " $vdst, "#!if(EnableSaddr, "off", "$vaddr")#!if(EnableSaddr, ", $saddr", ", off")#"$offset$glc$slc$dlc"> {
  let has_data = 0;
  let mayLoad = 1;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
  let maybeAtomic = 1;
}

// Scratch store; addressing follows the same either/or rule as
// FLAT_Scratch_Load_Pseudo.
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !if(EnableSaddr,
      (ins vdataClass:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc),
      (ins vdataClass:$vdata, VGPR_32:$vaddr, flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
  " "#!if(EnableSaddr, "off", "$vaddr")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
  let mayLoad = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = !if(EnableSaddr, 0, 1);
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
  let maybeAtomic = 1;
}

// Defines the vaddr (NAME) and saddr (NAME_SADDR) forms of a scratch load.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def "" : FLAT_Scratch_Load_Pseudo<opName, regClass>;
    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, 1>;
  }
}

// Defines the vaddr (NAME) and saddr (NAME_SADDR) forms of a scratch store.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>;
    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>;
  }
}

// Atomic that does not return the old value: no $vdst, and the glc/dlc
// encoding bits are fixed to glcValue/dlcValue (both 0) rather than operands.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                              string asm, list<dag> pattern = []> :
  FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  let mayLoad = 1;
  let mayStore = 1;
  let has_glc = 0;
  let glcValue = 0;
  let has_dlc = 0;
  let dlcValue = 0;
  let has_vdst = 0;
  let maybeAtomic = 1;
}

// Returning atomic: re-enables $vdst and fixes the encoded glc bit to 1.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let hasPostISelHook = 1;
  let has_vdst = 1;
  let glcValue = 1;
  let dlcValue = 0;
  let PseudoInstr = NAME # "_RTN";
}

// Flat-segment atomic: defines NAME (no return) and NAME_RTN (returning, with
// a selection pattern on `atomic`).
//
//   vdst_rc / vt      - register class and type of the returned value.
//   data_vt / data_rc - type and register class of $vdata; default to the
//                       result's.
//   isFP              - forwarded to FPAtomic; defaults from data_vt.
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {
  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
    " $vaddr, $vdata$offset$slc">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_rc:$vdst),
    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
    " $vdst, $vaddr, $vdata$offset glc$slc",
    [(set vt:$vdst,
      (atomic (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
    GlobalSaddrTable<0, opName#"_rtn">,
    AtomicNoRet <opName, 1>{
    let FPAtomic = isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }
}

// Non-returning global atomic: defines NAME ("off") and NAME_SADDR.
// NOTE(review): the `atomic` and `vt` parameters are not referenced in this
// body; no selection pattern is attached here.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
    " $vaddr, $vdata, off$offset$slc">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let has_saddr = 1;
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
  }

  def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
    " $vaddr, $vdata, $saddr$offset$slc">,
    GlobalSaddrTable<1, opName>,
    AtomicNoRet <opName#"_saddr", 0> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR";
    let FPAtomic = isFP;
  }
}

// Returning global atomic: defines NAME_RTN ("off", with a selection pattern
// on `atomic`) and NAME_SADDR_RTN (no pattern).
multiclass FLAT_Global_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret> {

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_rc:$vdst),
    (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC:$slc),
    " $vdst, $vaddr, $vdata, off$offset glc$slc",
    [(set vt:$vdst,
      (atomic (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$vdata))]>,
    GlobalSaddrTable<0, opName#"_rtn">,
    AtomicNoRet <opName, 1> {
    let has_saddr = 1;
    let FPAtomic = isFP;
  }

  def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_rc:$vdst),
    (ins VReg_64:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC:$slc),
    " $vdst, $vaddr, $vdata, $saddr$offset glc$slc">,
    GlobalSaddrTable<1, opName#"_rtn">,
    AtomicNoRet <opName#"_saddr", 1> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR_RTN";
    let FPAtomic = isFP;
  }
}

// Instantiates both the non-returning and the returning global atomic forms
// under is_flat_global / HasFlatGlobalInsts.
multiclass FLAT_Global_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic_rtn = null_frag,
  SDPatternOperator atomic_no_rtn = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>;
    defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
  }
}

//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//

def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;

def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;

// The D16 loads pass HasTiedOutput = 1, so each takes a tied $vdst_in.
let SubtargetPredicate = HasD16LoadStore in {
def FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>;
def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;

def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>;
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
}

// For the cmpswap forms $vdata is twice the width of the result
// (v2i32 / v2i64 in VReg_64 / VReg_128).
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
                                VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32,
                                v2i32, VReg_64>;

defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
                                VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64,
                                v2i64, VReg_128>;

defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
                                VGPR_32, i32, atomic_swap_flat_32>;

defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
                                VReg_64, i64, atomic_swap_flat_64>;

defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
                                VGPR_32, i32, atomic_load_add_flat_32>;

defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
                                VGPR_32, i32, atomic_load_sub_flat_32>;

defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
                                VGPR_32, i32, atomic_load_min_flat_32>;

defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
                                VGPR_32, i32, atomic_load_umin_flat_32>;

defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
                                VGPR_32, i32, atomic_load_max_flat_32>;

defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
                                VGPR_32, i32, atomic_load_umax_flat_32>;

defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
                                VGPR_32, i32, atomic_load_and_flat_32>;

defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
                                VGPR_32, i32, atomic_load_or_flat_32>;

defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
                                VGPR_32, i32, atomic_load_xor_flat_32>;

defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
                                VGPR_32, i32, atomic_inc_flat_32>;

defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
                                VGPR_32, i32, atomic_dec_flat_32>;

defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
                                VReg_64, i64, atomic_load_add_flat_64>;

defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
                                VReg_64, i64, atomic_load_sub_flat_64>;

defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
                                VReg_64, i64, atomic_load_min_flat_64>;

defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
                                VReg_64, i64, atomic_load_umin_flat_64>;

defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
                                VReg_64, i64, atomic_load_max_flat_64>;

defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
                                VReg_64, i64, atomic_load_umax_flat_64>;

defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
                                VReg_64, i64, atomic_load_and_flat_64>;

defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
                                VReg_64, i64, atomic_load_or_flat_64>;

defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
                                VReg_64, i64, atomic_load_xor_flat_64>;

defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
                                VReg_64, i64, atomic_inc_flat_64>;

defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
                                VReg_64, i64, atomic_dec_flat_64>;

// GFX7-, GFX10-only flat instructions.
let SubtargetPredicate = isGFX7GFX10 in {

// Floating-point flat atomics. No selection operator is supplied (null_frag),
// so the _RTN forms carry no usable selection pattern.
defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
                                VGPR_32, f32, null_frag, v2f32, VReg_64>;

defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
                                VReg_64, f64, null_frag, v2f64, VReg_128>;

defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
                                VGPR_32, f32>;

defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
                                VGPR_32, f32>;

defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
                                VReg_64, f64>;

defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
                                VReg_64, f64>;

} // End SubtargetPredicate = isGFX7GFX10

// Global-segment loads. Each defm yields the "off" form and the _SADDR form.
defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
defm GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
defm GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;

// D16 global loads pass 1 as the third argument (tied $vdst_in).
defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
let OtherPredicates = [HasGFX10_BEncoding] in
def GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;

// Global-segment stores.
defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
let OtherPredicates = [HasGFX10_BEncoding] in
def GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;

defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;

// Global-segment atomics. The fourth argument is the returning-form operator;
// the non-returning operator is left at its null_frag default (or passed
// explicitly as null_frag for the cmpswap forms).
let is_flat_global = 1 in {
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
                               VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32, null_frag,
                               v2i32, VReg_64>;

defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
                                  VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
                                  null_frag,
                                  v2i64, VReg_128>;

defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
                             VGPR_32, i32, atomic_swap_global_32>;

defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2",
                                VReg_64, i64, atomic_swap_global_64>;

defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add",
                           VGPR_32, i32, atomic_load_add_global_32>;

defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub",
                           VGPR_32, i32, atomic_load_sub_global_32>;

defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin",
                            VGPR_32, i32, atomic_load_min_global_32>;

defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin",
                            VGPR_32, i32, atomic_load_umin_global_32>;

defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax",
                            VGPR_32, i32, atomic_load_max_global_32>;

defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax",
                            VGPR_32, i32, atomic_load_umax_global_32>;

defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and",
                           VGPR_32, i32, atomic_load_and_global_32>;

defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or",
                          VGPR_32, i32, atomic_load_or_global_32>;

defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor",
                           VGPR_32, i32, atomic_load_xor_global_32>;

defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc",
                           VGPR_32, i32, atomic_inc_global_32>;

defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec",
                           VGPR_32, i32, atomic_dec_global_32>;

defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2",
                              VReg_64, i64, atomic_load_add_global_64>;

defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2",
                              VReg_64, i64, atomic_load_sub_global_64>;

defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2",
                               VReg_64, i64, atomic_load_min_global_64>;

defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2",
                               VReg_64, i64, atomic_load_umin_global_64>;

defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2",
                               VReg_64, i64, atomic_load_max_global_64>;

defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2",
                               VReg_64, i64, atomic_load_umax_global_64>;

defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2",
                              VReg_64, i64, atomic_load_and_global_64>;

defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2",
                             VReg_64, i64, atomic_load_or_global_64>;

defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2",
                              VReg_64, i64, atomic_load_xor_global_64>;

defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
                              VReg_64, i64, atomic_inc_global_64>;

defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
                              VReg_64, i64, atomic_dec_global_64>;

// Only the returning forms are defined for csub.
let SubtargetPredicate = HasGFX10_BEncoding in
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
                              VGPR_32, i32, atomic_csub_global_32>;
} // End is_flat_global = 1



// Scratch-segment loads and stores. Each defm yields the vaddr form and the
// _SADDR form.
let SubtargetPredicate = HasFlatScratchInsts in {
defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;

defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32>;
defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32>;
defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32>;

defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;

defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;

} // End SubtargetPredicate = HasFlatScratchInsts

// Floating-point global atomics; no selection operators are supplied.
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
  defm GLOBAL_ATOMIC_FCMPSWAP :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMIN :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMAX :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMIN_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMAX_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1

// Only the non-returning forms are defined for the fadd atomics.
let SubtargetPredicate = HasAtomicFaddInsts, is_flat_global = 1 in {

defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
  "global_atomic_add_f32", VGPR_32, f32, atomic_fadd_global_noret
>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
  "global_atomic_pk_add_f16", VGPR_32, v2f16, atomic_pk_fadd_global_noret
>;

} // End SubtargetPredicate = HasAtomicFaddInsts

//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//

// Pattern classes. In every class the address complex pattern supplies
// $vaddr, $offset and $slc. For loads and stores the two literal 0 operands
// sit in the positions the pseudo classes declare as glc and dlc, i.e.
// (..., $offset, glc=0, dlc=0, $slc).

// Load through FLATOffset.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATOffset i64:$vaddr, i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, 0, $slc)
>;

// D16 load through FLATOffset; $in is passed as the trailing (tied) operand.
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
  (inst $vaddr, $offset, 0, 0, $slc, $in)
>;

// D16 load through FLATOffsetSigned.
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc), vt:$in),
  (inst $vaddr, $offset, 0, 0, $slc, $in)
>;

// Atomic load through FLATAtomic.
class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATAtomic (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, 0, $slc)
>;

// Load through FLATOffsetSigned.
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset, i1:$slc))),
  (inst $vaddr, $offset, 0, 0, $slc)
>;

// Store through FLATOffset; the data operand comes first in the source node.
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
  (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset, i1:$slc)),
  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
>;

// Store through FLATOffsetSigned.
class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
  (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset, i1:$slc)),
  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
>;

class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
  // atomic store follows atomic binop convention so the address comes
  // first.
  (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
>;

class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, RegisterClass rc = VGPR_32> : GCNPat <
  // atomic store follows atomic binop convention so the address comes
  // first.
  (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
  (inst $vaddr, rc:$data, $offset, 0, 0, $slc)
>;

// Returning atomic RMW through FLATAtomic; data_vt may differ from the result
// type vt.
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                     ValueType data_vt = vt> : GCNPat <
  (vt (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
  (inst $vaddr, $data, $offset, $slc)
>;

// Non-returning atomic RMW through FLATAtomic.
class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FLATAtomic i64:$vaddr, i16:$offset, i1:$slc), vt:$data),
  (inst $vaddr, $data, $offset, $slc)
>;

// Returning atomic RMW through FLATSignedAtomic.
class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                           ValueType data_vt = vt> : GCNPat <
  (vt (node (FLATSignedAtomic i64:$vaddr, i16:$offset, i1:$slc), data_vt:$data)),
  (inst $vaddr, $data, $offset, $slc)
>;

// NOTE(review): this let-block and its final def continue past the end of the
// visible chunk; the text below is reproduced unchanged and left unterminated
// so the continuation still attaches.
let OtherPredicates = [HasFlatAddressSpace] in {

def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;

def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;

def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;

foreach vt = Reg32Types.types in {
def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
}

foreach vt = VReg_64.RegTypes in {
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt, VReg_64>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
}

def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32, VReg_96>;

foreach vt = VReg_128.RegTypes in {
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt, VReg_128>;
}

def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64, VReg_64>;

def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN,
atomic_swap_global_32, i32>; 847def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>; 848def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>; 849 850def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>; 851def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>; 852def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>; 853def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>; 854def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>; 855def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>; 856def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>; 857def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>; 858def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>; 859def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>; 860def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>; 861def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>; 862def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>; 863 864def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>; 865def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>; 866 867let OtherPredicates = [D16PreservesUnusedBits] in { 868def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>; 869def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>; 870 871def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>; 872def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>; 873def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>; 874def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>; 875def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, 
load_d16_hi_flat, v2i16>; 876def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>; 877 878def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>; 879def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>; 880def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>; 881def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>; 882def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>; 883def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>; 884} 885 886} // End OtherPredicates = [HasFlatAddressSpace] 887 888let OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 in { 889 890def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>; 891def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>; 892def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>; 893def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>; 894def : FlatLoadSignedPat <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>; 895def : FlatLoadSignedPat <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>; 896def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, extloadi16_global, i32>; 897def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>; 898def : FlatLoadSignedPat <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>; 899def : FlatLoadSignedPat <GLOBAL_LOAD_USHORT, load_global, i16>; 900 901foreach vt = Reg32Types.types in { 902def : FlatLoadSignedPat <GLOBAL_LOAD_DWORD, load_global, vt>; 903def : FlatStoreSignedPat <GLOBAL_STORE_DWORD, store_global, vt, VGPR_32>; 904} 905 906foreach vt = VReg_64.RegTypes in { 907def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX2, load_global, vt>; 908def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX2, store_global, vt, VReg_64>; 909} 910 911def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX3, load_global, v3i32>; 912 913foreach vt = VReg_128.RegTypes in { 914def : FlatLoadSignedPat <GLOBAL_LOAD_DWORDX4, load_global, vt>; 
915def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX4, store_global, vt, VReg_128>; 916} 917 918def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>; 919def : FlatLoadAtomicPat <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>; 920 921def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i32, VGPR_32>; 922def : FlatStoreSignedPat <GLOBAL_STORE_BYTE, truncstorei8_global, i16, VGPR_32>; 923def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, truncstorei16_global, i32, VGPR_32>; 924def : FlatStoreSignedPat <GLOBAL_STORE_SHORT, store_global, i16, VGPR_32>; 925def : FlatStoreSignedPat <GLOBAL_STORE_DWORDX3, store_global, v3i32, VReg_96>; 926 927let OtherPredicates = [D16PreservesUnusedBits] in { 928def : FlatStoreSignedPat <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>; 929def : FlatStoreSignedPat <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>; 930 931def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>; 932def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>; 933def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>; 934def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>; 935def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>; 936def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>; 937 938def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>; 939def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>; 940def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>; 941def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>; 942def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>; 943def : FlatSignedLoadPat_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>; 944} 945 946def : 
FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORD, atomic_store_global_32, i32>; 947def : FlatStoreSignedAtomicPat <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64, VReg_64>; 948 949def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>; 950def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>; 951def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_RTN, atomic_inc_global_32, i32>; 952def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>; 953def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>; 954def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>; 955def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>; 956def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>; 957def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>; 958def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>; 959def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>; 960def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>; 961def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>; 962def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CSUB_RTN, atomic_csub_global_32, i32>; 963 964def : FlatSignedAtomicPat <GLOBAL_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>; 965def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>; 966def : FlatSignedAtomicPat <GLOBAL_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>; 967def : FlatSignedAtomicPat <GLOBAL_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>; 968def : FlatSignedAtomicPat <GLOBAL_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>; 969def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>; 970def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, 
i64>; 971def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>; 972def : FlatSignedAtomicPat <GLOBAL_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>; 973def : FlatSignedAtomicPat <GLOBAL_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>; 974def : FlatSignedAtomicPat <GLOBAL_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>; 975def : FlatSignedAtomicPat <GLOBAL_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>; 976def : FlatSignedAtomicPat <GLOBAL_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>; 977 978def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_ADD_F32, atomic_fadd_global_noret, f32>; 979def : FlatAtomicPatNoRtn <GLOBAL_ATOMIC_PK_ADD_F16, atomic_pk_fadd_global_noret, v2f16>; 980 981} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 982 983 984//===----------------------------------------------------------------------===// 985// Target 986//===----------------------------------------------------------------------===// 987 988//===----------------------------------------------------------------------===// 989// CI 990//===----------------------------------------------------------------------===// 991 992class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> : 993 FLAT_Real <op, ps>, 994 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> { 995 let AssemblerPredicate = isGFX7Only; 996 let DecoderNamespace="GFX7"; 997} 998 999def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>; 1000def FLAT_LOAD_SBYTE_ci : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>; 1001def FLAT_LOAD_USHORT_ci : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>; 1002def FLAT_LOAD_SSHORT_ci : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>; 1003def FLAT_LOAD_DWORD_ci : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>; 1004def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>; 1005def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>; 1006def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>; 1007 1008def FLAT_STORE_BYTE_ci : FLAT_Real_ci <0x18, 
FLAT_STORE_BYTE>; 1009def FLAT_STORE_SHORT_ci : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>; 1010def FLAT_STORE_DWORD_ci : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>; 1011def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>; 1012def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>; 1013def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>; 1014 1015multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> { 1016 def _ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 1017 def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 1018} 1019 1020defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>; 1021defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>; 1022defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>; 1023defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>; 1024defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>; 1025defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>; 1026defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>; 1027defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>; 1028defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>; 1029defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>; 1030defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>; 1031defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>; 1032defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>; 1033defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>; 1034defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>; 1035defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>; 1036defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>; 1037defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>; 1038defm 
FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>; 1039defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>; 1040defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>; 1041defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>; 1042defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>; 1043defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>; 1044defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>; 1045defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>; 1046 1047// CI Only flat instructions 1048defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>; 1049defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>; 1050defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>; 1051defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>; 1052defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>; 1053defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>; 1054 1055 1056//===----------------------------------------------------------------------===// 1057// VI 1058//===----------------------------------------------------------------------===// 1059 1060class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> : 1061 FLAT_Real <op, ps>, 1062 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> { 1063 let AssemblerPredicate = isGFX8GFX9; 1064 let DecoderNamespace = "GFX8"; 1065} 1066 1067multiclass FLAT_Real_AllAddr_vi<bits<7> op> { 1068 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)>; 1069 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1070} 1071 1072def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; 1073def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>; 1074def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>; 1075def FLAT_LOAD_SSHORT_vi : 
FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>; 1076def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>; 1077def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>; 1078def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>; 1079def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>; 1080 1081def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>; 1082def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>; 1083def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>; 1084def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; 1085def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>; 1086def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>; 1087def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>; 1088def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>; 1089 1090def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>; 1091def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; 1092def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>; 1093def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; 1094def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>; 1095def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; 1096 1097multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> { 1098 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 1099 def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 1100} 1101 1102multiclass FLAT_Global_Real_Atomics_vi<bits<7> op> : 1103 FLAT_Real_AllAddr_vi<op> { 1104 def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 1105 def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 1106} 1107 1108 1109defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>; 1110defm FLAT_ATOMIC_CMPSWAP : 
FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>; 1111defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>; 1112defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>; 1113defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>; 1114defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>; 1115defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>; 1116defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>; 1117defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>; 1118defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>; 1119defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>; 1120defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>; 1121defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>; 1122defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>; 1123defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>; 1124defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>; 1125defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>; 1126defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>; 1127defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>; 1128defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>; 1129defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>; 1130defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>; 1131defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>; 1132defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>; 1133defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>; 1134defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>; 1135 1136defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>; 1137defm GLOBAL_LOAD_SBYTE : 
FLAT_Real_AllAddr_vi <0x11>; 1138defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>; 1139defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>; 1140defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>; 1141defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>; 1142defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>; 1143defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>; 1144 1145defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>; 1146defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>; 1147defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>; 1148defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>; 1149defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>; 1150defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>; 1151 1152defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>; 1153defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>; 1154defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>; 1155defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>; 1156defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>; 1157defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; 1158defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; 1159defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; 1160 1161 1162defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; 1163defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>; 1164defm GLOBAL_ATOMIC_ADD : FLAT_Global_Real_Atomics_vi <0x42>; 1165defm GLOBAL_ATOMIC_SUB : FLAT_Global_Real_Atomics_vi <0x43>; 1166defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Real_Atomics_vi <0x44>; 1167defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Real_Atomics_vi <0x45>; 1168defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Real_Atomics_vi <0x46>; 1169defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Real_Atomics_vi <0x47>; 1170defm GLOBAL_ATOMIC_AND : FLAT_Global_Real_Atomics_vi <0x48>; 1171defm GLOBAL_ATOMIC_OR : FLAT_Global_Real_Atomics_vi <0x49>; 1172defm GLOBAL_ATOMIC_XOR : FLAT_Global_Real_Atomics_vi <0x4a>; 
// VI global atomics, continued (each defm yields the base, _SADDR, _RTN and
// _SADDR_RTN reals through FLAT_Global_Real_Atomics_vi).
defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi <0x4b>;
defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi <0x4c>;
defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi <0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi <0x62>;
defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi <0x63>;
defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi <0x64>;
defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi <0x65>;
defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi <0x66>;
defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi <0x67>;
defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi <0x68>;
defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi <0x69>;
defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;

// VI scratch loads and stores (base and _SADDR reals).
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_vi <0x10>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_vi <0x11>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_vi <0x12>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_vi <0x13>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_vi <0x14>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_vi <0x15>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_vi <0x16>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_vi <0x17>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_vi <0x20>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x21>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_vi <0x22>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x23>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_vi <0x24>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_vi <0x25>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;


//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Real (encoded) instruction for pseudo `ps` in the GFX10 encoding family,
// assembled only under isGFX10Plus and decoded in the "GFX10" namespace.
// Encoding bits assigned here:
//   Inst{11-0}  - low 12 bits of offset.
//   Inst{12}    - dlc operand if the pseudo has one, else its fixed dlcValue.
//   Inst{54-48} - saddr when the pseudo both has and enables saddr; 0x7d in
//                 every other case (presumably the "no saddr" encoding --
//                 confirm against the ISA manual).
//   Inst{55}    - always 0.
class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps>
  : FLAT_Real<op, ps>,
    SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
  let AssemblerPredicate = isGFX10Plus;
  let DecoderNamespace = "GFX10";

  let Inst{11-0}  = offset{11-0};
  let Inst{12}    = !if(ps.has_dlc, dlc, ps.dlcValue);
  let Inst{54-48} = !if(ps.has_saddr,
                        !if(ps.enabled_saddr, saddr, 0x7d),
                        0x7d);
  let Inst{55}    = 0;
}


// Building blocks: each multiclass defines exactly one real, for the pseudo
// whose name is NAME plus the indicated suffix.
multiclass FLAT_Real_Base_gfx10<bits<7> op> {
  def _gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
}

multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
  def _RTN_gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
}

multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
  def _SADDR_gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}

multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
  def _SADDR_RTN_gfx10 :
    FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}


// Combinations of the building blocks above.

// Base + _SADDR.
multiclass FLAT_Real_AllAddr_gfx10<bits<7> op>
  : FLAT_Real_Base_gfx10<op>,
    FLAT_Real_SADDR_gfx10<op>;

// Base + _RTN.
multiclass FLAT_Real_Atomics_gfx10<bits<7> op>
  : FLAT_Real_Base_gfx10<op>,
    FLAT_Real_RTN_gfx10<op>;

// Base + _SADDR + _RTN + _SADDR_RTN.
multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op>
  : FLAT_Real_AllAddr_gfx10<op>,
    FLAT_Real_RTN_gfx10<op>,
    FLAT_Real_SADDR_RTN_gfx10<op>;

// _RTN + _SADDR_RTN only (no non-returning forms).
multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op>
  : FLAT_Real_RTN_gfx10<op>,
    FLAT_Real_SADDR_RTN_gfx10<op>;

// ENC_FLAT.
defm FLAT_LOAD_UBYTE         : FLAT_Real_Base_gfx10<0x008>;
defm FLAT_LOAD_SBYTE         : FLAT_Real_Base_gfx10<0x009>;
defm FLAT_LOAD_USHORT        : FLAT_Real_Base_gfx10<0x00a>;
defm FLAT_LOAD_SSHORT        : FLAT_Real_Base_gfx10<0x00b>;
defm FLAT_LOAD_DWORD         : FLAT_Real_Base_gfx10<0x00c>;
defm FLAT_LOAD_DWORDX2       : FLAT_Real_Base_gfx10<0x00d>;
defm FLAT_LOAD_DWORDX4       : FLAT_Real_Base_gfx10<0x00e>;
defm FLAT_LOAD_DWORDX3       : FLAT_Real_Base_gfx10<0x00f>;
defm FLAT_STORE_BYTE         : FLAT_Real_Base_gfx10<0x018>;
defm FLAT_STORE_BYTE_D16_HI  : FLAT_Real_Base_gfx10<0x019>;
defm FLAT_STORE_SHORT        : FLAT_Real_Base_gfx10<0x01a>;
defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>;
defm FLAT_STORE_DWORD        : FLAT_Real_Base_gfx10<0x01c>;
defm FLAT_STORE_DWORDX2      : FLAT_Real_Base_gfx10<0x01d>;
defm FLAT_STORE_DWORDX4      : FLAT_Real_Base_gfx10<0x01e>;
defm FLAT_STORE_DWORDX3      : FLAT_Real_Base_gfx10<0x01f>;
defm FLAT_LOAD_UBYTE_D16     : FLAT_Real_Base_gfx10<0x020>;
defm FLAT_LOAD_UBYTE_D16_HI  : FLAT_Real_Base_gfx10<0x021>;
defm FLAT_LOAD_SBYTE_D16     : FLAT_Real_Base_gfx10<0x022>;
defm FLAT_LOAD_SBYTE_D16_HI  : FLAT_Real_Base_gfx10<0x023>;
defm FLAT_LOAD_SHORT_D16     : FLAT_Real_Base_gfx10<0x024>;
defm FLAT_LOAD_SHORT_D16_HI  : FLAT_Real_Base_gfx10<0x025>;
defm FLAT_ATOMIC_SWAP        : FLAT_Real_Atomics_gfx10<0x030>;
defm FLAT_ATOMIC_CMPSWAP     : FLAT_Real_Atomics_gfx10<0x031>;
defm FLAT_ATOMIC_ADD         : FLAT_Real_Atomics_gfx10<0x032>;
defm FLAT_ATOMIC_SUB         : FLAT_Real_Atomics_gfx10<0x033>;
defm FLAT_ATOMIC_SMIN        : FLAT_Real_Atomics_gfx10<0x035>;
defm FLAT_ATOMIC_UMIN        : FLAT_Real_Atomics_gfx10<0x036>;
defm FLAT_ATOMIC_SMAX        : FLAT_Real_Atomics_gfx10<0x037>;
defm FLAT_ATOMIC_UMAX        : FLAT_Real_Atomics_gfx10<0x038>;
defm FLAT_ATOMIC_AND         : FLAT_Real_Atomics_gfx10<0x039>;
defm FLAT_ATOMIC_OR          : FLAT_Real_Atomics_gfx10<0x03a>;
defm FLAT_ATOMIC_XOR         : FLAT_Real_Atomics_gfx10<0x03b>;
defm FLAT_ATOMIC_INC         : FLAT_Real_Atomics_gfx10<0x03c>;
defm FLAT_ATOMIC_DEC         : FLAT_Real_Atomics_gfx10<0x03d>;
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Real_Atomics_gfx10<0x03e>;
defm FLAT_ATOMIC_FMIN        : FLAT_Real_Atomics_gfx10<0x03f>;
defm FLAT_ATOMIC_FMAX        : FLAT_Real_Atomics_gfx10<0x040>;
defm FLAT_ATOMIC_SWAP_X2     : FLAT_Real_Atomics_gfx10<0x050>;
defm FLAT_ATOMIC_CMPSWAP_X2  : FLAT_Real_Atomics_gfx10<0x051>;
defm FLAT_ATOMIC_ADD_X2      : FLAT_Real_Atomics_gfx10<0x052>;
defm FLAT_ATOMIC_SUB_X2      : FLAT_Real_Atomics_gfx10<0x053>;
defm FLAT_ATOMIC_SMIN_X2     : FLAT_Real_Atomics_gfx10<0x055>;
defm FLAT_ATOMIC_UMIN_X2     : FLAT_Real_Atomics_gfx10<0x056>;
defm FLAT_ATOMIC_SMAX_X2     : FLAT_Real_Atomics_gfx10<0x057>;
defm FLAT_ATOMIC_UMAX_X2     : FLAT_Real_Atomics_gfx10<0x058>;
defm FLAT_ATOMIC_AND_X2      : FLAT_Real_Atomics_gfx10<0x059>;
defm FLAT_ATOMIC_OR_X2       : FLAT_Real_Atomics_gfx10<0x05a>;
defm FLAT_ATOMIC_XOR_X2      : FLAT_Real_Atomics_gfx10<0x05b>;
defm FLAT_ATOMIC_INC_X2      : FLAT_Real_Atomics_gfx10<0x05c>;
defm FLAT_ATOMIC_DEC_X2      : FLAT_Real_Atomics_gfx10<0x05d>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_gfx10<0x05f>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_gfx10<0x060>;


// ENC_FLAT_GLBL.
// GFX10 global loads, stores and D16 loads: base and _SADDR reals.
defm GLOBAL_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
defm GLOBAL_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
defm GLOBAL_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
defm GLOBAL_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
defm GLOBAL_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
defm GLOBAL_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
defm GLOBAL_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
defm GLOBAL_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
defm GLOBAL_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
defm GLOBAL_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
defm GLOBAL_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
defm GLOBAL_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
defm GLOBAL_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
defm GLOBAL_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;
// GFX10 global atomics. GLOBAL_ATOMIC_CSUB is the only one instantiated
// through the _RTN-only multiclass.
defm GLOBAL_ATOMIC_SWAP        : FLAT_Real_GlblAtomics_gfx10<0x030>;
defm GLOBAL_ATOMIC_CMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x031>;
defm GLOBAL_ATOMIC_ADD         : FLAT_Real_GlblAtomics_gfx10<0x032>;
defm GLOBAL_ATOMIC_SUB         : FLAT_Real_GlblAtomics_gfx10<0x033>;
defm GLOBAL_ATOMIC_CSUB        : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>;
defm GLOBAL_ATOMIC_SMIN        : FLAT_Real_GlblAtomics_gfx10<0x035>;
defm GLOBAL_ATOMIC_UMIN        : FLAT_Real_GlblAtomics_gfx10<0x036>;
defm GLOBAL_ATOMIC_SMAX        : FLAT_Real_GlblAtomics_gfx10<0x037>;
defm GLOBAL_ATOMIC_UMAX        : FLAT_Real_GlblAtomics_gfx10<0x038>;
defm GLOBAL_ATOMIC_AND         : FLAT_Real_GlblAtomics_gfx10<0x039>;
defm GLOBAL_ATOMIC_OR          : FLAT_Real_GlblAtomics_gfx10<0x03a>;
defm GLOBAL_ATOMIC_XOR         : FLAT_Real_GlblAtomics_gfx10<0x03b>;
defm GLOBAL_ATOMIC_INC         : FLAT_Real_GlblAtomics_gfx10<0x03c>;
defm GLOBAL_ATOMIC_DEC         : FLAT_Real_GlblAtomics_gfx10<0x03d>;
defm GLOBAL_ATOMIC_FCMPSWAP    : FLAT_Real_GlblAtomics_gfx10<0x03e>;
defm GLOBAL_ATOMIC_FMIN        : FLAT_Real_GlblAtomics_gfx10<0x03f>;
defm GLOBAL_ATOMIC_FMAX        : FLAT_Real_GlblAtomics_gfx10<0x040>;
defm GLOBAL_ATOMIC_SWAP_X2     : FLAT_Real_GlblAtomics_gfx10<0x050>;
defm GLOBAL_ATOMIC_CMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x051>;
defm GLOBAL_ATOMIC_ADD_X2      : FLAT_Real_GlblAtomics_gfx10<0x052>;
defm GLOBAL_ATOMIC_SUB_X2      : FLAT_Real_GlblAtomics_gfx10<0x053>;
defm GLOBAL_ATOMIC_SMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x055>;
defm GLOBAL_ATOMIC_UMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x056>;
defm GLOBAL_ATOMIC_SMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x057>;
defm GLOBAL_ATOMIC_UMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x058>;
defm GLOBAL_ATOMIC_AND_X2      : FLAT_Real_GlblAtomics_gfx10<0x059>;
defm GLOBAL_ATOMIC_OR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05a>;
defm GLOBAL_ATOMIC_XOR_X2      : FLAT_Real_GlblAtomics_gfx10<0x05b>;
defm GLOBAL_ATOMIC_INC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05c>;
defm GLOBAL_ATOMIC_DEC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x060>;
// The ADDTID forms get a base real only (no _SADDR variant).
defm GLOBAL_LOAD_DWORD_ADDTID  : FLAT_Real_Base_gfx10<0x016>;
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_Base_gfx10<0x017>;

// ENC_FLAT_SCRATCH.
// GFX10 scratch loads, stores and D16 loads: base and _SADDR reals.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;

// Floating-point global atomic adds. These use the VI real multiclass, so
// each defm yields a _vi and a _SADDR_vi record.
let SubtargetPredicate = HasAtomicFaddInsts in {

defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Real_AllAddr_vi <0x04d>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Real_AllAddr_vi <0x04e>;

} // End SubtargetPredicate = HasAtomicFaddInsts