//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Addressing-mode complex patterns. The quoted string is the selection routine
// named for each pattern; FLATOffset and FLATOffsetSigned differ only in the
// boolean template argument handed to SelectFlatOffset.
def FLATOffset       : ComplexPattern<i64, 2, "SelectFlatOffset<false>", [], [SDNPWantRoot], -10>;
def FLATOffsetSigned : ComplexPattern<i64, 2, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;
def ScratchOffset    : ComplexPattern<i32, 2, "SelectFlatOffset<true>", [], [SDNPWantRoot], -10>;

def GlobalSAddr  : ComplexPattern<i64, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;
def ScratchSAddr : ComplexPattern<i32, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;

//===----------------------------------------------------------------------===//
// FLAT classes
//===----------------------------------------------------------------------===//

// Common base of every FLAT / global / scratch pseudo instruction.
//   opName  - mnemonic; also used as the SIMCInstr key.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - assembly operand string, kept in AsmOperands for FLAT_Real.
//   pattern - optional selection patterns.
// The has_* / enabled_* / *Value fields are consumed by FLAT_Real when it
// fills in the encoding.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern=[]> :
    InstSI<outs, ins, "", pattern>,
    SIMCInstr<opName, SIEncodingFamily.NONE> {

  // Strings that the real (encoded) instruction concatenates into its asm.
  string Mnemonic    = opName;
  string AsmOperands = asmOps;

  // Segment selectors; both zero means a plain flat instruction.
  bits<1> is_flat_global  = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst  = 1;
  bits<1> has_vaddr = 1;
  bits<1> has_data  = 1;

  // "Having" saddr is kept separate from "enabling" saddr because saddr is
  // only valid for scratch and global instructions. Before gfx9 these bits
  // were reserved, so the original flat segment instructions should not
  // necessarily get the "disabled" value written into them.
  bits<1> has_saddr     = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value   = 0;

  // When has_glc is 0, FLAT_Real encodes glcValue instead of the glc operand.
  bits<1> has_glc  = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc  = 1;
  bits<1> dlcValue = 0;

  let FLAT          = 1;
  let isPseudo      = 1;
  let isCodeGenOnly = 1;

  let hasSideEffects       = 0;
  let UseNamedOperandTable = 1;
  let SchedRW              = [WriteVMEM];

  // The required subtarget feature follows the segment.
  let SubtargetPredicate =
    !if(is_flat_global, HasFlatGlobalInsts,
      !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a buffer
  // instruction, so they increment both VM_CNT and LGKM_CNT and are not
  // considered done until both have been decremented. LGKM_CNT is set only
  // when neither the global nor the scratch segment is selected.
  let VM_CNT   = 1;
  let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch));

  let IsFlatGlobal  = is_flat_global;
  let IsFlatScratch = is_flat_scratch;
}

// Encoded (64-bit) form of a FLAT pseudo.
//   op - 7-bit opcode, placed in Inst{24-18}.
//   ps - the pseudo whose operands, asm strings and flags are copied.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
    InstSI <ps.OutOperandList, ps.InOperandList,
            ps.Mnemonic # ps.AsmOperands, []>,
    Enc64 {

  let isPseudo      = 0;
  let isCodeGenOnly = 0;

  // Flags taken over from the pseudo.
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let OtherPredicates      = ps.OtherPredicates;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;

  // Operand encoding fields.
  bits<8> vaddr;
  bits<8> vdata;
  bits<7> saddr;
  bits<8> vdst;

  bits<1> slc;
  bits<1> glc;
  bits<1> dlc;

  // Only valid on gfx9.
  bits<1> lds = 0; // XXX - What does this actually do?

  // Segment: 00 = flat, 01 = scratch, 10 = global, 11 = reserved.
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                  !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. For plain flat accesses the highest bit is ignored and the
  // field is treated as a 12-bit unsigned value.
  bits<13> offset;
  bits<1> nv = 0; // XXX - What does this actually do?

  // tfe is not used right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Only valid on GFX9+.
  let Inst{12-0}  = offset;
  let Inst{13}    = lds;
  let Inst{15-14} = seg;

  // Inst{25} is not assigned by this class.
  let Inst{16}    = !if(ps.has_glc, glc, ps.glcValue);
  let Inst{17}    = slc;
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata, ?);

  // saddr field: the operand when enabled, 0x7f when the instruction has an
  // saddr field but does not use it, and 0 for instructions without saddr
  // (these bits were reserved before gfx9).
  let Inst{54-48} = !if(ps.has_saddr,
                      !if(ps.enabled_saddr, saddr, 0x7f),
                      0);

  let Inst{55}    = nv; // nv on GFX9+, TFE before.
  let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
}

// Mixin recording whether a record is the saddr variant (IsSaddr) and the
// name shared by the variants (SaddrOp).
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  bit IsSaddr = is_saddr;
  string SaddrOp = Name;
}

// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).

// Load pseudo.
//   HasTiedOutput - adds a $vdst_in input tied to $vdst.
//   HasSaddr      - the asm string carries an saddr position.
//   EnableSaddr   - that position is a real SGPR pair instead of "off".
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
                        bit HasTiedOutput = 0,
                        bit HasSaddr = 0, bit EnableSaddr = 0> :
    FLAT_Pseudo<
      opName,
      (outs regClass:$vdst),
      !con(
        !if(EnableSaddr,
          (ins SReg_64:$saddr, VGPR_32:$vaddr),
          (ins VReg_64:$vaddr)),
        (ins flat_offset:$offset),
        // FIXME: Operands with default values do not work with following
        // non-optional operands.
        !if(HasTiedOutput,
          (ins GLC:$glc, SLC:$slc, DLC:$dlc, regClass:$vdst_in),
          (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))),
      " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
  let mayLoad     = 1;
  let maybeAtomic = 1;
  let has_data    = 0;

  let has_saddr     = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr   = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");

  let Constraints     = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Store pseudo; HasSaddr / EnableSaddr have the same meaning as for loads.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
                         bit HasSaddr = 0, bit EnableSaddr = 0> :
    FLAT_Pseudo<
      opName,
      (outs),
      !con(
        !if(EnableSaddr,
          (ins VGPR_32:$vaddr, vdataClass:$vdata, SReg_64:$saddr),
          (ins VReg_64:$vaddr, vdataClass:$vdata)),
        (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc)),
      " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$glc$slc$dlc"> {
  let mayLoad     = 0;
  let mayStore    = 1;
  let maybeAtomic = 1;
  let has_vdst    = 0;

  let has_saddr     = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr   = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
}

// Emits the plain and the _SADDR flavour of a global load.
// HasTiedInput is forwarded as FLAT_Load_Pseudo's HasTiedOutput argument.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    def "" :
      FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
      GlobalSaddrTable<0, opName>;

    def _SADDR :
      FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
      GlobalSaddrTable<1, opName>;
  }
}

// Global "addtid" load: no vaddr operand, optional saddr.
// HasSignedOffset is accepted but not referenced in this class body.
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
                                      bit HasTiedOutput = 0,
                                      bit HasSignedOffset = 0,
                                      bit EnableSaddr = 0> :
    FLAT_Pseudo<
      opName,
      (outs regClass:$vdst),
      !con(
        !if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
        (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc),
        !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
      " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
  let is_flat_global = 1;
  let mayLoad        = 1;
  let maybeAtomic    = 1;

  let has_data      = 0;
  let has_vaddr     = 0;
  let has_saddr     = 1;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr   = opName#!if(EnableSaddr, "_SADDR", "");

  let Constraints     = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Plain and _SADDR flavours of a global addtid load.
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
                                          bit HasTiedOutput = 0,
                                          bit HasSignedOffset = 0> {
  def "" :
    FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset>,
    GlobalSaddrTable<0, opName>;

  def _SADDR :
    FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset, 1>,
    GlobalSaddrTable<1, opName>;
}

// Plain and _SADDR flavours of a global store.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    def "" :
      FLAT_Store_Pseudo<opName, regClass, 1>,
      GlobalSaddrTable<0, opName>;

    def _SADDR :
      FLAT_Store_Pseudo<opName, regClass, 1, 1>,
      GlobalSaddrTable<1, opName>;
  }
}

// Global "addtid" store: no vaddr operand, optional saddr.
// HasSignedOffset is accepted but not referenced in this class body.
// NOTE(review): this class uses GLC/SLC/DLC where the other stores in this
// file use the *_0 operand variants; confirm that this is intended.
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
                                       bit HasSignedOffset = 0,
                                       bit EnableSaddr = 0> :
    FLAT_Pseudo<
      opName,
      (outs),
      !con(
        !if(EnableSaddr,
          (ins vdataClass:$vdata, SReg_64:$saddr),
          (ins vdataClass:$vdata)),
        (ins flat_offset:$offset, GLC:$glc, SLC:$slc, DLC:$dlc)),
      " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
  let is_flat_global = 1;
  let mayLoad        = 0;
  let mayStore       = 1;
  let maybeAtomic    = 1;

  let has_vdst      = 0;
  let has_vaddr     = 0;
  let has_saddr     = 1;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr   = opName#!if(EnableSaddr, "_SADDR", "");
}

// Plain and _SADDR flavours of a global addtid store.
multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass,
                                           bit HasSignedOffset = 0> {
  def "" :
    FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset>,
    GlobalSaddrTable<0, opName>;

  def _SADDR :
    FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset, 1>,
    GlobalSaddrTable<1, opName>;
}

// Mixin tagging a scratch instruction with its base name (SVOp) and its
// addressing mode string (Mode); the modes used below are "SV", "SS" and "ST".
class FlatScratchInst <string sv_op, string mode> {
  string SVOp = sv_op;
  string Mode = mode;
}

// Scratch load pseudo. The address operand is $saddr when EnableSaddr is set,
// otherwise $vaddr when EnableVaddr is set, otherwise absent (both printed as
// "off").
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
                                bit HasTiedOutput = 0,
                                bit EnableSaddr = 0,
                                bit EnableVaddr = !not(EnableSaddr)> :
    FLAT_Pseudo<
      opName,
      (outs regClass:$vdst),
      !con(
        // Optional address register, then the offset shared by every form.
        !if(EnableSaddr,
          (ins SReg_32_XEXEC_HI:$saddr),
          !if(EnableVaddr, (ins VGPR_32:$vaddr), (ins))),
        (ins flat_offset:$offset),
        !if(HasTiedOutput,
          (ins GLC:$glc, SLC:$slc, DLC:$dlc, regClass:$vdst_in),
          (ins GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc))),
      " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
  let mayLoad     = 1;
  let maybeAtomic = 1;
  let has_data    = 0;

  let has_saddr     = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr     = EnableVaddr;
  let PseudoInstr   = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));

  let Constraints     = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Scratch store pseudo; address operand selection mirrors the scratch load.
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass,
                                 bit EnableSaddr = 0,
                                 bit EnableVaddr = !not(EnableSaddr)> :
    FLAT_Pseudo<
      opName,
      (outs),
      !con(
        // Data first, then the optional address register, then the operands
        // common to every form.
        (ins vdataClass:$vdata),
        !if(EnableSaddr,
          (ins SReg_32_XEXEC_HI:$saddr),
          !if(EnableVaddr, (ins VGPR_32:$vaddr), (ins))),
        (ins flat_offset:$offset, GLC_0:$glc, SLC_0:$slc, DLC_0:$dlc)),
      " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$glc$slc$dlc"> {
  let mayLoad     = 0;
  let mayStore    = 1;
  let maybeAtomic = 1;
  let has_vdst    = 0;

  let has_saddr     = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr     = EnableVaddr;
  let PseudoInstr   = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));
}

// Emits the three scratch load forms: vaddr ("SV"), saddr ("SS") and the
// no-address form ("ST"), the last one gated on HasFlatScratchSTMode.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> {
  let is_flat_scratch = 1 in {
    def "" :
      FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput>,
      FlatScratchInst<opName, "SV">;

    def _SADDR :
      FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>,
      FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST :
      FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0>,
      FlatScratchInst<opName, "ST">;
  }
}

// Emits the three scratch store forms, as for the loads.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def "" :
      FLAT_Scratch_Store_Pseudo<opName, regClass>,
      FlatScratchInst<opName, "SV">;

    def _SADDR :
      FLAT_Scratch_Store_Pseudo<opName, regClass, 1>,
      FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST :
      FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0>,
      FlatScratchInst<opName, "ST">;
  }
}

// Atomic without a returned value: no vdst, and glc/dlc are fixed to 0 rather
// than being operands.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                              string asm, list<dag> pattern = []> :
    FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  let mayLoad     = 1;
  let mayStore    = 1;
  let maybeAtomic = 1;

  let has_vdst = 0;
  let has_glc  = 0;
  let glcValue = 0;
  let has_dlc  = 0;
  let dlcValue = 0;
}

// Returning atomic: re-enables vdst and fixes glc to 1.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []> :
    FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let hasPostISelHook = 1;

  let has_vdst = 1;
  let glcValue = 1;
  let dlcValue = 0;

  let PseudoInstr = NAME # "_RTN";
}

// Flat-segment atomic: a no-return def and a _RTN def. Only the _RTN def
// carries a selection pattern (built from `atomic`).
multiclass FLAT_Atomic_Pseudo<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    SDPatternOperator atomic = null_frag,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    bit isFP = isFloatType<data_vt>.ret> {

  def "" :
    FLAT_AtomicNoRet_Pseudo <opName,
      (outs),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC_0:$slc),
      " $vaddr, $vdata$offset$slc">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let PseudoInstr     = NAME;
    let FPAtomic        = isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }

  def _RTN :
    FLAT_AtomicRet_Pseudo <opName,
      (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc),
      " $vdst, $vaddr, $vdata$offset$glc1$slc",
      [(set vt:$vdst,
         (atomic (FLATOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
    GlobalSaddrTable<0, opName#"_rtn">,
    AtomicNoRet <opName, 1> {
    let FPAtomic        = isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }
}

// Global atomic without return: plain ("off") and _SADDR defs.
// The `atomic` argument is not referenced by either def.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    SDPatternOperator atomic = null_frag,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    bit isFP = isFloatType<data_vt>.ret> {

  def "" :
    FLAT_AtomicNoRet_Pseudo <opName,
      (outs),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, SLC_0:$slc),
      " $vaddr, $vdata, off$offset$slc">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let has_saddr   = 1;
    let PseudoInstr = NAME;
    let FPAtomic    = isFP;
  }

  def _SADDR :
    FLAT_AtomicNoRet_Pseudo <opName,
      (outs),
      (ins VGPR_32:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, SLC_0:$slc),
      " $vaddr, $vdata, $saddr$offset$slc">,
    GlobalSaddrTable<1, opName>,
    AtomicNoRet <opName#"_saddr", 0> {
    let has_saddr     = 1;
    let enabled_saddr = 1;
    let PseudoInstr   = NAME#"_SADDR";
    let FPAtomic      = isFP;
  }
}

// Global atomic with return: _RTN (with a FLATOffsetSigned pattern) and
// _SADDR_RTN (no pattern).
multiclass FLAT_Global_Atomic_Pseudo_RTN<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    SDPatternOperator atomic = null_frag,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    bit isFP = isFloatType<data_vt>.ret> {

  def _RTN :
    FLAT_AtomicRet_Pseudo <opName,
      (outs vdst_rc:$vdst),
      (ins VReg_64:$vaddr, data_rc:$vdata, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc),
      " $vdst, $vaddr, $vdata, off$offset$glc1$slc",
      [(set vt:$vdst,
         (atomic (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
    GlobalSaddrTable<0, opName#"_rtn">,
    AtomicNoRet <opName, 1> {
    let has_saddr = 1;
    let FPAtomic  = isFP;
  }

  def _SADDR_RTN :
    FLAT_AtomicRet_Pseudo <opName,
      (outs vdst_rc:$vdst),
      (ins VGPR_32:$vaddr, data_rc:$vdata, SReg_64:$saddr, flat_offset:$offset, GLC_1:$glc1, SLC_0:$slc),
      " $vdst, $vaddr, $vdata, $saddr$offset$glc1$slc">,
    GlobalSaddrTable<1, opName#"_rtn">,
    AtomicNoRet <opName#"_saddr", 1> {
    let has_saddr     = 1;
    let enabled_saddr = 1;
    let PseudoInstr   = NAME#"_SADDR_RTN";
    let FPAtomic      = isFP;
  }
}

// Complete global atomic: the no-return and the returning families together.
multiclass FLAT_Global_Atomic_Pseudo<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    SDPatternOperator atomic_rtn = null_frag,
    SDPatternOperator atomic_no_rtn = null_frag,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>;
    defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
  }
}

//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//

// Flat-segment loads.
def FLAT_LOAD_UBYTE   : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE   : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT  : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT  : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD   : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;

// Flat-segment stores.
def FLAT_STORE_BYTE    : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT   : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD   : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;

// D16 variants. The loads pass 1 as FLAT_Load_Pseudo's HasTiedOutput argument.
let SubtargetPredicate = HasD16LoadStore in {
def FLAT_LOAD_UBYTE_D16    : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>;
def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16    : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16    : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;

def FLAT_STORE_BYTE_D16_HI  : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>;
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
}

// Flat-segment atomics. The cmpswap forms take a data operand twice as wide
// as the result (separate data_vt / data_rc arguments).
defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <
  "flat_atomic_cmpswap", VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32,
  v2i32, VReg_64>;

defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_cmpswap_x2", VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64,
  v2i64, VReg_128>;

// 32-bit atomics.
defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <
  "flat_atomic_swap", VGPR_32, i32, atomic_swap_flat_32>;

// 64-bit swap (declared between the 32-bit swap and add in this file).
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_swap_x2", VReg_64, i64, atomic_swap_flat_64>;

defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <
  "flat_atomic_add", VGPR_32, i32, atomic_load_add_flat_32>;

defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <
  "flat_atomic_sub", VGPR_32, i32, atomic_load_sub_flat_32>;

defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <
  "flat_atomic_smin", VGPR_32, i32, atomic_load_min_flat_32>;

defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <
  "flat_atomic_umin", VGPR_32, i32, atomic_load_umin_flat_32>;

defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <
  "flat_atomic_smax", VGPR_32, i32, atomic_load_max_flat_32>;

defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <
  "flat_atomic_umax", VGPR_32, i32, atomic_load_umax_flat_32>;

defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <
  "flat_atomic_and", VGPR_32, i32, atomic_load_and_flat_32>;

defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <
  "flat_atomic_or", VGPR_32, i32, atomic_load_or_flat_32>;

defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <
  "flat_atomic_xor", VGPR_32, i32, atomic_load_xor_flat_32>;

defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <
  "flat_atomic_inc", VGPR_32, i32, atomic_inc_flat_32>;

defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <
  "flat_atomic_dec", VGPR_32, i32, atomic_dec_flat_32>;

// 64-bit atomics.
defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_add_x2", VReg_64, i64, atomic_load_add_flat_64>;

defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_sub_x2", VReg_64, i64, atomic_load_sub_flat_64>;

defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_smin_x2", VReg_64, i64, atomic_load_min_flat_64>;

defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_umin_x2", VReg_64, i64, atomic_load_umin_flat_64>;

defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_smax_x2", VReg_64, i64, atomic_load_max_flat_64>;

defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_umax_x2", VReg_64, i64, atomic_load_umax_flat_64>;

defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_and_x2", VReg_64, i64, atomic_load_and_flat_64>;

defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_or_x2", VReg_64, i64, atomic_load_or_flat_64>;

defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_xor_x2", VReg_64, i64, atomic_load_xor_flat_64>;

defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_inc_x2", VReg_64, i64, atomic_inc_flat_64>;

defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat_64>;

// GFX7-, GFX10-only flat instructions.
let SubtargetPredicate = isGFX7GFX10 in {

// Floating-point flat atomics; none of them is given a selection operator.
defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <
  "flat_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>;

defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>;

defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <
  "flat_atomic_fmin", VGPR_32, f32>;

defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <
  "flat_atomic_fmax", VGPR_32, f32>;

defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_fmin_x2", VReg_64, f64>;

defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <
  "flat_atomic_fmax_x2", VReg_64, f64>;

} // End SubtargetPredicate = isGFX7GFX10

// Global loads; each defm yields the plain and the _SADDR record.
defm GLOBAL_LOAD_UBYTE   : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE   : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT  : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
defm GLOBAL_LOAD_SSHORT  : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
defm GLOBAL_LOAD_DWORD   : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;

// D16 global loads (tied-output flag set).
defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;

let OtherPredicates = [HasGFX10_BEncoding] in
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;

// Global stores.
defm GLOBAL_STORE_BYTE    : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT   : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
defm GLOBAL_STORE_DWORD   : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;

let OtherPredicates = [HasGFX10_BEncoding] in
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;

defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;

// Global atomics. Only the returning-form operator (atomic_rtn) is supplied;
// atomic_no_rtn is null_frag, explicitly for cmpswap and by default elsewhere.
let is_flat_global = 1 in {
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <
  "global_atomic_cmpswap", VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32,
  null_frag, v2i32, VReg_64>;

defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_cmpswap_x2", VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
  null_frag, v2i64, VReg_128>;

defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <
  "global_atomic_swap", VGPR_32, i32, atomic_swap_global_32>;

defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_swap_x2", VReg_64, i64, atomic_swap_global_64>;

defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <
  "global_atomic_add", VGPR_32, i32, atomic_load_add_global_32>;

defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <
  "global_atomic_sub", VGPR_32, i32, atomic_load_sub_global_32>;

defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <
  "global_atomic_smin", VGPR_32, i32, atomic_load_min_global_32>;

defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <
  "global_atomic_umin", VGPR_32, i32, atomic_load_umin_global_32>;

defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <
  "global_atomic_smax", VGPR_32, i32, atomic_load_max_global_32>;

defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <
  "global_atomic_umax", VGPR_32, i32, atomic_load_umax_global_32>;

defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <
  "global_atomic_and", VGPR_32, i32, atomic_load_and_global_32>;

defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <
  "global_atomic_or", VGPR_32, i32, atomic_load_or_global_32>;

defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <
  "global_atomic_xor", VGPR_32, i32, atomic_load_xor_global_32>;

defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <
  "global_atomic_inc", VGPR_32, i32, atomic_inc_global_32>;

defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <
  "global_atomic_dec", VGPR_32, i32, atomic_dec_global_32>;

defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_add_x2", VReg_64, i64, atomic_load_add_global_64>;

defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_sub_x2", VReg_64, i64, atomic_load_sub_global_64>;

defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_smin_x2", VReg_64, i64, atomic_load_min_global_64>;

defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_umin_x2", VReg_64, i64, atomic_load_umin_global_64>;

defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_smax_x2", VReg_64, i64, atomic_load_max_global_64>;

defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_umax_x2", VReg_64, i64, atomic_load_umax_global_64>;

defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_and_x2", VReg_64, i64, atomic_load_and_global_64>;

defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_or_x2", VReg_64, i64, atomic_load_or_global_64>;

defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_xor_x2", VReg_64, i64, atomic_load_xor_global_64>;

defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_inc_x2", VReg_64, i64, atomic_inc_global_64>;

defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <
  "global_atomic_dec_x2", VReg_64, i64, atomic_dec_global_64>;

// csub only instantiates the returning multiclass, so it depends on the
// enclosing let for is_flat_global.
let SubtargetPredicate = HasGFX10_BEncoding in
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <
  "global_atomic_csub", VGPR_32, i32, int_amdgcn_global_atomic_csub>;
} // End is_flat_global = 1



// Scratch loads and stores; each defm yields the plain, _SADDR and _ST records.
let SubtargetPredicate = HasFlatScratchInsts in {
defm SCRATCH_LOAD_UBYTE   : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE   : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT  : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT  : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD   : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;

defm SCRATCH_LOAD_UBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>;
defm SCRATCH_LOAD_SBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>;
defm SCRATCH_LOAD_SHORT_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>;

defm SCRATCH_STORE_BYTE    : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT   : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
defm SCRATCH_STORE_DWORD   : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;

defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;

} // End SubtargetPredicate = HasFlatScratchInsts

// Floating-point global atomics, declared without selection operators.
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
  defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Global_Atomic_Pseudo <
    "global_atomic_fcmpswap", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMIN : FLAT_Global_Atomic_Pseudo <
    "global_atomic_fmin", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMAX : FLAT_Global_Atomic_Pseudo <
    "global_atomic_fmax", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <
    "global_atomic_fcmpswap_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Global_Atomic_Pseudo <
    "global_atomic_fmin_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Global_Atomic_Pseudo <
    "global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1

// FP add atomics exist only in the no-return form here; they instantiate the
// NO_RTN multiclass directly and therefore need is_flat_global from this let.
let is_flat_global = 1, OtherPredicates = [HasAtomicFaddInsts] in {
  defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
    "global_atomic_add_f32", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
    "global_atomic_pk_add_f16", VGPR_32, v2f16>;
} // End is_flat_global = 1, OtherPredicates = [HasAtomicFaddInsts]

//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//

// Load through a FLATOffset address: (vaddr, offset) are handed to `inst`.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> :
  GCNPat <
    (vt (node (FLATOffset i64:$vaddr, i16:$offset))),
    (inst $vaddr, $offset)
  >;

// D16 load through a FLATOffset address. $in is the incoming value passed as
// the last operand of `inst`; the three zeros fill the operands between the
// offset and that value.
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> :
  GCNPat <
    (node (FLATOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
    (inst $vaddr, $offset, 0, 0, 0, $in)
  >;

// Same as FlatLoadPat_D16, but the address is matched with FLATOffsetSigned.
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> :
  GCNPat <
    (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
    (inst $vaddr, $offset, 0, 0, 0, $in)
  >;

// D16 load through a GlobalSAddr address (saddr, voffset, offset).
class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> :
  GCNPat <
    (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$in)),
    (inst $saddr, $voffset, $offset, 0, 0, 0, $in)
  >;

// Load through a FLATOffsetSigned address.
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> :
  GCNPat <
    (vt (node (FLATOffsetSigned (i64 VReg_64:$vaddr), i16:$offset))),
    (inst $vaddr, $offset)
  >;

// Load through a GlobalSAddr address (saddr, voffset, offset).
class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> :
  GCNPat <
    (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset))),
    (inst $saddr, $voffset, $offset, 0, 0, 0)
  >;

// Store through a GlobalSAddr address. Operands are emitted in the order
// voffset, data, saddr, offset.
class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
                           ValueType vt> :
  GCNPat <
    (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset)),
    (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
  >;

// Atomic store through the SGPR-base (GlobalSAddr) addressing form. Unlike a
// regular store, the address operand precedes the data in the source DAG.
class GlobalAtomicStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
                                 ValueType vt> : GCNPat <
  (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$data),
  (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;

// Returning atomic RMW through GlobalSAddr. `vt` is the result type and
// `data_vt` the type of the data operand (they differ for cmpswap).
class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
                            ValueType vt, ValueType data_vt = vt> : GCNPat <
  (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), data_vt:$data)),
  (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset)
>;

// Non-returning atomic RMW through GlobalSAddr.
class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
                                 ValueType vt> : GCNPat <
  (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$data),
  (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;

// Regular store, address matched with FLATOffset (SelectFlatOffset<false>).
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node vt:$data, (FLATOffset i64:$vaddr, i16:$offset)),
  (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

// Regular store, address matched with FLATOffsetSigned
// (SelectFlatOffset<true>).
class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node vt:$data, (FLATOffsetSigned i64:$vaddr, i16:$offset)),
  (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  // atomic store follows atomic binop convention so the address comes
  // first.
  (node (FLATOffset i64:$vaddr, i16:$offset), vt:$data),
  (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
                                ValueType vt, ValueType data_vt = vt> : GCNPat <
  // atomic store follows atomic binop convention so the address comes
  // first.
  //
  // This is the "signed" variant instantiated by GlobalFLATAtomicStorePats,
  // so it must match the address with FLATOffsetSigned like every other
  // *Signed* pattern in this file. Matching with the unsigned FLATOffset
  // would prevent signed immediates from being folded into global atomic
  // stores.
  (node (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$data),
  (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
>;

// Returning atomic RMW, FLATOffset addressing. `data_vt` may differ from the
// result type `vt` (cmpswap passes a two-element vector).
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                     ValueType data_vt = vt> : GCNPat <
  (vt (node (FLATOffset i64:$vaddr, i16:$offset), data_vt:$data)),
  (inst $vaddr, $data, $offset)
>;

// Non-returning atomic RMW, FLATOffset addressing.
class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FLATOffset i64:$vaddr, i16:$offset), vt:$data),
  (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

// Non-returning atomic RMW, FLATOffsetSigned addressing.
class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FLATOffsetSigned i64:$vaddr, i16:$offset), vt:$data),
  (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

// Returning atomic RMW, FLATOffsetSigned addressing.
class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                           ValueType data_vt = vt> : GCNPat <
  (vt (node (FLATOffsetSigned i64:$vaddr, i16:$offset), data_vt:$data)),
  (inst $vaddr, $data, $offset)
>;

// Scratch patterns use 32-bit addresses. The *Signed* forms take a VGPR
// address via ScratchOffset, the *Saddr* forms an SGPR address via
// ScratchSAddr.
class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset))),
  (inst $vaddr, $offset)
>;

// NOTE(review): the three literal zeros in the D16 forms are presumably the
// glc/slc/dlc immediates -- confirm against the pseudo's operand list.
class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in),
  (inst $vaddr, $offset, 0, 0, 0, $in)
>;

// Note the operand order: scratch stores take the data before the address.
class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset)),
  (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
>;

class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset))),
  (inst $saddr, $offset)
>;

class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)),
  (inst $saddr, $offset, 0, 0, 0, $in)
>;

class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
                            ValueType vt> : GCNPat <
  (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset)),
  (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;

// Selection patterns for the generic FLAT segment.
let OtherPredicates = [HasFlatAddressSpace] in {

def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;

// Atomic loads select the same instructions as ordinary loads.
def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;

def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;

// One load/store pattern per value type carried by each register width.
foreach vt = Reg32Types.types in {
def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
}

foreach vt = VReg_64.RegTypes in {
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
}

def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;

foreach vt = VReg_128.RegTypes in {
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
}

def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64>;

// 32-bit returning atomics. cmpswap supplies a v2i32 data operand.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;

// 64-bit returning atomics. cmpswap supplies a v2i64 data operand.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;

def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;

// NOTE(review): this inner `let` assigns OtherPredicates again, so the
// enclosing [HasFlatAddressSpace] is replaced rather than extended for these
// records (the scratch pattern section of this file re-lists its outer
// predicates explicitly) -- confirm that is intended.
let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;

def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;

def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
}

} // End OtherPredicates = [HasFlatAddressSpace]


// Each Global*/Scratch* multiclass below emits two patterns for one
// operation: a VGPR-address form and an SGPR-base form on the matching
// "_SADDR" pseudo (looked up by name). The SADDR form is given the higher
// AddedComplexity so that it is preferred when both could match.
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  def : FlatLoadSignedPat <inst, node, vt> {
    let AddedComplexity = 10;
  }

  def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 11;
  }
}

multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  def : FlatSignedLoadPat_D16 <inst, node, vt> {
    let AddedComplexity = 10;
  }

  def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 11;
  }
}

multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt> {
  def : FlatStoreSignedPat <inst, node, vt> {
    let AddedComplexity = 10;
  }

  def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 11;
  }
}

// Deal with swapped operands for atomic_store vs. regular store
multiclass GlobalFLATAtomicStorePats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  def : FlatStoreSignedAtomicPat <inst, node, vt> {
    let AddedComplexity = 10;
  }

  def : GlobalAtomicStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 11;
  }
}

// Takes the instruction name as a string because the returning pseudos are
// found by appending "_RTN" / "_SADDR_RTN" to it.
multiclass GlobalFLATAtomicPats<string nortn_inst_name, SDPatternOperator node,
                               ValueType vt, ValueType data_vt = vt> {
  def : FlatSignedAtomicPat <!cast<FLAT_Pseudo>(nortn_inst_name#"_RTN"), node, vt, data_vt> {
    let AddedComplexity = 10;
  }

  def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(nortn_inst_name#"_SADDR_RTN"), node, vt, data_vt> {
    let AddedComplexity = 11;
  }
}

multiclass GlobalFLATNoRtnAtomicPats<FLAT_Pseudo inst, SDPatternOperator node,
                                     ValueType vt> {
  def : FlatSignedAtomicPatNoRtn <inst, node, vt> {
    let AddedComplexity = 10;
  }

  def : GlobalAtomicNoRtnSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 11;
  }
}

// Scratch patterns use AddedComplexity 25/26 instead of 10/11.
multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  def : ScratchLoadSignedPat <inst, node, vt> {
    let AddedComplexity = 25;
  }

  def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 26;
  }
}

multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt> {
  def : ScratchStoreSignedPat <inst, node, vt> {
    let AddedComplexity = 25;
  }

  def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 26;
  }
}

multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  def : ScratchLoadSignedPat_D16 <inst, node, vt> {
    let AddedComplexity = 25;
  }

  def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 26;
  }
}

// Selection patterns for the GLOBAL segment.
let OtherPredicates = [HasFlatGlobalInsts] in {

defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>;

foreach vt = Reg32Types.types in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>;
}

foreach vt = VReg_64.RegTypes in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, load_global, vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>;
}

defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORDX3,  load_global,  v3i32>;

foreach vt = VReg_128.RegTypes in {
defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORDX4,  load_global,  vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>;
}

// Atomic loads are selected exactly like ordinary loads: nothing is
// distinguished at selection time, and the memory legalizer later sets the
// cache bits and inserts the appropriate waits.
defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORD,   atomic_load_32_global, i32>;
defm : GlobalFLATLoadPats  <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;

// Truncating and narrow stores, plus the 96-bit store.
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE,    truncstorei8_global,  i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE,    truncstorei8_global,  i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT,   truncstorei16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT,   store_global,         i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global,         v3i32>;

// D16 (16-bit half-register) stores and loads. Each load is instantiated for
// both v2i16 and v2f16 results.
let OtherPredicates = [D16PreservesUnusedBits] in {
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI,  truncstorei8_hi16_global,  i32>;

defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global,   v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global,   v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global,         v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global,         v2f16>;

defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global,       v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global,       v2f16>;
}

defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORD,   atomic_store_global_32, i32>;
defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64>;

// 32-bit returning global atomics. The instruction is named by string so the
// multiclass can derive the _RTN and _SADDR_RTN pseudos from it.
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD",     atomic_load_add_global_32,  i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB",     atomic_load_sub_global_32,  i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC",     atomic_inc_global_32,       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC",     atomic_dec_global_32,       i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND",     atomic_load_and_global_32,  i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX",    atomic_load_max_global_32,  i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX",    atomic_load_umax_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN",    atomic_load_min_global_32,  i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN",    atomic_load_umin_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR",      atomic_load_or_global_32,   i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP",    atomic_swap_global_32,      i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR",     atomic_load_xor_global_32,  i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CSUB",    int_amdgcn_global_atomic_csub, i32>;

// 64-bit returning global atomics.
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2",     atomic_load_add_global_64,  i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2",     atomic_load_sub_global_64,  i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2",     atomic_inc_global_64,       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2",     atomic_dec_global_64,       i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2",     atomic_load_and_global_64,  i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2",    atomic_load_max_global_64,  i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2",    atomic_load_umax_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2",    atomic_load_min_global_64,  i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2",    atomic_load_umin_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2",      atomic_load_or_global_64,   i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2",    atomic_swap_global_64,      i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2",     atomic_load_xor_global_64,  i64>;

let OtherPredicates = [HasAtomicFaddInsts] in {
defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_ADD_F32,    atomic_load_fadd_global_noret_32,       f32>;
defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_PK_ADD_F16, atomic_load_fadd_v2f16_global_noret_32, v2f16>;
}

} // End OtherPredicates = [HasFlatGlobalInsts]

// Selection patterns for the SCRATCH segment (private address space).
let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in {

defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  extloadi8_private,   i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  zextloadi8_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE,  sextloadi8_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  extloadi8_private,   i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  zextloadi8_private,  i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE,  sextloadi8_private,  i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private,        i16>;

foreach vt = Reg32Types.types in {
defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORD,  load_private,  vt>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>;
}

foreach vt = VReg_64.RegTypes in {
defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORDX2,  load_private,  vt>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>;
}

defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>;

foreach vt = VReg_128.RegTypes in {
defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORDX4,  load_private,  vt>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>;
}

defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE,    truncstorei8_private,  i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE,    truncstorei8_private,  i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT,   truncstorei16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT,   store_private,         i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private,         v3i32>;

// The outer predicates are repeated here because this `let` assigns the
// whole OtherPredicates list again.
let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in {
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI,  truncstorei8_hi16_private,  i32>;

defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private,   v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private,   v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private,         v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private,         v2f16>;

defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private,   v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private,   v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private,         v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private,         v2f16>;
}

} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]

//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// CI
//===----------------------------------------------------------------------===//

// Real (encodable) instruction for the SI encoding family, accepted by the
// assembler only under isGFX7Only and decoded in the "GFX7" namespace.
class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace="GFX7";
}

// Note: DWORDX4 (0xe) is encoded before DWORDX3 (0xf) on CI.
def FLAT_LOAD_UBYTE_ci   : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_ci   : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_ci  : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_ci  : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_ci   : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>;

def FLAT_STORE_BYTE_ci    : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_ci   : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_ci   : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;

// Emits the non-returning and returning (_RTN) real instruction for one
// atomic. Both share the same opcode.
multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
  def _ci     : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}

defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;

// CI Only flat instructions
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
defm FLAT_ATOMIC_FMIN        : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
defm FLAT_ATOMIC_FMAX        : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;


//===----------------------------------------------------------------------===//
// VI
//===----------------------------------------------------------------------===//

// Real instruction for the VI encoding family, accepted by the assembler
// under isGFX8GFX9 and decoded in the "GFX8" namespace.
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace = "GFX8";
}

// Emits the VGPR-address and _SADDR real instructions for the pseudo whose
// name matches the defm name (NAME). Both share the same opcode.
multiclass FLAT_Real_AllAddr_vi<bits<7> op> {
  def _vi       : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)>;
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}

def FLAT_LOAD_UBYTE_vi  : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi  : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_vi  : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_vi   : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
// DWORDX4 is listed before DWORDX3; on VI their opcodes are 0x17 and 0x16.
def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;

def FLAT_STORE_BYTE_vi         : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
def FLAT_STORE_SHORT_vi        : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
def FLAT_STORE_DWORD_vi        : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;

def FLAT_LOAD_UBYTE_D16_vi    : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
def FLAT_LOAD_SBYTE_D16_vi    : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
def FLAT_LOAD_SHORT_D16_vi    : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;

// Non-returning and returning (_RTN) real instruction for one FLAT atomic.
// Both share the same opcode.
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> {
  def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}

// Global atomics get four real instructions: the two from
// FLAT_Real_AllAddr_vi (plain and _SADDR) plus their _RTN counterparts.
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op> :
  FLAT_Real_AllAddr_vi<op> {
  def _RTN_vi       : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
  def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}


defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;

// The GLOBAL definitions below use the same opcode values as the FLAT
// definitions above.
defm GLOBAL_LOAD_UBYTE   : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE   : FLAT_Real_AllAddr_vi <0x11>;
defm GLOBAL_LOAD_USHORT  : FLAT_Real_AllAddr_vi <0x12>;
defm GLOBAL_LOAD_SSHORT  : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD   : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;

defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_vi <0x20>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_vi <0x22>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Real_AllAddr_vi <0x24>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;

defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;


defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
defm GLOBAL_ATOMIC_ADD        : FLAT_Global_Real_Atomics_vi <0x42>;
defm GLOBAL_ATOMIC_SUB        : FLAT_Global_Real_Atomics_vi <0x43>;
defm GLOBAL_ATOMIC_SMIN       : FLAT_Global_Real_Atomics_vi <0x44>;
defm GLOBAL_ATOMIC_UMIN       : FLAT_Global_Real_Atomics_vi <0x45>;
defm GLOBAL_ATOMIC_SMAX       : FLAT_Global_Real_Atomics_vi <0x46>;
defm GLOBAL_ATOMIC_UMAX       : FLAT_Global_Real_Atomics_vi <0x47>;
defm GLOBAL_ATOMIC_AND        : FLAT_Global_Real_Atomics_vi <0x48>;
defm GLOBAL_ATOMIC_OR         : FLAT_Global_Real_Atomics_vi <0x49>;
defm GLOBAL_ATOMIC_XOR        : FLAT_Global_Real_Atomics_vi <0x4a>;
defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi <0x4b>;
defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi <0x4c>;
defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi <0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi <0x62>;
defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi <0x63>;
defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi <0x64>;
defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi <0x65>;
defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi <0x66>;
defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi <0x67>;
defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi <0x68>;
defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi <0x69>;
defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;

defm SCRATCH_LOAD_UBYTE        : FLAT_Real_AllAddr_vi <0x10>;
defm SCRATCH_LOAD_SBYTE        : FLAT_Real_AllAddr_vi <0x11>;
defm SCRATCH_LOAD_USHORT       : FLAT_Real_AllAddr_vi <0x12>;
defm SCRATCH_LOAD_SSHORT       : FLAT_Real_AllAddr_vi <0x13>;
defm SCRATCH_LOAD_DWORD        : FLAT_Real_AllAddr_vi <0x14>;
defm SCRATCH_LOAD_DWORDX2      : FLAT_Real_AllAddr_vi <0x15>;
defm SCRATCH_LOAD_DWORDX3      : FLAT_Real_AllAddr_vi <0x16>;
defm SCRATCH_LOAD_DWORDX4      : FLAT_Real_AllAddr_vi <0x17>;
defm SCRATCH_STORE_BYTE        : FLAT_Real_AllAddr_vi <0x18>;
defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>;
defm SCRATCH_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_vi <0x20>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
defm SCRATCH_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_vi <0x22>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
defm SCRATCH_LOAD_SHORT_D16    : FLAT_Real_AllAddr_vi <0x24>;
defm
SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>; 1485defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>; 1486defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>; 1487defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>; 1488defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; 1489defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; 1490defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; 1491 1492 1493//===----------------------------------------------------------------------===// 1494// GFX10. 1495//===----------------------------------------------------------------------===// 1496 1497class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> : 1498 FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> { 1499 let AssemblerPredicate = isGFX10Plus; 1500 let DecoderNamespace = "GFX10"; 1501 1502 let Inst{11-0} = offset{11-0}; 1503 let Inst{12} = !if(ps.has_dlc, dlc, ps.dlcValue); 1504 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d); 1505 let Inst{55} = 0; 1506} 1507 1508 1509multiclass FLAT_Real_Base_gfx10<bits<7> op> { 1510 def _gfx10 : 1511 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>; 1512} 1513 1514multiclass FLAT_Real_RTN_gfx10<bits<7> op> { 1515 def _RTN_gfx10 : 1516 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 1517} 1518 1519multiclass FLAT_Real_SADDR_gfx10<bits<7> op> { 1520 def _SADDR_gfx10 : 1521 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1522} 1523 1524multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> { 1525 def _SADDR_RTN_gfx10 : 1526 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 1527} 1528 1529multiclass FLAT_Real_ST_gfx10<bits<7> op> { 1530 def _ST_gfx10 : 1531 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")> { 1532 let Inst{54-48} = !cast<int>(EXEC_HI.HWEncoding); 1533 let OtherPredicates = [HasFlatScratchSTMode]; 1534 } 1535} 1536 1537multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> : 1538 FLAT_Real_Base_gfx10<op>, 1539 
FLAT_Real_SADDR_gfx10<op>; 1540 1541multiclass FLAT_Real_Atomics_gfx10<bits<7> op> : 1542 FLAT_Real_Base_gfx10<op>, 1543 FLAT_Real_RTN_gfx10<op>; 1544 1545multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> : 1546 FLAT_Real_AllAddr_gfx10<op>, 1547 FLAT_Real_RTN_gfx10<op>, 1548 FLAT_Real_SADDR_RTN_gfx10<op>; 1549 1550multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> : 1551 FLAT_Real_RTN_gfx10<op>, 1552 FLAT_Real_SADDR_RTN_gfx10<op>; 1553 1554multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> : 1555 FLAT_Real_Base_gfx10<op>, 1556 FLAT_Real_SADDR_gfx10<op>, 1557 FLAT_Real_ST_gfx10<op>; 1558 1559// ENC_FLAT. 1560defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>; 1561defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>; 1562defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx10<0x00a>; 1563defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx10<0x00b>; 1564defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx10<0x00c>; 1565defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>; 1566defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>; 1567defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>; 1568defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx10<0x018>; 1569defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx10<0x019>; 1570defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx10<0x01a>; 1571defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>; 1572defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx10<0x01c>; 1573defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx10<0x01d>; 1574defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx10<0x01e>; 1575defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx10<0x01f>; 1576defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx10<0x020>; 1577defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>; 1578defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx10<0x022>; 1579defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>; 1580defm FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx10<0x024>; 1581defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>; 1582defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx10<0x030>; 1583defm 
FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx10<0x031>; 1584defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx10<0x032>; 1585defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx10<0x033>; 1586defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx10<0x035>; 1587defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx10<0x036>; 1588defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx10<0x037>; 1589defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx10<0x038>; 1590defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx10<0x039>; 1591defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx10<0x03a>; 1592defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_gfx10<0x03b>; 1593defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx10<0x03c>; 1594defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx10<0x03d>; 1595defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>; 1596defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx10<0x03f>; 1597defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx10<0x040>; 1598defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx10<0x050>; 1599defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x051>; 1600defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx10<0x052>; 1601defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx10<0x053>; 1602defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx10<0x055>; 1603defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx10<0x056>; 1604defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx10<0x057>; 1605defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx10<0x058>; 1606defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx10<0x059>; 1607defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx10<0x05a>; 1608defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>; 1609defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>; 1610defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>; 1611defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>; 1612defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f>; 1613defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060>; 1614 1615 1616// ENC_FLAT_GLBL. 
// Loads, stores and D16 loads: FLAT_Real_AllAddr_gfx10 is applied to each
// opcode.
defm GLOBAL_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
defm GLOBAL_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
defm GLOBAL_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
defm GLOBAL_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
defm GLOBAL_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
defm GLOBAL_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
defm GLOBAL_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
defm GLOBAL_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
defm GLOBAL_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
defm GLOBAL_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
defm GLOBAL_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
defm GLOBAL_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
defm GLOBAL_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
defm GLOBAL_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;
// Atomics go through FLAT_Real_GlblAtomics_gfx10. The one exception is
// GLOBAL_ATOMIC_CSUB (0x034), which uses the _RTN-only multiclass.
defm GLOBAL_ATOMIC_SWAP        : FLAT_Real_GlblAtomics_gfx10<0x030>;
defm GLOBAL_ATOMIC_CMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x031>;
defm GLOBAL_ATOMIC_ADD         : FLAT_Real_GlblAtomics_gfx10<0x032>;
defm GLOBAL_ATOMIC_SUB         : FLAT_Real_GlblAtomics_gfx10<0x033>;
defm GLOBAL_ATOMIC_CSUB        : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>;
defm GLOBAL_ATOMIC_SMIN        : FLAT_Real_GlblAtomics_gfx10<0x035>;
defm GLOBAL_ATOMIC_UMIN        : FLAT_Real_GlblAtomics_gfx10<0x036>;
defm GLOBAL_ATOMIC_SMAX        : FLAT_Real_GlblAtomics_gfx10<0x037>;
defm GLOBAL_ATOMIC_UMAX        : FLAT_Real_GlblAtomics_gfx10<0x038>;
defm GLOBAL_ATOMIC_AND         : FLAT_Real_GlblAtomics_gfx10<0x039>;
defm GLOBAL_ATOMIC_OR          : FLAT_Real_GlblAtomics_gfx10<0x03a>;
defm GLOBAL_ATOMIC_XOR         : FLAT_Real_GlblAtomics_gfx10<0x03b>;
defm GLOBAL_ATOMIC_INC         : FLAT_Real_GlblAtomics_gfx10<0x03c>;
defm GLOBAL_ATOMIC_DEC         : FLAT_Real_GlblAtomics_gfx10<0x03d>;
defm GLOBAL_ATOMIC_FCMPSWAP    : FLAT_Real_GlblAtomics_gfx10<0x03e>;
defm GLOBAL_ATOMIC_FMIN        : FLAT_Real_GlblAtomics_gfx10<0x03f>;
defm GLOBAL_ATOMIC_FMAX        : FLAT_Real_GlblAtomics_gfx10<0x040>;
defm GLOBAL_ATOMIC_SWAP_X2     : FLAT_Real_GlblAtomics_gfx10<0x050>;
defm GLOBAL_ATOMIC_CMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x051>;
defm GLOBAL_ATOMIC_ADD_X2      : FLAT_Real_GlblAtomics_gfx10<0x052>;
defm GLOBAL_ATOMIC_SUB_X2      : FLAT_Real_GlblAtomics_gfx10<0x053>;
defm GLOBAL_ATOMIC_SMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x055>;
defm GLOBAL_ATOMIC_UMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x056>;
defm GLOBAL_ATOMIC_SMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x057>;
defm GLOBAL_ATOMIC_UMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x058>;
defm GLOBAL_ATOMIC_AND_X2      : FLAT_Real_GlblAtomics_gfx10<0x059>;
defm GLOBAL_ATOMIC_OR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05a>;
defm GLOBAL_ATOMIC_XOR_X2      : FLAT_Real_GlblAtomics_gfx10<0x05b>;
defm GLOBAL_ATOMIC_INC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05c>;
defm GLOBAL_ATOMIC_DEC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x060>;
// The ADDTID forms use 0x016/0x017, outside the load/store ranges above.
defm GLOBAL_LOAD_DWORD_ADDTID  : FLAT_Real_AllAddr_gfx10<0x016>;
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>;

// ENC_FLAT_SCRATCH.
// Every scratch opcode goes through FLAT_Real_ScratchAllAddr_gfx10. The
// opcode values repeat those of the FLAT_*/GLOBAL_* load and store lists.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x008>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x009>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00a>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00b>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_ScratchAllAddr_gfx10<0x00c>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_ScratchAllAddr_gfx10<0x00d>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_ScratchAllAddr_gfx10<0x00e>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_ScratchAllAddr_gfx10<0x00f>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x018>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x019>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x01a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_ScratchAllAddr_gfx10<0x01c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_ScratchAllAddr_gfx10<0x01d>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_ScratchAllAddr_gfx10<0x01e>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_ScratchAllAddr_gfx10<0x01f>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x020>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x021>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x022>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x023>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x024>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x025>;

// Floating-point add atomics. These are encoded with the _vi multiclass and
// gated on HasAtomicFaddInsts.
let SubtargetPredicate = HasAtomicFaddInsts in {

defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Real_AllAddr_vi <0x04d>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Real_AllAddr_vi <0x04e>;

} // End SubtargetPredicate = HasAtomicFaddInsts