//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Addressing-mode complex patterns. Each one names its selection routine as a
// string. The patterns later in this file use the *Offset forms with two
// operands (address, offset) and GlobalSAddr with three (saddr, voffset,
// offset).
def FlatOffset : ComplexPattern<iPTR, 2, "SelectFlatOffset", [], [SDNPWantRoot], -10>;
def GlobalOffset : ComplexPattern<iPTR, 2, "SelectGlobalOffset", [], [SDNPWantRoot], -10>;
def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [SDNPWantRoot], -10>;

def GlobalSAddr : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;
def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;

//===----------------------------------------------------------------------===//
// FLAT classes
//===----------------------------------------------------------------------===//

// Common base of every flat/global/scratch pseudo instruction.
//   opName  - mnemonic; stored in Mnemonic and passed to SIMCInstr.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - assembly operand string; stored in AsmOperands (the InstSI asm
//             string itself is left empty here).
//   pattern - optional selection patterns.
// The has_* / enabled_* / *Value fields below are encoding hints; FLAT_Real
// reads several of them (has_vdst, has_data, has_vaddr, has_saddr,
// enabled_saddr, has_glc, glcValue) when it lays out the instruction word.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern=[]> :
  InstSI<outs, ins, "", pattern>,
  SIMCInstr<opName, SIEncodingFamily.NONE> {

  let isPseudo = 1;
  let isCodeGenOnly = 1;

  let FLAT = 1;

  let UseNamedOperandTable = 1;
  let hasSideEffects = 0;
  let SchedRW = [WriteVMEM];

  string Mnemonic = opName;
  string AsmOperands = asmOps;

  // Segment selectors; at most one is expected to be set by a subclass or an
  // enclosing `let`. Both clear means a plain flat-segment instruction.
  bits<1> is_flat_global = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // We need to distinguish having saddr and enabling saddr because
  // saddr is only valid for scratch and global instructions. Pre-gfx9
  // these bits were reserved, so we also don't necessarily want to
  // set these bits to the disabled value for the original flat
  // segment instructions.
  bits<1> has_saddr = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value = 0;
  bits<1> has_vaddr = 1;

  bits<1> has_data = 1;
  bits<1> has_glc = 1;
  bits<1> glcValue = 0;
  bits<1> has_dlc = 1;
  bits<1> dlcValue = 0;
  bits<1> has_sccb = 1;
  bits<1> sccbValue = 0;

  // Global takes precedence over scratch; anything else only needs the flat
  // address space.
  let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts,
    !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  // Everything except global instructions is also marked as reading FLAT_SCR.
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a
  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
  // and are not considered done until both have been decremented.
  // LGKM_CNT is set only for plain flat (neither global nor scratch).
  let VM_CNT = 1;
  let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch));

  let FlatGlobal = is_flat_global;

  let FlatScratch = is_flat_scratch;
}

// Encoded (64-bit, Enc64) counterpart of a FLAT_Pseudo.
//   op - 7-bit opcode placed in Inst{24-18}.
//   ps - the pseudo whose operand lists, asm string (Mnemonic # AsmOperands),
//        flags and encoding hints are copied.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
  InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
  Enc64 {

  let isPseudo = 0;
  let isCodeGenOnly = 0;

  let FLAT = 1;

  // copy relevant pseudo op flags
  let SubtargetPredicate = ps.SubtargetPredicate;
  let AsmMatchConverter = ps.AsmMatchConverter;
  let OtherPredicates = ps.OtherPredicates;
  let TSFlags = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW = ps.SchedRW;
  let mayLoad = ps.mayLoad;
  let mayStore = ps.mayStore;
  let IsAtomicRet = ps.IsAtomicRet;
  let IsAtomicNoRet = ps.IsAtomicNoRet;
  let VM_CNT = ps.VM_CNT;
  let LGKM_CNT = ps.LGKM_CNT;

  // encoding fields
  // vdata/vdst are 10 bits wide: the low 8 bits go into the register fields
  // below and bit 9 feeds `acc`.
  bits<8> vaddr;
  bits<10> vdata;
  bits<7> saddr;
  bits<10> vdst;

  bits<5> cpol;

  // Only valid on gfx9
  bits<1> lds = 0; // XXX - What does this actually do?

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                  !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. Highest bit ignored for flat and treated as 12-bit
  // unsigned for flat accesses.
  bits<13> offset;
  // GFX90A+ only: instruction uses AccVGPR for data
  // Taken from bit 9 of vdst when there is a destination, otherwise from
  // bit 9 of vdata when there is a data operand, otherwise 0.
  bits<1> acc = !if(ps.has_vdst, vdst{9}, !if(ps.has_data, vdata{9}, 0));

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Only valid on GFX9+
  let Inst{12-0} = offset;
  let Inst{13} = lds;
  let Inst{15-14} = seg;

  // GLC comes from the cpol operand unless the pseudo fixes it (has_glc = 0),
  // in which case the pseudo's glcValue is encoded.
  let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
  let Inst{17} = cpol{CPolBit.SLC};
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata{7-0}, ?);
  // saddr field: the operand when enabled, 0x7f ("off") when the instruction
  // has an saddr slot that is disabled, and 0 when it has no saddr at all.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  // 54-48 is reserved.
  // NOTE(review): stale wording -- bits 54-48 are assigned just above (saddr);
  // per the FLAT_Pseudo comment they were reserved only before gfx9.
  let Inst{55} = acc; // nv on GFX9+, TFE before. AccVGPR for data on GFX90A.
  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, ?);
}

// Mix-in that records whether a record is the saddr variant (IsSaddr) and a
// name (SaddrOp) shared by the saddr and non-saddr variants of one operation.
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  bit IsSaddr = is_saddr;
  string SaddrOp = Name;
}

// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).
//
// Load pseudo for the flat and global segments.
//   regClass      - register class of the loaded value.
//   HasTiedOutput - adds a $vdst_in input tied to $vdst and uses CPol rather
//                   than CPol_0 for $cpol.
//   HasSaddr      - the asm string carries an saddr slot ("$saddr" or "off").
//   EnableSaddr   - operands are (SReg_64 $saddr, VGPR_32 $vaddr) instead of a
//                   single VReg_64 $vaddr.
//   vdata_op      - destination operand; derived from regClass by default.
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit HasSaddr = 0, bit EnableSaddr = 0,
  RegisterOperand vdata_op = getLdStRegisterOperand<regClass>.ret> : FLAT_Pseudo<
  opName,
  (outs vdata_op:$vdst),
  !con(
    !con(
      !if(EnableSaddr,
        (ins SReg_64:$saddr, VGPR_32:$vaddr),
        (ins VReg_64:$vaddr)),
        (ins flat_offset:$offset)),
        // FIXME: Operands with default values do not work with following non-optional operands.
        !if(HasTiedOutput, (ins CPol:$cpol, vdata_op:$vdst_in),
                           (ins CPol_0:$cpol))),
  " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
  let has_data = 0;
  let mayLoad = 1;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  // "_SADDR" is appended only when saddr is both present and enabled.
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
  let maybeAtomic = 1;

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Store pseudo for the flat and global segments. Operand order is
// ($vaddr, $vdata[, $saddr], $offset, $cpol); HasSaddr/EnableSaddr have the
// same meaning as in FLAT_Load_Pseudo.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
  bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(
    !if(EnableSaddr,
      (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr),
      (ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)),
      (ins flat_offset:$offset, CPol_0:$cpol)),
  " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
  let mayLoad = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_saddr = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
  let maybeAtomic = 1;
}

// Emits the global load pair: the base record (saddr slot present but "off")
// and the _SADDR record. HasTiedInput is forwarded as FLAT_Load_Pseudo's
// HasTiedOutput argument.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
      GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
      GlobalSaddrTable<1, opName>;
  }
}

// Global "addtid" load: no $vaddr operand (has_vaddr = 0); the only address
// operand is the optional $saddr, printed as "off" when disabled.
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0, bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs regClass:$vdst),
  !con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
    (ins flat_offset:$offset, CPol_0:$cpol),
    !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
  " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let is_flat_global = 1;
  let has_data = 0;
  let mayLoad = 1;
  let has_vaddr = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let maybeAtomic = 1;
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Emits the base and _SADDR records of an addtid load. The multiclass shares
// its name with the class it instantiates.
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
  bit HasTiedOutput = 0> {
  def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput>,
    GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, 1>,
    GlobalSaddrTable<1, opName>;
}

// Emits the global store pair (base and _SADDR), mirroring
// FLAT_Global_Load_Pseudo.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    def "" : FLAT_Store_Pseudo<opName, regClass, 1>,
      GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
      GlobalSaddrTable<1, opName>;
  }
}

// Global "addtid" store: no $vaddr operand; operands are
// ($vdata[, $saddr], $offset, $cpol).
// NOTE(review): $cpol is CPol here while the other non-tied pseudos in this
// file use CPol_0 -- confirm this is intentional.
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
  bit EnableSaddr = 0> : FLAT_Pseudo<
  opName,
  (outs),
  !con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)),
    (ins flat_offset:$offset, CPol:$cpol)),
  " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let is_flat_global = 1;
  let mayLoad = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_vaddr = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let maybeAtomic = 1;
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", "");
}

// Emits the base and _SADDR records of an addtid store.
multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass> {
  def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass>,
    GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, 1>,
    GlobalSaddrTable<1, opName>;
}

// Mix-in tagging a scratch record with its operation name (SVOp) and
// addressing mode string (Mode: "SV", "SS" or "ST" in the multiclasses below).
class FlatScratchInst <string sv_op, string mode> {
  string SVOp = sv_op;
  string Mode = mode;
}

// Scratch load pseudo with three address forms:
//   EnableSaddr = 1                  -> $saddr only   (PseudoInstr "..._SADDR")
//   EnableSaddr = 0, EnableVaddr = 1 -> $vaddr only   (no suffix)
//   EnableSaddr = 0, EnableVaddr = 0 -> offset only   (PseudoInstr "..._ST")
// EnableVaddr defaults to !EnableSaddr. HasTiedOutput behaves as in
// FLAT_Load_Pseudo.
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
  bit HasTiedOutput = 0,
  bit EnableSaddr = 0,
  bit EnableVaddr = !not(EnableSaddr)>
  : FLAT_Pseudo<
  opName,
  (outs getLdStRegisterOperand<regClass>.ret:$vdst),
  !con(
    !if(EnableSaddr,
      (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
      !if(EnableVaddr,
        (ins VGPR_32:$vaddr, flat_offset:$offset),
        (ins flat_offset:$offset))),
    !if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in),
                       (ins CPol_0:$cpol))),
  " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let has_data = 0;
  let mayLoad = 1;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));
  let maybeAtomic = 1;

  let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Scratch store pseudo; same three address forms as FLAT_Scratch_Load_Pseudo.
// $vdata is always the first operand in the ins dag, although the asm string
// prints the vaddr slot first.
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0,
  bit EnableVaddr = !not(EnableSaddr),
  RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret> : FLAT_Pseudo<
  opName,
  (outs),
  !if(EnableSaddr,
    (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
    !if(EnableVaddr,
      (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
      (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol))),
  " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let mayLoad = 0;
  let mayStore = 1;
  let has_vdst = 0;
  let has_saddr = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr = EnableVaddr;
  let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"));
  let maybeAtomic = 1;
}

// Emits the three scratch load records: base ("SV"), _SADDR ("SS") and _ST
// ("ST"). The _ST record is additionally gated on HasFlatScratchSTMode.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> {
  let is_flat_scratch = 1 in {
    def "" : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput>,
             FlatScratchInst<opName, "SV">;
    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>,
                 FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0>,
              FlatScratchInst<opName, "ST">;
  }
}

// Emits the three scratch store records; see FLAT_Scratch_Load_Pseudo above
// for the meaning of the SV/SS/ST variants.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>,
             FlatScratchInst<opName, "SV">;
    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>,
                 FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0>,
              FlatScratchInst<opName, "ST">;
  }
}

// Atomic that does not return the old value: no $vdst, and GLC is not taken
// from the operand (has_glc = 0) but fixed to glcValue = 0.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                               string asm, list<dag> pattern = []> :
  FLAT_Pseudo<opName, outs, ins, asm, pattern> {
    let mayLoad = 1;
    let mayStore = 1;
    let has_glc = 0;
    let glcValue = 0;
    let has_vdst = 0;
    let has_sccb = 1;
    let sccbValue = 0;
    let maybeAtomic = 1;
    let IsAtomicNoRet = 1;
}

// Returning atomic: inherits the no-return settings, then restores $vdst,
// fixes glcValue to 1 and swaps the IsAtomicNoRet/IsAtomicRet flags.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []>
  : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let hasPostISelHook = 1;
  let has_vdst = 1;
  let glcValue = 1;
  let sccbValue = 0;
  let IsAtomicNoRet = 0;
  let IsAtomicRet = 1;
  let PseudoInstr = NAME # "_RTN";
}

// Flat-segment atomic pair: the base (no-return) record and the _RTN record.
//   vdst_rc / vt      - register class and value type of the returned value.
//   atomic            - node matched by the _RTN selection pattern.
//   data_vt / data_rc - type and class of $vdata when they differ from the
//                       result (the cmpswap records pass wider ones).
//   isFP              - stored in FPAtomic; derived from data_vt by default.
multiclass FLAT_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
    " $vaddr, $vdata$offset$cpol">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
    (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
    " $vdst, $vaddr, $vdata$offset$cpol",
    [(set vt:$vdst,
      (atomic (FlatOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
       GlobalSaddrTable<0, opName#"_rtn">,
       AtomicNoRet <opName, 1>{
    let FPAtomic = isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }
}

// No-return global atomics: the base record (saddr printed as "off") and the
// _SADDR record. `vt` is only used as the default for data_vt here.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
    " $vaddr, $vdata, off$offset$cpol">,
    GlobalSaddrTable<0, opName>,
    AtomicNoRet <opName, 0> {
    let has_saddr = 1;
    let PseudoInstr = NAME;
    let FPAtomic = isFP;
  }

  def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
    (outs),
    (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol),
    " $vaddr, $vdata, $saddr$offset$cpol">,
    GlobalSaddrTable<1, opName>,
    AtomicNoRet <opName#"_saddr", 0> {
    let has_saddr = 1;
    let enabled_saddr = 1;
    let PseudoInstr = NAME#"_SADDR";
    let FPAtomic = isFP;
  }
}

// Returning global atomics: _RTN (with a GlobalOffset selection pattern) and
// _SADDR_RTN (no pattern attached here).
multiclass FLAT_Global_Atomic_Pseudo_RTN<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc,
  bit isFP = isFloatType<data_vt>.ret,
  RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret,
  RegisterOperand vdst_op = getLdStRegisterOperand<vdst_rc>.ret> {

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_op:$vdst),
      (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
    " $vdst, $vaddr, $vdata, off$offset$cpol",
    [(set vt:$vdst,
      (atomic (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>,
      GlobalSaddrTable<0, opName#"_rtn">,
      AtomicNoRet <opName, 1> {
    let has_saddr = 1;
    let FPAtomic = isFP;
  }

  def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
    (outs vdst_op:$vdst),
      (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
    " $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
    GlobalSaddrTable<1, opName#"_rtn">,
    AtomicNoRet <opName#"_saddr", 1> {
     let has_saddr = 1;
     let enabled_saddr = 1;
     let PseudoInstr = NAME#"_SADDR_RTN";
     let FPAtomic = isFP;
  }
}

// Full global atomic set: instantiates both the no-return and the returning
// multiclasses under is_flat_global = 1 and HasFlatGlobalInsts.
multiclass FLAT_Global_Atomic_Pseudo<
  string opName,
  RegisterClass vdst_rc,
  ValueType vt,
  SDPatternOperator atomic_rtn = null_frag,
  ValueType data_vt = vt,
  RegisterClass data_rc = vdst_rc> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
    defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>;
  }
}

//===----------------------------------------------------------------------===//
// Flat Instructions
491//===----------------------------------------------------------------------===// 492 493def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>; 494def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>; 495def FLAT_LOAD_USHORT : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>; 496def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>; 497def FLAT_LOAD_DWORD : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>; 498def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>; 499def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>; 500def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>; 501 502def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>; 503def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>; 504def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>; 505def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>; 506def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>; 507def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>; 508 509let SubtargetPredicate = HasD16LoadStore in { 510def FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>; 511def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>; 512def FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>; 513def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>; 514def FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>; 515def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>; 516 517def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>; 518def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>; 519} 520 521defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap", 522 VGPR_32, i32, 
AMDGPUatomic_cmp_swap_flat_32, 523 v2i32, VReg_64>; 524 525defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2", 526 VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64, 527 v2i64, VReg_128>; 528 529defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap", 530 VGPR_32, i32, atomic_swap_flat_32>; 531 532defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2", 533 VReg_64, i64, atomic_swap_flat_64>; 534 535defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add", 536 VGPR_32, i32, atomic_load_add_flat_32>; 537 538defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub", 539 VGPR_32, i32, atomic_load_sub_flat_32>; 540 541defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin", 542 VGPR_32, i32, atomic_load_min_flat_32>; 543 544defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin", 545 VGPR_32, i32, atomic_load_umin_flat_32>; 546 547defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax", 548 VGPR_32, i32, atomic_load_max_flat_32>; 549 550defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax", 551 VGPR_32, i32, atomic_load_umax_flat_32>; 552 553defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and", 554 VGPR_32, i32, atomic_load_and_flat_32>; 555 556defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or", 557 VGPR_32, i32, atomic_load_or_flat_32>; 558 559defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor", 560 VGPR_32, i32, atomic_load_xor_flat_32>; 561 562defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc", 563 VGPR_32, i32, atomic_inc_flat_32>; 564 565defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec", 566 VGPR_32, i32, atomic_dec_flat_32>; 567 568defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2", 569 VReg_64, i64, atomic_load_add_flat_64>; 570 571defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2", 572 VReg_64, i64, atomic_load_sub_flat_64>; 573 574defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2", 575 
VReg_64, i64, atomic_load_min_flat_64>; 576 577defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2", 578 VReg_64, i64, atomic_load_umin_flat_64>; 579 580defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2", 581 VReg_64, i64, atomic_load_max_flat_64>; 582 583defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2", 584 VReg_64, i64, atomic_load_umax_flat_64>; 585 586defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2", 587 VReg_64, i64, atomic_load_and_flat_64>; 588 589defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2", 590 VReg_64, i64, atomic_load_or_flat_64>; 591 592defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2", 593 VReg_64, i64, atomic_load_xor_flat_64>; 594 595defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2", 596 VReg_64, i64, atomic_inc_flat_64>; 597 598defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2", 599 VReg_64, i64, atomic_dec_flat_64>; 600 601// GFX7-, GFX10-only flat instructions. 
// Floating-point flat atomics. The plain fmin/fmax records pass no node, so
// their _RTN pattern uses the null_frag default.
let SubtargetPredicate = isGFX7GFX10 in {

defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
                                VGPR_32, f32, null_frag, v2f32, VReg_64>;

defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
                                VReg_64, f64, null_frag, v2f64, VReg_128>;

defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
                                VGPR_32, f32>;

defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
                                VGPR_32, f32>;

defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
                                VReg_64, f64>;

defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
                                VReg_64, f64>;

} // End SubtargetPredicate = isGFX7GFX10

// f64 add/min/max atomics for both the flat and global segments, selected
// from the corresponding amdgcn intrinsics.
let SubtargetPredicate = isGFX90APlus in {
  defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64, int_amdgcn_flat_atomic_fadd>;
  defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64, int_amdgcn_flat_atomic_fmin>;
  defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64, int_amdgcn_flat_atomic_fmax>;
  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd>;
  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64, int_amdgcn_global_atomic_fmin>;
  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax>;
} // End SubtargetPredicate = isGFX90APlus

// Global loads. Each defm yields the base record and its _SADDR variant.
defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
defm GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
defm GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;

// D16 global loads (tied $vdst_in).
// NOTE(review): unlike the FLAT_*_D16 records, these are not wrapped in
// HasD16LoadStore -- confirm that is intended.
defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
let OtherPredicates = [HasGFX10_BEncoding] in
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;

// Global stores.
defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;
let OtherPredicates = [HasGFX10_BEncoding] in
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;

defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;

// Global integer atomics. Each defm yields the base, _SADDR, _RTN and
// _SADDR_RTN records. The outer `let` matters for GLOBAL_ATOMIC_CSUB, which
// uses the _RTN-only multiclass that does not set is_flat_global itself.
let is_flat_global = 1 in {
defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap",
                               VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32,
                               v2i32, VReg_64>;

defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2",
                                  VReg_64, i64, AMDGPUatomic_cmp_swap_global_64,
                                  v2i64, VReg_128>;

defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap",
                             VGPR_32, i32, atomic_swap_global_32>;

defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2",
                                VReg_64, i64, atomic_swap_global_64>;

defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add",
                           VGPR_32, i32, atomic_load_add_global_32>;

defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub",
                           VGPR_32, i32, atomic_load_sub_global_32>;

defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin",
                            VGPR_32, i32, atomic_load_min_global_32>;

defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin",
                            VGPR_32, i32, atomic_load_umin_global_32>;

defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax",
                            VGPR_32, i32, atomic_load_max_global_32>;

defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax",
                            VGPR_32, i32, atomic_load_umax_global_32>;

defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and",
                           VGPR_32, i32, atomic_load_and_global_32>;

defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or",
                          VGPR_32, i32, atomic_load_or_global_32>;

defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor",
                           VGPR_32, i32, atomic_load_xor_global_32>;

defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc",
                           VGPR_32, i32, atomic_inc_global_32>;

defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec",
                           VGPR_32, i32, atomic_dec_global_32>;

defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2",
                              VReg_64, i64, atomic_load_add_global_64>;

defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2",
                              VReg_64, i64, atomic_load_sub_global_64>;

defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2",
                               VReg_64, i64, atomic_load_min_global_64>;

defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2",
                               VReg_64, i64, atomic_load_umin_global_64>;

defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2",
                               VReg_64, i64, atomic_load_max_global_64>;

defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2",
                               VReg_64, i64, atomic_load_umax_global_64>;

defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2",
                              VReg_64, i64, atomic_load_and_global_64>;

defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2",
                             VReg_64, i64, atomic_load_or_global_64>;

defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2",
                              VReg_64, i64, atomic_load_xor_global_64>;

defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2",
                              VReg_64, i64, atomic_inc_global_64>;

defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2",
                              VReg_64, i64, atomic_dec_global_64>;

// Returning-only atomic: just the _RTN and _SADDR_RTN records are created.
let SubtargetPredicate = HasGFX10_BEncoding in
defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub",
                              VGPR_32, i32, int_amdgcn_global_atomic_csub>;
} // End is_flat_global = 1



// Scratch loads and stores. Each defm yields the base (SV), _SADDR (SS) and
// _ST records.
let SubtargetPredicate = HasFlatScratchInsts in {
defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;

defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>;
defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>;
defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>;

defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;

defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;

} // End SubtargetPredicate = HasFlatScratchInsts

// Floating-point global atomics.
// NOTE(review): FLAT_Global_Atomic_Pseudo sets SubtargetPredicate to
// HasFlatGlobalInsts in its own inner `let`; check whether the isGFX10Plus
// predicate given here still takes effect on the generated records.
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
  defm GLOBAL_ATOMIC_FCMPSWAP :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, null_frag, v2f32, VReg_64>;
  defm GLOBAL_ATOMIC_FMIN :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32, int_amdgcn_global_atomic_fmin>;
  defm GLOBAL_ATOMIC_FMAX :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32, int_amdgcn_global_atomic_fmax>;
  defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, null_frag, v2f64, VReg_128>;
  defm GLOBAL_ATOMIC_FMIN_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64, int_amdgcn_global_atomic_fmin>;
  defm GLOBAL_ATOMIC_FMAX_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64, int_amdgcn_global_atomic_fmax>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1

// f32 / packed-f16 add atomics. The no-return and returning records are
// created by separate defms so that each half gets its own predicate.
let is_flat_global = 1 in {
let OtherPredicates = [HasAtomicFaddInsts] in {
  defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <
    "global_atomic_add_f32", VGPR_32, f32
  >;
  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <
    "global_atomic_pk_add_f16", VGPR_32, v2f16
  >;
} // End OtherPredicates = [HasAtomicFaddInsts]

let OtherPredicates = [isGFX90APlus] in {
  defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN <
    "global_atomic_add_f32", VGPR_32, f32, int_amdgcn_global_atomic_fadd
  >;
  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN <
    "global_atomic_pk_add_f16", VGPR_32, v2f16, int_amdgcn_global_atomic_fadd
  >;
} // End OtherPredicates = [isGFX90APlus]
} // End is_flat_global = 1

//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//

// Patterns for global loads with no offset.
// NOTE(review): this heading looks stale -- FlatLoadPat, which follows, matches
// a flat address together with an immediate offset (via FlatOffset).
// Load through a flat address: (FlatOffset vaddr, offset) -> inst vaddr, offset.
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (FlatOffset i64:$vaddr, i16:$offset))),
  (inst $vaddr, $offset)
>;

// D16 load through a flat address. The node takes the previous register value
// as a second operand ($in), which is forwarded to the instruction after a
// literal 0 operand.
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FlatOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
  (inst $vaddr, $offset, 0, $in)
>;

// Same as FlatLoadPat_D16 but the address is matched with GlobalOffset.
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (GlobalOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
  (inst $vaddr, $offset, 0, $in)
>;

// D16 load using the SGPR-base form: GlobalSAddr yields an SGPR base, a VGPR
// offset and an immediate offset.
class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$in)),
  (inst $saddr, $voffset, $offset, 0, $in)
>;

// Load whose address is matched with GlobalOffset.
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i16:$offset))),
  (inst $vaddr, $offset)
>;

// Load using the SGPR-base form (saddr, voffset, offset, literal 0).
class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset))),
  (inst $saddr, $voffset, $offset, 0)
>;

// Store using the SGPR-base form. Regular stores take the data operand first
// in the source node; the instruction operand order is
// (voffset, data, saddr, offset).
class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
                           ValueType vt> : GCNPat <
  (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset)),
  (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;

// Atomic store using the SGPR-base form; unlike GlobalStoreSaddrPat the
// address comes first in the source node.
class GlobalAtomicStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
                                 ValueType vt> : GCNPat <
  (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$data),
  (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;

// Returning atomic using the SGPR-base form. data_vt may differ from the
// result type vt (the cmpswap patterns pass a two-element vector).
class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
                            ValueType vt, ValueType data_vt = vt> : GCNPat <
  (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), data_vt:$data)),
  (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset)
>;

// Non-returning atomic using the SGPR-base form.
class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
                                 ValueType vt> : GCNPat <
  (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$data),
  (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;

// Store through a flat address (data operand first in the source node).
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node vt:$data, (FlatOffset i64:$vaddr, i16:$offset)),
  (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

// Store whose address is matched with GlobalOffset.
class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node vt:$data, (GlobalOffset i64:$vaddr, i16:$offset)),
  (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  // atomic store follows atomic binop convention so the address comes
  // first.
  (node (FlatOffset i64:$vaddr, i16:$offset), vt:$data),
  (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
                                ValueType vt, ValueType data_vt = vt> : GCNPat <
  // atomic store follows atomic binop convention so the address comes
  // first.
  (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data),
  (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
>;

// Returning atomic through a flat address. Unlike the other atomic pattern
// classes here, $data is forwarded without a getVregSrcForVT register class.
class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                     ValueType data_vt = vt> : GCNPat <
  (vt (node (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)),
  (inst $vaddr, $data, $offset)
>;

// Non-returning atomic through a flat address.
class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (FlatOffset i64:$vaddr, i16:$offset), vt:$data),
  (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

// Non-returning atomic whose address is matched with GlobalOffset.
class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (GlobalOffset i64:$vaddr, i16:$offset), vt:$data),
  (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
>;

// Returning atomic whose address is matched with GlobalOffset.
class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                           ValueType data_vt = vt> : GCNPat <
  (vt (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
  (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
>;

// Scratch load with a 32-bit VGPR address.
class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset))),
  (inst $vaddr, $offset)
>;

// Scratch D16 load with a 32-bit VGPR address; $in is the previous register
// value forwarded to the instruction.
class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in),
  (inst $vaddr, $offset, 0, $in)
>;

// Scratch store with a 32-bit VGPR address. Note the instruction takes the
// data operand before the address.
class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset)),
  (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
>;

// Scratch load with a 32-bit SGPR address.
class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset))),
  (inst $saddr, $offset)
>;

// Scratch D16 load with a 32-bit SGPR address.
class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat <
  (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)),
  (inst $saddr, $offset, 0, $in)
>;

// Scratch store with a 32-bit SGPR address.
class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
                            ValueType vt> : GCNPat <
  (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset)),
  (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
>;

let OtherPredicates = [HasFlatAddressSpace] in {

def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;

def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;

def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;

foreach vt = Reg32Types.types in {
def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
}

foreach vt = VReg_64.RegTypes in {
def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>;
def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
}

def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;

foreach vt = VReg_128.RegTypes in {
def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
}

def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>;
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64>;

// NOTE(review): the flat atomic patterns below are written against the
// *_global_* fragments; presumably those fragments also cover the flat
// address space - confirm against their definitions.
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>;

def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>;
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>;

def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;

// This inner `let` replaces the enclosing OtherPredicates value; it does not
// append to it.
let OtherPredicates = [D16PreservesUnusedBits] in {
def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;

def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;

def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
}

} // End OtherPredicates = [HasFlatAddressSpace]


// The multiclasses below emit two patterns per (instruction, node, type):
// one for the VGPR-address form and one for the "_SADDR" variant of the same
// pseudo, looked up by name. The SADDR pattern is given an AddedComplexity one
// higher than the VGPR-address pattern (11 vs. 10 for global, 26 vs. 25 for
// scratch).

// Global load patterns.
multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  def : FlatLoadSignedPat <inst, node, vt> {
    let AddedComplexity = 10;
  }

  def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 11;
  }
}

// Global D16 load patterns.
multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  def : FlatSignedLoadPat_D16 <inst, node, vt> {
    let AddedComplexity = 10;
  }

  def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 11;
  }
}

// Global store patterns.
multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt> {
  def : FlatStoreSignedPat <inst, node, vt> {
    let AddedComplexity = 10;
  }

  def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 11;
  }
}

// Deal with swapped operands for atomic_store vs. regular store
multiclass GlobalFLATAtomicStorePats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  def : FlatStoreSignedAtomicPat <inst, node, vt> {
    let AddedComplexity = 10;
  }

  def : GlobalAtomicStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 11;
  }
}

// Returning global atomic patterns. Takes the name of the no-return pseudo as
// a string and selects its "_RTN" and "_SADDR_RTN" variants.
multiclass GlobalFLATAtomicPats<string nortn_inst_name, SDPatternOperator node,
                                ValueType vt, ValueType data_vt = vt> {
  def : FlatSignedAtomicPat <!cast<FLAT_Pseudo>(nortn_inst_name#"_RTN"), node, vt, data_vt> {
    let AddedComplexity = 10;
  }

  def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(nortn_inst_name#"_SADDR_RTN"), node, vt, data_vt> {
    let AddedComplexity = 11;
  }
}

// Non-returning global atomic patterns.
multiclass GlobalFLATNoRtnAtomicPats<FLAT_Pseudo inst, SDPatternOperator node,
                                     ValueType vt> {
  def : FlatSignedAtomicPatNoRtn <inst, node, vt> {
    let AddedComplexity = 10;
  }

  def : GlobalAtomicNoRtnSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 11;
  }
}

// Scratch load patterns.
multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  def : ScratchLoadSignedPat <inst, node, vt> {
    let AddedComplexity = 25;
  }

  def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 26;
  }
}

// Scratch store patterns.
multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt> {
  def : ScratchStoreSignedPat <inst, node, vt> {
    let AddedComplexity = 25;
  }

  def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 26;
  }
}

// Scratch D16 load patterns.
multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  def : ScratchLoadSignedPat_D16 <inst, node, vt> {
    let AddedComplexity = 25;
  }

  def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> {
    let AddedComplexity = 26;
  }
}

let OtherPredicates = [HasFlatGlobalInsts] in {

defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>;

foreach vt = Reg32Types.types in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>;
}

foreach vt = VReg_64.RegTypes in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, load_global, vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>;
}

defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX3, load_global, v3i32>;

foreach vt = VReg_128.RegTypes in {
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX4, load_global, vt>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>;
}

// There is no distinction for atomic load lowering during selection;
// the memory legalizer will set the cache bits and insert the
// appropriate waits.
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>;
defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>;

defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>;
defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>;

// This inner `let` replaces the enclosing OtherPredicates value; it does not
// append to it.
let OtherPredicates = [D16PreservesUnusedBits] in {
defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>;
defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>;

defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>;

defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>;
defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>;
}

defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORD, atomic_store_global_32, i32>;
defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64>;

defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", atomic_load_add_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", atomic_load_sub_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", atomic_inc_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", atomic_dec_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", atomic_load_and_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", atomic_load_max_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", atomic_load_umax_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", atomic_load_min_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", atomic_load_umin_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", atomic_load_or_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", atomic_swap_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", AMDGPUatomic_cmp_swap_global_32, i32, v2i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", atomic_load_xor_global_32, i32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CSUB", int_amdgcn_global_atomic_csub, i32>;

defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", atomic_load_add_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", atomic_load_sub_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", atomic_inc_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", atomic_dec_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", atomic_load_and_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", atomic_load_max_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", atomic_load_umax_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", atomic_load_min_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", atomic_load_umin_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", atomic_load_or_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", atomic_swap_global_64, i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", AMDGPUatomic_cmp_swap_global_64, i64, v2i64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", atomic_load_xor_global_64, i64>;

let OtherPredicates = [isGFX10Plus] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", atomic_load_fmin_global_32, f32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", atomic_load_fmax_global_32, f32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", atomic_load_fmin_global_64, f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", atomic_load_fmax_global_64, f64>;
}

let OtherPredicates = [HasAtomicFaddInsts] in {
defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_ADD_F32, atomic_load_fadd_global_noret_32, f32>;
defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_PK_ADD_F16, atomic_load_fadd_v2f16_global_noret_32, v2f16>;
}

let OtherPredicates = [isGFX90APlus] in {
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F32", atomic_load_fadd_global_32, f32>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", atomic_load_fadd_v2f16_global_32, v2f16>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", atomic_load_fadd_global_64, f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", atomic_load_fmin_global_64, f64>;
defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", atomic_load_fmax_global_64, f64>;
// NOTE(review): these three select FLAT_* instructions from *_flat_64
// fragments but go through FlatSignedAtomicPat, which matches the address with
// GlobalOffset - confirm that is intended.
def  : FlatSignedAtomicPat  <FLAT_ATOMIC_ADD_F64_RTN, atomic_load_fadd_flat_64, f64>;
def  : FlatSignedAtomicPat  <FLAT_ATOMIC_MIN_F64_RTN, atomic_load_fmin_flat_64, f64>;
def  : FlatSignedAtomicPat  <FLAT_ATOMIC_MAX_F64_RTN, atomic_load_fmax_flat_64, f64>;
}

} // End OtherPredicates = [HasFlatGlobalInsts]

let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in {

defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, i16>;

foreach vt = Reg32Types.types in {
defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORD, load_private, vt>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>;
}

foreach vt = VReg_64.RegTypes in {
defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX2, load_private, vt>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>;
}

defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>;

foreach vt = VReg_128.RegTypes in {
defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX4, load_private, vt>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>;
}

defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, truncstorei16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private, v3i32>;

// The inner `let` replaces the enclosing OtherPredicates value, so the two
// scratch predicates are repeated here alongside D16PreservesUnusedBits.
let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in {
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI, truncstorei8_hi16_private, i32>;

defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2f16>;

defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2f16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2i16>;
defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f16>;
}

} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]

//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// CI
//===----------------------------------------------------------------------===//

// Real (encoded) FLAT instruction for the SI encoding family, assembled only
// under isGFX7Only and decoded in the "GFX7" namespace. `op` is the 7-bit
// opcode; `ps` is the pseudo it realizes.
class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
  let AssemblerPredicate = isGFX7Only;
  let DecoderNamespace="GFX7";
}

def FLAT_LOAD_UBYTE_ci         : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_ci         : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_ci        : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_ci        : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_ci         : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_ci       : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_ci       : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_ci       : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>;

def FLAT_STORE_BYTE_ci         : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_ci        : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_ci        : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_ci      : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci      : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci      : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;

// Emits the "_ci" and "_RTN_ci" encodings of an atomic. Both share the same
// opcode; the pseudos are looked up from ps.PseudoInstr by name.
multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
  def _ci     : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}

defm FLAT_ATOMIC_SWAP          : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP       : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD           : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB           : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN          : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN          : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX          : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX          : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND           : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR            : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR           : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC           : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC           : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2       : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2    : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2        : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2        : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2       : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2       : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2       : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2       : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2        : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2         : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2        : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2        : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2        : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;

// CI Only flat instructions
defm FLAT_ATOMIC_FCMPSWAP      : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
defm FLAT_ATOMIC_FMIN          : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
defm FLAT_ATOMIC_FMAX          : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
defm FLAT_ATOMIC_FCMPSWAP_X2   : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
defm FLAT_ATOMIC_FMIN_X2       : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
defm FLAT_ATOMIC_FMAX_X2       : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;


//===----------------------------------------------------------------------===//
// VI
//===----------------------------------------------------------------------===//

// Real (encoded) FLAT instruction for the VI encoding family, assembled under
// isGFX8GFX9 and decoded in the "GFX8" namespace.
//
// has_sccb defaults to the pseudo's has_sccb. When it is set, encoding bit 25
// comes from the SCC bit of the cpol operand; otherwise the bit is fixed to
// ps.sccbValue and "$sccb" is stripped from the assembly operand string.
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace = "GFX8";

  let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
  let AsmString = ps.Mnemonic #
                  !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
}

// Emits the "_vi" and "_SADDR_vi" encodings of the pseudo named by the
// enclosing defm (NAME), both with the same opcode.
multiclass FLAT_Real_AllAddr_vi<bits<7> op,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
}

def FLAT_LOAD_UBYTE_vi         : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi         : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi        : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_vi        : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_vi         : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_vi       : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_vi       : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_vi       : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;

def FLAT_STORE_BYTE_vi         : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
def FLAT_STORE_SHORT_vi        : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
def FLAT_STORE_DWORD_vi        : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;

def FLAT_LOAD_UBYTE_D16_vi    : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
def FLAT_LOAD_SBYTE_D16_vi    : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
def FLAT_LOAD_SHORT_D16_vi    : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;

// Emits the "_vi" and "_RTN_vi" encodings of a flat atomic; the pseudos are
// looked up from ps.PseudoInstr by name.
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
  def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
  def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
}

// Global atomics: the two address forms from FLAT_Real_AllAddr_vi plus their
// returning ("_RTN", "_SADDR_RTN") counterparts, all with the same opcode.
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
  FLAT_Real_AllAddr_vi<op, has_sccb> {
  def _RTN_vi  : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
  def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
}


defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;

defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>;
defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>;
defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
defm
GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>; 1481defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>; 1482 1483defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>; 1484defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>; 1485defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>; 1486defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>; 1487defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>; 1488defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>; 1489 1490defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>; 1491defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>; 1492defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>; 1493defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>; 1494defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>; 1495defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; 1496defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; 1497defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; 1498 1499 1500defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; 1501defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>; 1502defm GLOBAL_ATOMIC_ADD : FLAT_Global_Real_Atomics_vi <0x42>; 1503defm GLOBAL_ATOMIC_SUB : FLAT_Global_Real_Atomics_vi <0x43>; 1504defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Real_Atomics_vi <0x44>; 1505defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Real_Atomics_vi <0x45>; 1506defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Real_Atomics_vi <0x46>; 1507defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Real_Atomics_vi <0x47>; 1508defm GLOBAL_ATOMIC_AND : FLAT_Global_Real_Atomics_vi <0x48>; 1509defm GLOBAL_ATOMIC_OR : FLAT_Global_Real_Atomics_vi <0x49>; 1510defm GLOBAL_ATOMIC_XOR : FLAT_Global_Real_Atomics_vi <0x4a>; 1511defm GLOBAL_ATOMIC_INC : FLAT_Global_Real_Atomics_vi <0x4b>; 1512defm GLOBAL_ATOMIC_DEC : FLAT_Global_Real_Atomics_vi <0x4c>; 1513defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Real_Atomics_vi <0x60>; 1514defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>; 
// VI global 64-bit (X2) atomics, continued.
defm GLOBAL_ATOMIC_ADD_X2  : FLAT_Global_Real_Atomics_vi<0x62>;
defm GLOBAL_ATOMIC_SUB_X2  : FLAT_Global_Real_Atomics_vi<0x63>;
defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Real_Atomics_vi<0x64>;
defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Real_Atomics_vi<0x65>;
defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Real_Atomics_vi<0x66>;
defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Real_Atomics_vi<0x67>;
defm GLOBAL_ATOMIC_AND_X2  : FLAT_Global_Real_Atomics_vi<0x68>;
defm GLOBAL_ATOMIC_OR_X2   : FLAT_Global_Real_Atomics_vi<0x69>;
defm GLOBAL_ATOMIC_XOR_X2  : FLAT_Global_Real_Atomics_vi<0x6a>;
defm GLOBAL_ATOMIC_INC_X2  : FLAT_Global_Real_Atomics_vi<0x6b>;
defm GLOBAL_ATOMIC_DEC_X2  : FLAT_Global_Real_Atomics_vi<0x6c>;

// VI scratch loads and stores; each defm yields a `_vi` and a `_SADDR_vi`
// real. The opcode values match those used by the GLOBAL_* VI definitions of
// the same operation.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_vi<0x10>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_vi<0x11>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_vi<0x12>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_vi<0x13>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_vi<0x14>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_vi<0x15>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_vi<0x16>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_vi<0x17>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_vi<0x18>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi<0x19>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_vi<0x20>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_vi<0x21>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_vi<0x22>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_vi<0x23>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_vi<0x24>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_vi<0x25>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_vi<0x1a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi<0x1b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_vi<0x1c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_vi<0x1d>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi<0x1e>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi<0x1f>;

// Floating-point global atomics. has_sccb is passed explicitly as 0, so
// FLAT_Real_vi removes "$sccb" from their operand strings and encodes bit 25
// from the pseudo's sccbValue.
let SubtargetPredicate = HasAtomicFaddInsts in {
  defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Global_Real_Atomics_vi<0x04d, 0>;
  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi<0x04e, 0>;
} // End SubtargetPredicate = HasAtomicFaddInsts

// 64-bit floating-point atomics, flat and global, likewise with has_sccb = 0.
let SubtargetPredicate = isGFX90AOnly in {
  defm FLAT_ATOMIC_ADD_F64   : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>;
  defm FLAT_ATOMIC_MIN_F64   : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>;
  defm FLAT_ATOMIC_MAX_F64   : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>;
  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>;
  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>;
  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
} // End SubtargetPredicate = isGFX90AOnly

//===----------------------------------------------------------------------===//
// GFX10.
1566//===----------------------------------------------------------------------===// 1567 1568class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> : 1569 FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> { 1570 let AssemblerPredicate = isGFX10Plus; 1571 let DecoderNamespace = "GFX10"; 1572 1573 let Inst{11-0} = offset{11-0}; 1574 let Inst{12} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue); 1575 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d); 1576 let Inst{55} = 0; 1577} 1578 1579 1580multiclass FLAT_Real_Base_gfx10<bits<7> op> { 1581 def _gfx10 : 1582 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>; 1583} 1584 1585multiclass FLAT_Real_RTN_gfx10<bits<7> op> { 1586 def _RTN_gfx10 : 1587 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 1588} 1589 1590multiclass FLAT_Real_SADDR_gfx10<bits<7> op> { 1591 def _SADDR_gfx10 : 1592 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1593} 1594 1595multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> { 1596 def _SADDR_RTN_gfx10 : 1597 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 1598} 1599 1600multiclass FLAT_Real_ST_gfx10<bits<7> op> { 1601 def _ST_gfx10 : 1602 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")> { 1603 let Inst{54-48} = !cast<int>(EXEC_HI.HWEncoding); 1604 let OtherPredicates = [HasFlatScratchSTMode]; 1605 } 1606} 1607 1608multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> : 1609 FLAT_Real_Base_gfx10<op>, 1610 FLAT_Real_SADDR_gfx10<op>; 1611 1612multiclass FLAT_Real_Atomics_gfx10<bits<7> op> : 1613 FLAT_Real_Base_gfx10<op>, 1614 FLAT_Real_RTN_gfx10<op>; 1615 1616multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> : 1617 FLAT_Real_AllAddr_gfx10<op>, 1618 FLAT_Real_RTN_gfx10<op>, 1619 FLAT_Real_SADDR_RTN_gfx10<op>; 1620 1621multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> : 1622 FLAT_Real_RTN_gfx10<op>, 1623 FLAT_Real_SADDR_RTN_gfx10<op>; 1624 1625multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> : 1626 FLAT_Real_Base_gfx10<op>, 1627 
FLAT_Real_SADDR_gfx10<op>, 1628 FLAT_Real_ST_gfx10<op>; 1629 1630// ENC_FLAT. 1631defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>; 1632defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>; 1633defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx10<0x00a>; 1634defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx10<0x00b>; 1635defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx10<0x00c>; 1636defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>; 1637defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>; 1638defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>; 1639defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx10<0x018>; 1640defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx10<0x019>; 1641defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx10<0x01a>; 1642defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>; 1643defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx10<0x01c>; 1644defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx10<0x01d>; 1645defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx10<0x01e>; 1646defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx10<0x01f>; 1647defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx10<0x020>; 1648defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>; 1649defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx10<0x022>; 1650defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>; 1651defm FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx10<0x024>; 1652defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>; 1653defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx10<0x030>; 1654defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx10<0x031>; 1655defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx10<0x032>; 1656defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx10<0x033>; 1657defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx10<0x035>; 1658defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx10<0x036>; 1659defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx10<0x037>; 1660defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx10<0x038>; 1661defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx10<0x039>; 1662defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx10<0x03a>; 1663defm FLAT_ATOMIC_XOR 
: FLAT_Real_Atomics_gfx10<0x03b>; 1664defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx10<0x03c>; 1665defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx10<0x03d>; 1666defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>; 1667defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx10<0x03f>; 1668defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx10<0x040>; 1669defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx10<0x050>; 1670defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x051>; 1671defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx10<0x052>; 1672defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx10<0x053>; 1673defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx10<0x055>; 1674defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx10<0x056>; 1675defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx10<0x057>; 1676defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx10<0x058>; 1677defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx10<0x059>; 1678defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx10<0x05a>; 1679defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>; 1680defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>; 1681defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>; 1682defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>; 1683defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f>; 1684defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060>; 1685 1686 1687// ENC_FLAT_GLBL. 
// Global loads, stores and D16 loads: each defm yields `_gfx10` and
// `_SADDR_gfx10` reals.
defm GLOBAL_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
defm GLOBAL_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
defm GLOBAL_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
defm GLOBAL_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
defm GLOBAL_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
defm GLOBAL_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
defm GLOBAL_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
defm GLOBAL_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
defm GLOBAL_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
defm GLOBAL_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
defm GLOBAL_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
defm GLOBAL_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
defm GLOBAL_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
defm GLOBAL_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;
// Global atomics: base, SADDR, RTN and SADDR_RTN reals per defm.
// GLOBAL_ATOMIC_CSUB (0x034) is instantiated with the RTN-only multiclass.
defm GLOBAL_ATOMIC_SWAP        : FLAT_Real_GlblAtomics_gfx10<0x030>;
defm GLOBAL_ATOMIC_CMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x031>;
defm GLOBAL_ATOMIC_ADD         : FLAT_Real_GlblAtomics_gfx10<0x032>;
defm GLOBAL_ATOMIC_SUB         : FLAT_Real_GlblAtomics_gfx10<0x033>;
defm GLOBAL_ATOMIC_CSUB        : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>;
defm GLOBAL_ATOMIC_SMIN        : FLAT_Real_GlblAtomics_gfx10<0x035>;
defm GLOBAL_ATOMIC_UMIN        : FLAT_Real_GlblAtomics_gfx10<0x036>;
defm GLOBAL_ATOMIC_SMAX        : FLAT_Real_GlblAtomics_gfx10<0x037>;
defm GLOBAL_ATOMIC_UMAX        : FLAT_Real_GlblAtomics_gfx10<0x038>;
defm GLOBAL_ATOMIC_AND         : FLAT_Real_GlblAtomics_gfx10<0x039>;
defm GLOBAL_ATOMIC_OR          : FLAT_Real_GlblAtomics_gfx10<0x03a>;
defm GLOBAL_ATOMIC_XOR         : FLAT_Real_GlblAtomics_gfx10<0x03b>;
defm GLOBAL_ATOMIC_INC         : FLAT_Real_GlblAtomics_gfx10<0x03c>;
defm GLOBAL_ATOMIC_DEC         : FLAT_Real_GlblAtomics_gfx10<0x03d>;
defm GLOBAL_ATOMIC_FCMPSWAP    : FLAT_Real_GlblAtomics_gfx10<0x03e>;
defm GLOBAL_ATOMIC_FMIN        : FLAT_Real_GlblAtomics_gfx10<0x03f>;
defm GLOBAL_ATOMIC_FMAX        : FLAT_Real_GlblAtomics_gfx10<0x040>;
defm GLOBAL_ATOMIC_SWAP_X2     : FLAT_Real_GlblAtomics_gfx10<0x050>;
defm GLOBAL_ATOMIC_CMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x051>;
defm GLOBAL_ATOMIC_ADD_X2      : FLAT_Real_GlblAtomics_gfx10<0x052>;
defm GLOBAL_ATOMIC_SUB_X2      : FLAT_Real_GlblAtomics_gfx10<0x053>;
defm GLOBAL_ATOMIC_SMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x055>;
defm GLOBAL_ATOMIC_UMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x056>;
defm GLOBAL_ATOMIC_SMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x057>;
defm GLOBAL_ATOMIC_UMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x058>;
defm GLOBAL_ATOMIC_AND_X2      : FLAT_Real_GlblAtomics_gfx10<0x059>;
defm GLOBAL_ATOMIC_OR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05a>;
defm GLOBAL_ATOMIC_XOR_X2      : FLAT_Real_GlblAtomics_gfx10<0x05b>;
defm GLOBAL_ATOMIC_INC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05c>;
defm GLOBAL_ATOMIC_DEC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x060>;
defm GLOBAL_LOAD_DWORD_ADDTID  : FLAT_Real_AllAddr_gfx10<0x016>;
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>;

// ENC_FLAT_SCRATCH.
// GFX10 scratch loads and stores: each defm yields `_gfx10`, `_SADDR_gfx10`
// and `_ST_gfx10` reals.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x008>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x009>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00a>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00b>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_ScratchAllAddr_gfx10<0x00c>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_ScratchAllAddr_gfx10<0x00d>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_ScratchAllAddr_gfx10<0x00e>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_ScratchAllAddr_gfx10<0x00f>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x018>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x019>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x01a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_ScratchAllAddr_gfx10<0x01c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_ScratchAllAddr_gfx10<0x01d>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_ScratchAllAddr_gfx10<0x01e>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_ScratchAllAddr_gfx10<0x01f>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x020>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x021>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x022>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x023>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x024>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x025>;