1//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9def FlatOffset : ComplexPattern<i64, 2, "SelectFlatOffset", [], [SDNPWantRoot], -10>; 10def GlobalOffset : ComplexPattern<i64, 2, "SelectGlobalOffset", [], [SDNPWantRoot], -10>; 11def ScratchOffset : ComplexPattern<i32, 2, "SelectScratchOffset", [], [SDNPWantRoot], -10>; 12 13def GlobalSAddr : ComplexPattern<i64, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>; 14def ScratchSAddr : ComplexPattern<i32, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>; 15 16//===----------------------------------------------------------------------===// 17// FLAT classes 18//===----------------------------------------------------------------------===// 19 20class FLAT_Pseudo<string opName, dag outs, dag ins, 21 string asmOps, list<dag> pattern=[]> : 22 InstSI<outs, ins, "", pattern>, 23 SIMCInstr<opName, SIEncodingFamily.NONE> { 24 25 let isPseudo = 1; 26 let isCodeGenOnly = 1; 27 28 let FLAT = 1; 29 30 let UseNamedOperandTable = 1; 31 let hasSideEffects = 0; 32 let SchedRW = [WriteVMEM]; 33 34 string Mnemonic = opName; 35 string AsmOperands = asmOps; 36 37 bits<1> is_flat_global = 0; 38 bits<1> is_flat_scratch = 0; 39 40 bits<1> has_vdst = 1; 41 42 // We need to distinguish having saddr and enabling saddr because 43 // saddr is only valid for scratch and global instructions. Pre-gfx9 44 // these bits were reserved, so we also don't necessarily want to 45 // set these bits to the disabled value for the original flat 46 // segment instructions. 47 bits<1> has_saddr = 0; 48 bits<1> enabled_saddr = 0; 49 bits<7> saddr_value = 0; 50 bits<1> has_vaddr = 1; 51 52 bits<1> has_data = 1; 53 bits<1> has_glc = 1; 54 bits<1> glcValue = 0; 55 bits<1> has_dlc = 1; 56 bits<1> dlcValue = 0; 57 bits<1> has_sccb = 1; 58 bits<1> sccbValue = 0; 59 60 let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts, 61 !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace)); 62 63 // TODO: M0 if it could possibly access LDS (before gfx9? only)? 64 let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]); 65 66 // Internally, FLAT instruction are executed as both an LDS and a 67 // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT 68 // and are not considered done until both have been decremented. 69 let VM_CNT = 1; 70 let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch)); 71 72 let FlatGlobal = is_flat_global; 73 74 let FlatScratch = is_flat_scratch; 75} 76 77class FLAT_Real <bits<7> op, FLAT_Pseudo ps> : 78 InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>, 79 Enc64 { 80 81 let isPseudo = 0; 82 let isCodeGenOnly = 0; 83 84 let FLAT = 1; 85 86 // copy relevant pseudo op flags 87 let SubtargetPredicate = ps.SubtargetPredicate; 88 let AsmMatchConverter = ps.AsmMatchConverter; 89 let OtherPredicates = ps.OtherPredicates; 90 let TSFlags = ps.TSFlags; 91 let UseNamedOperandTable = ps.UseNamedOperandTable; 92 let SchedRW = ps.SchedRW; 93 let mayLoad = ps.mayLoad; 94 let mayStore = ps.mayStore; 95 let IsAtomicRet = ps.IsAtomicRet; 96 let IsAtomicNoRet = ps.IsAtomicNoRet; 97 let VM_CNT = ps.VM_CNT; 98 let LGKM_CNT = ps.LGKM_CNT; 99 100 // encoding fields 101 bits<8> vaddr; 102 bits<10> vdata; 103 bits<7> saddr; 104 bits<10> vdst; 105 106 bits<5> cpol; 107 108 // Only valid on gfx9 109 bits<1> lds = 0; // XXX - What does this actually do? 110 111 // Segment, 00=flat, 01=scratch, 10=global, 11=reserved 112 bits<2> seg = !if(ps.is_flat_global, 0b10, 113 !if(ps.is_flat_scratch, 0b01, 0)); 114 115 // Signed offset. Highest bit ignored for flat and treated as 12-bit 116 // unsigned for flat accesses. 117 bits<13> offset; 118 // GFX90A+ only: instruction uses AccVGPR for data 119 bits<1> acc = !if(ps.has_vdst, vdst{9}, !if(ps.has_data, vdata{9}, 0)); 120 121 // We don't use tfe right now, and it was removed in gfx9. 122 bits<1> tfe = 0; 123 124 // Only valid on GFX9+ 125 let Inst{12-0} = offset; 126 let Inst{13} = lds; 127 let Inst{15-14} = seg; 128 129 let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue); 130 let Inst{17} = cpol{CPolBit.SLC}; 131 let Inst{24-18} = op; 132 let Inst{31-26} = 0x37; // Encoding. 133 let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); 134 let Inst{47-40} = !if(ps.has_data, vdata{7-0}, ?); 135 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0); 136 137 // 54-48 is reserved. 138 let Inst{55} = acc; // nv on GFX9+, TFE before. AccVGPR for data on GFX90A. 139 let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, ?); 140} 141 142class GlobalSaddrTable <bit is_saddr, string Name = ""> { 143 bit IsSaddr = is_saddr; 144 string SaddrOp = Name; 145} 146 147// TODO: Is exec allowed for saddr? The disabled value 0x7f is the 148// same encoding value as exec_hi, so it isn't possible to use that if 149// saddr is 32-bit (which isn't handled here yet). 150class FLAT_Load_Pseudo <string opName, RegisterClass regClass, 151 bit HasTiedOutput = 0, 152 bit HasSaddr = 0, bit EnableSaddr = 0, 153 RegisterOperand vdata_op = getLdStRegisterOperand<regClass>.ret> : FLAT_Pseudo< 154 opName, 155 (outs vdata_op:$vdst), 156 !con( 157 !con( 158 !if(EnableSaddr, 159 (ins SReg_64:$saddr, VGPR_32:$vaddr), 160 (ins VReg_64:$vaddr)), 161 (ins flat_offset:$offset)), 162 // FIXME: Operands with default values do not work with following non-optional operands. 163 !if(HasTiedOutput, (ins CPol:$cpol, vdata_op:$vdst_in), 164 (ins CPol_0:$cpol))), 165 " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { 166 let has_data = 0; 167 let mayLoad = 1; 168 let has_saddr = HasSaddr; 169 let enabled_saddr = EnableSaddr; 170 let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", ""); 171 let maybeAtomic = 1; 172 173 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 174 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 175} 176 177class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass, 178 bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo< 179 opName, 180 (outs), 181 !con( 182 !if(EnableSaddr, 183 (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr), 184 (ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)), 185 (ins flat_offset:$offset, CPol_0:$cpol)), 186 " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { 187 let mayLoad = 0; 188 let mayStore = 1; 189 let has_vdst = 0; 190 let has_saddr = HasSaddr; 191 let enabled_saddr = EnableSaddr; 192 let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", ""); 193 let maybeAtomic = 1; 194} 195 196multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> { 197 let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { 198 def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>, 199 GlobalSaddrTable<0, opName>; 200 def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>, 201 GlobalSaddrTable<1, opName>; 202 } 203} 204 205class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass, 206 bit HasTiedOutput = 0, bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo< 207 opName, 208 (outs regClass:$vdst), 209 !con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)), 210 (ins flat_offset:$offset, CPol_0:$cpol), 211 !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))), 212 " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 213 let is_flat_global = 1; 214 let has_data = 0; 215 let mayLoad = 1; 216 let has_vaddr = 0; 217 let has_saddr = 1; 218 let enabled_saddr = EnableSaddr; 219 let maybeAtomic = 1; 220 let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); 221 222 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 223 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 224} 225 226multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass, 227 bit HasTiedOutput = 0, bit HasSignedOffset = 0> { 228 def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset>, 229 GlobalSaddrTable<0, opName>; 230 def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, HasSignedOffset, 1>, 231 GlobalSaddrTable<1, opName>; 232} 233 234multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> { 235 let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { 236 def "" : FLAT_Store_Pseudo<opName, regClass, 1>, 237 GlobalSaddrTable<0, opName>; 238 def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>, 239 GlobalSaddrTable<1, opName>; 240 } 241} 242 243class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass, 244 bit HasSignedOffset = 0, bit EnableSaddr = 0> : FLAT_Pseudo< 245 opName, 246 (outs), 247 !con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)), 248 (ins flat_offset:$offset, CPol:$cpol)), 249 " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 250 let is_flat_global = 1; 251 let mayLoad = 0; 252 let mayStore = 1; 253 let has_vdst = 0; 254 let has_vaddr = 0; 255 let has_saddr = 1; 256 let enabled_saddr = EnableSaddr; 257 let maybeAtomic = 1; 258 let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); 259} 260 261multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass, 262 bit HasSignedOffset = 0> { 263 def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset>, 264 GlobalSaddrTable<0, opName>; 265 def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, HasSignedOffset, 1>, 266 GlobalSaddrTable<1, opName>; 267} 268 269class FlatScratchInst <string sv_op, string mode> { 270 string SVOp = sv_op; 271 string Mode = mode; 272} 273 274class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass, 275 bit HasTiedOutput = 0, 276 bit EnableSaddr = 0, 277 bit EnableVaddr = !not(EnableSaddr)> 278 : FLAT_Pseudo< 279 opName, 280 (outs getLdStRegisterOperand<regClass>.ret:$vdst), 281 !con( 282 !if(EnableSaddr, 283 (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset), 284 !if(EnableVaddr, 285 (ins VGPR_32:$vaddr, flat_offset:$offset), 286 (ins flat_offset:$offset))), 287 !if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in), 288 (ins CPol_0:$cpol))), 289 " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 290 let has_data = 0; 291 let mayLoad = 1; 292 let has_saddr = 1; 293 let enabled_saddr = EnableSaddr; 294 let has_vaddr = EnableVaddr; 295 let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")); 296 let maybeAtomic = 1; 297 298 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 299 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 300} 301 302class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0, 303 bit EnableVaddr = !not(EnableSaddr), 304 RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret> : FLAT_Pseudo< 305 opName, 306 (outs), 307 !if(EnableSaddr, 308 (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), 309 !if(EnableVaddr, 310 (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol), 311 (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol))), 312 " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 313 let mayLoad = 0; 314 let mayStore = 1; 315 let has_vdst = 0; 316 let has_saddr = 1; 317 let enabled_saddr = EnableSaddr; 318 let has_vaddr = EnableVaddr; 319 let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")); 320 let maybeAtomic = 1; 321} 322 323multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> { 324 let is_flat_scratch = 1 in { 325 def "" : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput>, 326 FlatScratchInst<opName, "SV">; 327 def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>, 328 FlatScratchInst<opName, "SS">; 329 330 let SubtargetPredicate = HasFlatScratchSTMode in 331 def _ST : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0>, 332 FlatScratchInst<opName, "ST">; 333 } 334} 335 336multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> { 337 let is_flat_scratch = 1 in { 338 def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>, 339 FlatScratchInst<opName, "SV">; 340 def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>, 341 FlatScratchInst<opName, "SS">; 342 343 let SubtargetPredicate = HasFlatScratchSTMode in 344 def _ST : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0>, 345 FlatScratchInst<opName, "ST">; 346 } 347} 348 349class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins, 350 string asm, list<dag> pattern = []> : 351 FLAT_Pseudo<opName, outs, ins, asm, pattern> { 352 let mayLoad = 1; 353 let mayStore = 1; 354 let has_glc = 0; 355 let glcValue = 0; 356 let has_dlc = 0; 357 let dlcValue = 0; 358 let has_vdst = 0; 359 let has_sccb = 1; 360 let sccbValue = 0; 361 let maybeAtomic = 1; 362 let IsAtomicNoRet = 1; 363} 364 365class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins, 366 string asm, list<dag> pattern = []> 367 : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> { 368 let hasPostISelHook = 1; 369 let has_vdst = 1; 370 let glcValue = 1; 371 let dlcValue = 0; 372 let sccbValue = 0; 373 let IsAtomicNoRet = 0; 374 let IsAtomicRet = 1; 375 let PseudoInstr = NAME # "_RTN"; 376} 377 378multiclass FLAT_Atomic_Pseudo< 379 string opName, 380 RegisterClass vdst_rc, 381 ValueType vt, 382 SDPatternOperator atomic = null_frag, 383 ValueType data_vt = vt, 384 RegisterClass data_rc = vdst_rc, 385 bit isFP = isFloatType<data_vt>.ret, 386 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 387 def "" : FLAT_AtomicNoRet_Pseudo <opName, 388 (outs), 389 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol), 390 " $vaddr, $vdata$offset$cpol">, 391 GlobalSaddrTable<0, opName>, 392 AtomicNoRet <opName, 0> { 393 let PseudoInstr = NAME; 394 let FPAtomic = isFP; 395 let AddedComplexity = -1; // Prefer global atomics if available 396 } 397 398 def _RTN : FLAT_AtomicRet_Pseudo <opName, 399 (outs getLdStRegisterOperand<vdst_rc>.ret:$vdst), 400 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), 401 " $vdst, $vaddr, $vdata$offset$cpol", 402 [(set vt:$vdst, 403 (atomic (FlatOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>, 404 GlobalSaddrTable<0, opName#"_rtn">, 405 AtomicNoRet <opName, 1>{ 406 let FPAtomic = isFP; 407 let AddedComplexity = -1; // Prefer global atomics if available 408 } 409} 410 411multiclass FLAT_Global_Atomic_Pseudo_NO_RTN< 412 string opName, 413 RegisterClass vdst_rc, 414 ValueType vt, 415 SDPatternOperator atomic = null_frag, 416 ValueType data_vt = vt, 417 RegisterClass data_rc = vdst_rc, 418 bit isFP = isFloatType<data_vt>.ret, 419 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 420 421 def "" : FLAT_AtomicNoRet_Pseudo <opName, 422 (outs), 423 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol), 424 " $vaddr, $vdata, off$offset$cpol">, 425 GlobalSaddrTable<0, opName>, 426 AtomicNoRet <opName, 0> { 427 let has_saddr = 1; 428 let PseudoInstr = NAME; 429 let FPAtomic = isFP; 430 } 431 432 def _SADDR : FLAT_AtomicNoRet_Pseudo <opName, 433 (outs), 434 (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol), 435 " $vaddr, $vdata, $saddr$offset$cpol">, 436 GlobalSaddrTable<1, opName>, 437 AtomicNoRet <opName#"_saddr", 0> { 438 let has_saddr = 1; 439 let enabled_saddr = 1; 440 let PseudoInstr = NAME#"_SADDR"; 441 let FPAtomic = isFP; 442 } 443} 444 445multiclass FLAT_Global_Atomic_Pseudo_RTN< 446 string opName, 447 RegisterClass vdst_rc, 448 ValueType vt, 449 SDPatternOperator atomic = null_frag, 450 ValueType data_vt = vt, 451 RegisterClass data_rc = vdst_rc, 452 bit isFP = isFloatType<data_vt>.ret, 453 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret, 454 RegisterOperand vdst_op = getLdStRegisterOperand<vdst_rc>.ret> { 455 456 def _RTN : FLAT_AtomicRet_Pseudo <opName, 457 (outs vdst_op:$vdst), 458 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), 459 " $vdst, $vaddr, $vdata, off$offset$cpol", 460 [(set vt:$vdst, 461 (atomic (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$vdata))]>, 462 GlobalSaddrTable<0, opName#"_rtn">, 463 AtomicNoRet <opName, 1> { 464 let has_saddr = 1; 465 let FPAtomic = isFP; 466 } 467 468 def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName, 469 (outs vdst_op:$vdst), 470 (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol), 471 " $vdst, $vaddr, $vdata, $saddr$offset$cpol">, 472 GlobalSaddrTable<1, opName#"_rtn">, 473 AtomicNoRet <opName#"_saddr", 1> { 474 let has_saddr = 1; 475 let enabled_saddr = 1; 476 let PseudoInstr = NAME#"_SADDR_RTN"; 477 let FPAtomic = isFP; 478 } 479} 480 481multiclass FLAT_Global_Atomic_Pseudo< 482 string opName, 483 RegisterClass vdst_rc, 484 ValueType vt, 485 SDPatternOperator atomic_rtn = null_frag, 486 SDPatternOperator atomic_no_rtn = null_frag, 487 ValueType data_vt = vt, 488 RegisterClass data_rc = vdst_rc> { 489 let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { 490 defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, atomic_no_rtn, data_vt, data_rc>; 491 defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, atomic_rtn, data_vt, data_rc>; 492 } 493} 494 495//===----------------------------------------------------------------------===// 496// Flat Instructions 497//===----------------------------------------------------------------------===// 498 499def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>; 500def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>; 501def FLAT_LOAD_USHORT : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>; 502def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>; 503def FLAT_LOAD_DWORD : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>; 504def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>; 505def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>; 506def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>; 507 508def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>; 509def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>; 510def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>; 511def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>; 512def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>; 513def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>; 514 515let SubtargetPredicate = HasD16LoadStore in { 516def FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>; 517def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>; 518def FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>; 519def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>; 520def FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>; 521def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>; 522 523def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>; 524def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>; 525} 526 527defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap", 528 VGPR_32, i32, AMDGPUatomic_cmp_swap_flat_32, 529 v2i32, VReg_64>; 530 531defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2", 532 VReg_64, i64, AMDGPUatomic_cmp_swap_flat_64, 533 v2i64, VReg_128>; 534 535defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap", 536 VGPR_32, i32, atomic_swap_flat_32>; 537 538defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2", 539 VReg_64, i64, atomic_swap_flat_64>; 540 541defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add", 542 VGPR_32, i32, atomic_load_add_flat_32>; 543 544defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub", 545 VGPR_32, i32, atomic_load_sub_flat_32>; 546 547defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin", 548 VGPR_32, i32, atomic_load_min_flat_32>; 549 550defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin", 551 VGPR_32, i32, atomic_load_umin_flat_32>; 552 553defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax", 554 VGPR_32, i32, atomic_load_max_flat_32>; 555 556defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax", 557 VGPR_32, i32, atomic_load_umax_flat_32>; 558 559defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and", 560 VGPR_32, i32, atomic_load_and_flat_32>; 561 562defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or", 563 VGPR_32, i32, atomic_load_or_flat_32>; 564 565defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor", 566 VGPR_32, i32, atomic_load_xor_flat_32>; 567 568defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc", 569 VGPR_32, i32, atomic_inc_flat_32>; 570 571defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec", 572 VGPR_32, i32, atomic_dec_flat_32>; 573 574defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2", 575 VReg_64, i64, atomic_load_add_flat_64>; 576 577defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2", 578 VReg_64, i64, atomic_load_sub_flat_64>; 579 580defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2", 581 VReg_64, i64, atomic_load_min_flat_64>; 582 583defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2", 584 VReg_64, i64, atomic_load_umin_flat_64>; 585 586defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2", 587 VReg_64, i64, atomic_load_max_flat_64>; 588 589defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2", 590 VReg_64, i64, atomic_load_umax_flat_64>; 591 592defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2", 593 VReg_64, i64, atomic_load_and_flat_64>; 594 595defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2", 596 VReg_64, i64, atomic_load_or_flat_64>; 597 598defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2", 599 VReg_64, i64, atomic_load_xor_flat_64>; 600 601defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2", 602 VReg_64, i64, atomic_inc_flat_64>; 603 604defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2", 605 VReg_64, i64, atomic_dec_flat_64>; 606 607// GFX7-, GFX10-only flat instructions. 608let SubtargetPredicate = isGFX7GFX10 in { 609 610defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", 611 VGPR_32, f32, null_frag, v2f32, VReg_64>; 612 613defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2", 614 VReg_64, f64, null_frag, v2f64, VReg_128>; 615 616defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin", 617 VGPR_32, f32>; 618 619defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax", 620 VGPR_32, f32>; 621 622defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2", 623 VReg_64, f64>; 624 625defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", 626 VReg_64, f64>; 627 628} // End SubtargetPredicate = isGFX7GFX10 629 630let SubtargetPredicate = isGFX90APlus in { 631 defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64, int_amdgcn_flat_atomic_fadd>; 632 defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64, int_amdgcn_flat_atomic_fmin>; 633 defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64, int_amdgcn_flat_atomic_fmax>; 634 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64, int_amdgcn_global_atomic_fadd>; 635 defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64, int_amdgcn_global_atomic_fmin>; 636 defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64, int_amdgcn_global_atomic_fmax>; 637} // End SubtargetPredicate = isGFX90APlus 638 639defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; 640defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>; 641defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>; 642defm GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>; 643defm GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>; 644defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>; 645defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>; 646defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>; 647 648defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>; 649defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>; 650defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>; 651defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>; 652defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>; 653defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>; 654let OtherPredicates = [HasGFX10_BEncoding] in 655defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>; 656 657defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>; 658defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>; 659defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>; 660defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>; 661defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>; 662defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>; 663let OtherPredicates = [HasGFX10_BEncoding] in 664defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>; 665 666defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>; 667defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>; 668 669let is_flat_global = 1 in { 670defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap", 671 VGPR_32, i32, AMDGPUatomic_cmp_swap_global_32, null_frag, 672 v2i32, VReg_64>; 673 674defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2", 675 VReg_64, i64, AMDGPUatomic_cmp_swap_global_64, 676 null_frag, 677 v2i64, VReg_128>; 678 679defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap", 680 VGPR_32, i32, atomic_swap_global_32>; 681 682defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2", 683 VReg_64, i64, atomic_swap_global_64>; 684 685defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add", 686 VGPR_32, i32, atomic_load_add_global_32>; 687 688defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub", 689 VGPR_32, i32, atomic_load_sub_global_32>; 690 691defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin", 692 VGPR_32, i32, atomic_load_min_global_32>; 693 694defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin", 695 VGPR_32, i32, atomic_load_umin_global_32>; 696 697defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax", 698 VGPR_32, i32, atomic_load_max_global_32>; 699 700defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax", 701 VGPR_32, i32, atomic_load_umax_global_32>; 702 703defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and", 704 VGPR_32, i32, atomic_load_and_global_32>; 705 706defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or", 707 VGPR_32, i32, atomic_load_or_global_32>; 708 709defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor", 710 VGPR_32, i32, atomic_load_xor_global_32>; 711 712defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc", 713 VGPR_32, i32, atomic_inc_global_32>; 714 715defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec", 716 VGPR_32, i32, atomic_dec_global_32>; 717 718defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2", 719 VReg_64, i64, atomic_load_add_global_64>; 720 721defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2", 722 VReg_64, i64, atomic_load_sub_global_64>; 723 724defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2", 725 VReg_64, i64, atomic_load_min_global_64>; 726 727defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2", 728 VReg_64, i64, atomic_load_umin_global_64>; 729 730defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2", 731 VReg_64, i64, atomic_load_max_global_64>; 732 733defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2", 734 VReg_64, i64, atomic_load_umax_global_64>; 735 736defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2", 737 VReg_64, i64, atomic_load_and_global_64>; 738 739defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2", 740 VReg_64, i64, atomic_load_or_global_64>; 741 742defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2", 743 VReg_64, i64, atomic_load_xor_global_64>; 744 745defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", 746 VReg_64, i64, atomic_inc_global_64>; 747 748defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", 749 VReg_64, i64, atomic_dec_global_64>; 750 751let SubtargetPredicate = HasGFX10_BEncoding in 752defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub", 753 VGPR_32, i32, int_amdgcn_global_atomic_csub>; 754} // End is_flat_global = 1 755 756 757 758let SubtargetPredicate = HasFlatScratchInsts in { 759defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>; 760defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>; 761defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>; 762defm SCRATCH_LOAD_SSHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>; 763defm SCRATCH_LOAD_DWORD : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>; 764defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>; 765defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>; 766defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>; 767 768defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>; 769defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>; 770defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>; 771defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>; 772defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>; 773defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>; 774 775defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>; 776defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>; 777defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>; 778defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>; 779defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>; 780defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>; 781 782defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>; 783defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>; 784 785} // End SubtargetPredicate = HasFlatScratchInsts 786 787let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in { 788 defm GLOBAL_ATOMIC_FCMPSWAP : 789 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32>; 790 defm GLOBAL_ATOMIC_FMIN : 791 FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>; 792 defm GLOBAL_ATOMIC_FMAX : 793 FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>; 794 defm GLOBAL_ATOMIC_FCMPSWAP_X2 : 795 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64>; 796 defm GLOBAL_ATOMIC_FMIN_X2 : 797 FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>; 798 defm GLOBAL_ATOMIC_FMAX_X2 : 799 FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>; 800} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1 801 802let is_flat_global = 1 in { 803let OtherPredicates = [HasAtomicFaddInsts] in { 804 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < 805 "global_atomic_add_f32", VGPR_32, f32 806 >; 807 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < 808 "global_atomic_pk_add_f16", VGPR_32, v2f16 809 >; 810} // End OtherPredicates = [HasAtomicFaddInsts] 811 812let OtherPredicates = [isGFX90APlus] in { 813 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN < 814 "global_atomic_add_f32", VGPR_32, f32, int_amdgcn_global_atomic_fadd 815 >; 816 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN < 817 "global_atomic_pk_add_f16", VGPR_32, v2f16, int_amdgcn_global_atomic_fadd 818 >; 819} // End OtherPredicates = [isGFX90APlus] 820} // End is_flat_global = 1 821 822//===----------------------------------------------------------------------===// 823// Flat Patterns 824//===----------------------------------------------------------------------===// 825 826// Patterns for global loads with no offset. 827class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 828 (vt (node (FlatOffset i64:$vaddr, i16:$offset))), 829 (inst $vaddr, $offset) 830>; 831 832class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 833 (node (FlatOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in), 834 (inst $vaddr, $offset, 0, $in) 835>; 836 837class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 838 (node (GlobalOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in), 839 (inst $vaddr, $offset, 0, $in) 840>; 841 842class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 843 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$in)), 844 (inst $saddr, $voffset, $offset, 0, $in) 845>; 846 847class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 848 (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i16:$offset))), 849 (inst $vaddr, $offset) 850>; 851 852class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 853 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset))), 854 (inst $saddr, $voffset, $offset, 0) 855>; 856 857class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 858 ValueType vt> : GCNPat < 859 (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset)), 860 (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 861>; 862 863class GlobalAtomicStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 864 ValueType vt> : GCNPat < 865 (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$data), 866 (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 867>; 868 869class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 870 ValueType vt, ValueType data_vt = vt> : GCNPat < 871 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), data_vt:$data)), 872 (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset) 873>; 874 875class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 876 ValueType vt> : GCNPat < 877 (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset), vt:$data), 878 (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 879>; 880 881class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 882 (node vt:$data, (FlatOffset i64:$vaddr, i16:$offset)), 883 (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 884>; 885 886class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 887 (node vt:$data, (GlobalOffset i64:$vaddr, i16:$offset)), 888 (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 889>; 890 891class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 892 // atomic store follows atomic binop convention so the address comes 893 // first. 894 (node (FlatOffset i64:$vaddr, i16:$offset), vt:$data), 895 (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 896>; 897 898class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, 899 ValueType vt, ValueType data_vt = vt> : GCNPat < 900 // atomic store follows atomic binop convention so the address comes 901 // first. 902 (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data), 903 (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset) 904>; 905 906class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, 907 ValueType data_vt = vt> : GCNPat < 908 (vt (node (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)), 909 (inst $vaddr, $data, $offset) 910>; 911 912class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 913 (node (FlatOffset i64:$vaddr, i16:$offset), vt:$data), 914 (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 915>; 916 917class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 918 (node (GlobalOffset i64:$vaddr, i16:$offset), vt:$data), 919 (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 920>; 921 922class FlatSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt, 923 ValueType data_vt = vt> : GCNPat < 924 (vt (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)), 925 (inst $vaddr, $data, $offset) 926>; 927 928class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 929 (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset))), 930 (inst $vaddr, $offset) 931>; 932 933class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 934 (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in), 935 (inst $vaddr, $offset, 0, $in) 936>; 937 938class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 939 (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset)), 940 (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset) 941>; 942 943class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 944 (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset))), 945 (inst $saddr, $offset) 946>; 947 948class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 949 (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)), 950 (inst $saddr, $offset, 0, $in) 951>; 952 953class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 954 ValueType vt> : GCNPat < 955 (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset)), 956 (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 957>; 958 959let OtherPredicates = [HasFlatAddressSpace] in { 960 961def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>; 962def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>; 963def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>; 964def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>; 965def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>; 966def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>; 967def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>; 968def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>; 969def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>; 970def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>; 971def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>; 972 973def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>; 974def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>; 975 976def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>; 977def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>; 978 979foreach vt = Reg32Types.types in { 980def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>; 981def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>; 982} 983 984foreach vt = VReg_64.RegTypes in { 985def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>; 986def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>; 987} 988 989def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>; 990 991foreach vt = VReg_128.RegTypes in { 992def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>; 993def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>; 994} 995 996def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_flat_32, i32>; 997def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_flat_64, i64>; 998 999def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_load_add_global_32, i32>; 1000def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_load_sub_global_32, i32>; 1001def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global_32, i32>; 1002def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global_32, i32>; 1003def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_load_and_global_32, i32>; 1004def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_load_max_global_32, i32>; 1005def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_load_umax_global_32, i32>; 1006def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_load_min_global_32, i32>; 1007def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_load_umin_global_32, i32>; 1008def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_load_or_global_32, i32>; 1009def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global_32, i32>; 1010def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global_32, i32, v2i32>; 1011def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_load_xor_global_32, i32>; 1012 1013def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_load_add_global_64, i64>; 1014def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_load_sub_global_64, i64>; 1015def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global_64, i64>; 1016def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global_64, i64>; 1017def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_load_and_global_64, i64>; 1018def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_load_max_global_64, i64>; 1019def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_load_umax_global_64, i64>; 1020def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_load_min_global_64, i64>; 1021def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_load_umin_global_64, i64>; 1022def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_load_or_global_64, i64>; 1023def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global_64, i64>; 1024def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global_64, i64, v2i64>; 1025def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_load_xor_global_64, i64>; 1026 1027def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>; 1028def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>; 1029 1030let OtherPredicates = [D16PreservesUnusedBits] in { 1031def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>; 1032def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>; 1033 1034def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>; 1035def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>; 1036def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>; 1037def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>; 1038def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>; 1039def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>; 1040 1041def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>; 1042def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>; 1043def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>; 1044def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>; 1045def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>; 1046def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>; 1047} 1048 1049} // End OtherPredicates = [HasFlatAddressSpace] 1050 1051 1052multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1053 def : FlatLoadSignedPat <inst, node, vt> { 1054 let AddedComplexity = 10; 1055 } 1056 1057 def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1058 let AddedComplexity = 11; 1059 } 1060} 1061 1062multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1063 def : FlatSignedLoadPat_D16 <inst, node, vt> { 1064 let AddedComplexity = 10; 1065 } 1066 1067 def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1068 let AddedComplexity = 11; 1069 } 1070} 1071 1072multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node, 1073 ValueType vt> { 1074 def : FlatStoreSignedPat <inst, node, vt> { 1075 let AddedComplexity = 10; 1076 } 1077 1078 def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1079 let AddedComplexity = 11; 1080 } 1081} 1082 1083// Deal with swapped operands for atomic_store vs. regular store 1084multiclass GlobalFLATAtomicStorePats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1085 def : FlatStoreSignedAtomicPat <inst, node, vt> { 1086 let AddedComplexity = 10; 1087 } 1088 1089 def : GlobalAtomicStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1090 let AddedComplexity = 11; 1091 } 1092} 1093 1094multiclass GlobalFLATAtomicPats<string nortn_inst_name, SDPatternOperator node, 1095 ValueType vt, ValueType data_vt = vt> { 1096 def : FlatSignedAtomicPat <!cast<FLAT_Pseudo>(nortn_inst_name#"_RTN"), node, vt, data_vt> { 1097 let AddedComplexity = 10; 1098 } 1099 1100 def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(nortn_inst_name#"_SADDR_RTN"), node, vt, data_vt> { 1101 let AddedComplexity = 11; 1102 } 1103} 1104 1105multiclass GlobalFLATNoRtnAtomicPats<FLAT_Pseudo inst, SDPatternOperator node, 1106 ValueType vt> { 1107 def : FlatSignedAtomicPatNoRtn <inst, node, vt> { 1108 let AddedComplexity = 10; 1109 } 1110 1111 def : GlobalAtomicNoRtnSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1112 let AddedComplexity = 11; 1113 } 1114} 1115 1116multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1117 def : ScratchLoadSignedPat <inst, node, vt> { 1118 let AddedComplexity = 25; 1119 } 1120 1121 def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1122 let AddedComplexity = 26; 1123 } 1124} 1125 1126multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node, 1127 ValueType vt> { 1128 def : ScratchStoreSignedPat <inst, node, vt> { 1129 let AddedComplexity = 25; 1130 } 1131 1132 def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1133 let AddedComplexity = 26; 1134 } 1135} 1136 1137multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1138 def : ScratchLoadSignedPat_D16 <inst, node, vt> { 1139 let AddedComplexity = 25; 1140 } 1141 1142 def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1143 let AddedComplexity = 26; 1144 } 1145} 1146 1147let OtherPredicates = [HasFlatGlobalInsts] in { 1148 1149defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>; 1150defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>; 1151defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>; 1152defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>; 1153defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>; 1154defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>; 1155defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>; 1156defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>; 1157defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>; 1158defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>; 1159 1160foreach vt = Reg32Types.types in { 1161defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>; 1162defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>; 1163} 1164 1165foreach vt = VReg_64.RegTypes in { 1166defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, load_global, vt>; 1167defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>; 1168} 1169 1170defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX3, load_global, v3i32>; 1171 1172foreach vt = VReg_128.RegTypes in { 1173defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX4, load_global, vt>; 1174defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>; 1175} 1176 1177// There is no distinction for atomic load lowering during selection; 1178// the memory legalizer will set the cache bits and insert the 1179// appropriate waits. 1180defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>; 1181defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>; 1182 1183defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>; 1184defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>; 1185defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>; 1186defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>; 1187defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>; 1188 1189let OtherPredicates = [D16PreservesUnusedBits] in { 1190defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>; 1191defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>; 1192 1193defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>; 1194defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>; 1195defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>; 1196defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>; 1197defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>; 1198defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>; 1199 1200defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>; 1201defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>; 1202defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>; 1203defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>; 1204defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>; 1205defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>; 1206} 1207 1208defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORD, atomic_store_global_32, i32>; 1209defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_global_64, i64>; 1210 1211defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", atomic_load_add_global_32, i32>; 1212defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", atomic_load_sub_global_32, i32>; 1213defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", atomic_inc_global_32, i32>; 1214defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", atomic_dec_global_32, i32>; 1215defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", atomic_load_and_global_32, i32>; 1216defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", atomic_load_max_global_32, i32>; 1217defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", atomic_load_umax_global_32, i32>; 1218defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", atomic_load_min_global_32, i32>; 1219defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", atomic_load_umin_global_32, i32>; 1220defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", atomic_load_or_global_32, i32>; 1221defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", atomic_swap_global_32, i32>; 1222defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", AMDGPUatomic_cmp_swap_global_32, i32, v2i32>; 1223defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", atomic_load_xor_global_32, i32>; 1224defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CSUB", int_amdgcn_global_atomic_csub, i32>; 1225 1226defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", atomic_load_add_global_64, i64>; 1227defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", atomic_load_sub_global_64, i64>; 1228defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", atomic_inc_global_64, i64>; 1229defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", atomic_dec_global_64, i64>; 1230defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", atomic_load_and_global_64, i64>; 1231defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", atomic_load_max_global_64, i64>; 1232defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", atomic_load_umax_global_64, i64>; 1233defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", atomic_load_min_global_64, i64>; 1234defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", atomic_load_umin_global_64, i64>; 1235defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", atomic_load_or_global_64, i64>; 1236defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", atomic_swap_global_64, i64>; 1237defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", AMDGPUatomic_cmp_swap_global_64, i64, v2i64>; 1238defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", atomic_load_xor_global_64, i64>; 1239 1240let OtherPredicates = [HasAtomicFaddInsts] in { 1241defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_ADD_F32, atomic_load_fadd_global_noret_32, f32>; 1242defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_PK_ADD_F16, atomic_load_fadd_v2f16_global_noret_32, v2f16>; 1243} 1244 1245let OtherPredicates = [isGFX90APlus] in { 1246defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F32", atomic_load_fadd_global_32, f32>; 1247defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", atomic_load_fadd_v2f16_global_32, v2f16>; 1248defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", atomic_load_fadd_global_64, f64>; 1249defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", atomic_load_fmin_global_64, f64>; 1250defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", atomic_load_fmax_global_64, f64>; 1251def : FlatSignedAtomicPat <FLAT_ATOMIC_ADD_F64_RTN, atomic_load_fadd_flat_64, f64>; 1252def : FlatSignedAtomicPat <FLAT_ATOMIC_MIN_F64_RTN, atomic_load_fmin_flat_64, f64>; 1253def : FlatSignedAtomicPat <FLAT_ATOMIC_MAX_F64_RTN, atomic_load_fmax_flat_64, f64>; 1254} 1255 1256} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 1257 1258let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in { 1259 1260defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i32>; 1261defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i32>; 1262defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i32>; 1263defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>; 1264defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>; 1265defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>; 1266defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>; 1267defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>; 1268defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>; 1269defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, i16>; 1270 1271foreach vt = Reg32Types.types in { 1272defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORD, load_private, vt>; 1273defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>; 1274} 1275 1276foreach vt = VReg_64.RegTypes in { 1277defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX2, load_private, vt>; 1278defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>; 1279} 1280 1281defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>; 1282 1283foreach vt = VReg_128.RegTypes in { 1284defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX4, load_private, vt>; 1285defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>; 1286} 1287 1288defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i32>; 1289defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>; 1290defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, truncstorei16_private, i32>; 1291defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, i16>; 1292defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private, v3i32>; 1293 1294let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in { 1295defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>; 1296defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI, truncstorei8_hi16_private, i32>; 1297 1298defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>; 1299defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>; 1300defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2i16>; 1301defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2f16>; 1302defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2i16>; 1303defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2f16>; 1304 1305defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>; 1306defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>; 1307defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2i16>; 1308defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2f16>; 1309defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2i16>; 1310defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f16>; 1311} 1312 1313} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch] 1314 1315//===----------------------------------------------------------------------===// 1316// Target 1317//===----------------------------------------------------------------------===// 1318 1319//===----------------------------------------------------------------------===// 1320// CI 1321//===----------------------------------------------------------------------===// 1322 1323class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> : 1324 FLAT_Real <op, ps>, 1325 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> { 1326 let AssemblerPredicate = isGFX7Only; 1327 let DecoderNamespace="GFX7"; 1328} 1329 1330def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>; 1331def FLAT_LOAD_SBYTE_ci : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>; 1332def FLAT_LOAD_USHORT_ci : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>; 1333def FLAT_LOAD_SSHORT_ci : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>; 1334def FLAT_LOAD_DWORD_ci : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>; 1335def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>; 1336def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>; 1337def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>; 1338 1339def FLAT_STORE_BYTE_ci : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>; 1340def FLAT_STORE_SHORT_ci : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>; 1341def FLAT_STORE_DWORD_ci : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>; 1342def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>; 1343def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>; 1344def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>; 1345 1346multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> { 1347 def _ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 1348 def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 1349} 1350 1351defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>; 1352defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>; 1353defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>; 1354defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>; 1355defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>; 1356defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>; 1357defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>; 1358defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>; 1359defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>; 1360defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>; 1361defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>; 1362defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>; 1363defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>; 1364defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>; 1365defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>; 1366defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>; 1367defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>; 1368defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>; 1369defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>; 1370defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>; 1371defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>; 1372defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>; 1373defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>; 1374defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>; 1375defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>; 1376defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>; 1377 1378// CI Only flat instructions 1379defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>; 1380defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>; 1381defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>; 1382defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>; 1383defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>; 1384defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>; 1385 1386 1387//===----------------------------------------------------------------------===// 1388// VI 1389//===----------------------------------------------------------------------===// 1390 1391class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : 1392 FLAT_Real <op, ps>, 1393 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> { 1394 let AssemblerPredicate = isGFX8GFX9; 1395 let DecoderNamespace = "GFX8"; 1396 1397 let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); 1398 let AsmString = ps.Mnemonic # 1399 !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands); 1400} 1401 1402multiclass FLAT_Real_AllAddr_vi<bits<7> op, 1403 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { 1404 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>; 1405 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>; 1406} 1407 1408def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; 1409def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>; 1410def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>; 1411def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>; 1412def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>; 1413def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>; 1414def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>; 1415def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>; 1416 1417def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>; 1418def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>; 1419def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>; 1420def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; 1421def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>; 1422def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>; 1423def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>; 1424def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>; 1425 1426def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>; 1427def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; 1428def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>; 1429def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; 1430def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>; 1431def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; 1432 1433multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps, 1434 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { 1435 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>; 1436 def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>; 1437} 1438 1439multiclass FLAT_Global_Real_Atomics_vi<bits<7> op, 1440 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> : 1441 FLAT_Real_AllAddr_vi<op, has_sccb> { 1442 def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>; 1443 def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>; 1444} 1445 1446 1447defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>; 1448defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>; 1449defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>; 1450defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>; 1451defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>; 1452defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>; 1453defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>; 1454defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>; 1455defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>; 1456defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>; 1457defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>; 1458defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>; 1459defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>; 1460defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>; 1461defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>; 1462defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>; 1463defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>; 1464defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>; 1465defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>; 1466defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>; 1467defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>; 1468defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>; 1469defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>; 1470defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>; 1471defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>; 1472defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>; 1473 1474defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>; 1475defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>; 1476defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>; 1477defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>; 1478defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>; 1479defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>; 1480defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>; 1481defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>; 1482 1483defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>; 1484defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>; 1485defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>; 1486defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>; 1487defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>; 1488defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>; 1489 1490defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>; 1491defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>; 1492defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>; 1493defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>; 1494defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>; 1495defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; 1496defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; 1497defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; 1498 1499 1500defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; 1501defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>; 1502defm GLOBAL_ATOMIC_ADD : FLAT_Global_Real_Atomics_vi <0x42>; 1503defm GLOBAL_ATOMIC_SUB : FLAT_Global_Real_Atomics_vi <0x43>; 1504defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Real_Atomics_vi <0x44>; 1505defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Real_Atomics_vi <0x45>; 1506defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Real_Atomics_vi <0x46>; 1507defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Real_Atomics_vi <0x47>; 1508defm GLOBAL_ATOMIC_AND : FLAT_Global_Real_Atomics_vi <0x48>; 1509defm GLOBAL_ATOMIC_OR : FLAT_Global_Real_Atomics_vi <0x49>; 1510defm GLOBAL_ATOMIC_XOR : FLAT_Global_Real_Atomics_vi <0x4a>; 1511defm GLOBAL_ATOMIC_INC : FLAT_Global_Real_Atomics_vi <0x4b>; 1512defm GLOBAL_ATOMIC_DEC : FLAT_Global_Real_Atomics_vi <0x4c>; 1513defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Real_Atomics_vi <0x60>; 1514defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>; 1515defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Real_Atomics_vi <0x62>; 1516defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Real_Atomics_vi <0x63>; 1517defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Real_Atomics_vi <0x64>; 1518defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Real_Atomics_vi <0x65>; 1519defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Real_Atomics_vi <0x66>; 1520defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Real_Atomics_vi <0x67>; 1521defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Real_Atomics_vi <0x68>; 1522defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Real_Atomics_vi <0x69>; 1523defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Real_Atomics_vi <0x6a>; 1524defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>; 1525defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>; 1526 1527defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>; 1528defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>; 1529defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>; 1530defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>; 1531defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>; 1532defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>; 1533defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>; 1534defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>; 1535defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>; 1536defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>; 1537defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>; 1538defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>; 1539defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>; 1540defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>; 1541defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>; 1542defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>; 1543defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>; 1544defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>; 1545defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>; 1546defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; 1547defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; 1548defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; 1549 1550let SubtargetPredicate = HasAtomicFaddInsts in { 1551defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>; 1552defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; 1553} 1554 1555let SubtargetPredicate = isGFX90AOnly in { 1556 defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>; 1557 defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>; 1558 defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>; 1559 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>; 1560 defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>; 1561 defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>; 1562} // End SubtargetPredicate = isGFX90AOnly 1563 1564//===----------------------------------------------------------------------===// 1565// GFX10. 1566//===----------------------------------------------------------------------===// 1567 1568class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> : 1569 FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> { 1570 let AssemblerPredicate = isGFX10Plus; 1571 let DecoderNamespace = "GFX10"; 1572 1573 let Inst{11-0} = offset{11-0}; 1574 let Inst{12} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue); 1575 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d); 1576 let Inst{55} = 0; 1577} 1578 1579 1580multiclass FLAT_Real_Base_gfx10<bits<7> op> { 1581 def _gfx10 : 1582 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>; 1583} 1584 1585multiclass FLAT_Real_RTN_gfx10<bits<7> op> { 1586 def _RTN_gfx10 : 1587 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 1588} 1589 1590multiclass FLAT_Real_SADDR_gfx10<bits<7> op> { 1591 def _SADDR_gfx10 : 1592 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1593} 1594 1595multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> { 1596 def _SADDR_RTN_gfx10 : 1597 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 1598} 1599 1600multiclass FLAT_Real_ST_gfx10<bits<7> op> { 1601 def _ST_gfx10 : 1602 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")> { 1603 let Inst{54-48} = !cast<int>(EXEC_HI.HWEncoding); 1604 let OtherPredicates = [HasFlatScratchSTMode]; 1605 } 1606} 1607 1608multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> : 1609 FLAT_Real_Base_gfx10<op>, 1610 FLAT_Real_SADDR_gfx10<op>; 1611 1612multiclass FLAT_Real_Atomics_gfx10<bits<7> op> : 1613 FLAT_Real_Base_gfx10<op>, 1614 FLAT_Real_RTN_gfx10<op>; 1615 1616multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> : 1617 FLAT_Real_AllAddr_gfx10<op>, 1618 FLAT_Real_RTN_gfx10<op>, 1619 FLAT_Real_SADDR_RTN_gfx10<op>; 1620 1621multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> : 1622 FLAT_Real_RTN_gfx10<op>, 1623 FLAT_Real_SADDR_RTN_gfx10<op>; 1624 1625multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> : 1626 FLAT_Real_Base_gfx10<op>, 1627 FLAT_Real_SADDR_gfx10<op>, 1628 FLAT_Real_ST_gfx10<op>; 1629 1630// ENC_FLAT. 1631defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>; 1632defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>; 1633defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx10<0x00a>; 1634defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx10<0x00b>; 1635defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx10<0x00c>; 1636defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>; 1637defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>; 1638defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>; 1639defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx10<0x018>; 1640defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx10<0x019>; 1641defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx10<0x01a>; 1642defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>; 1643defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx10<0x01c>; 1644defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx10<0x01d>; 1645defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx10<0x01e>; 1646defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx10<0x01f>; 1647defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx10<0x020>; 1648defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>; 1649defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx10<0x022>; 1650defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>; 1651defm FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx10<0x024>; 1652defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>; 1653defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx10<0x030>; 1654defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx10<0x031>; 1655defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx10<0x032>; 1656defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx10<0x033>; 1657defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx10<0x035>; 1658defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx10<0x036>; 1659defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx10<0x037>; 1660defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx10<0x038>; 1661defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx10<0x039>; 1662defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx10<0x03a>; 1663defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_gfx10<0x03b>; 1664defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx10<0x03c>; 1665defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx10<0x03d>; 1666defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>; 1667defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx10<0x03f>; 1668defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx10<0x040>; 1669defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx10<0x050>; 1670defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x051>; 1671defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx10<0x052>; 1672defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx10<0x053>; 1673defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx10<0x055>; 1674defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx10<0x056>; 1675defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx10<0x057>; 1676defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx10<0x058>; 1677defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx10<0x059>; 1678defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx10<0x05a>; 1679defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>; 1680defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>; 1681defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>; 1682defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>; 1683defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f>; 1684defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060>; 1685 1686 1687// ENC_FLAT_GLBL. 1688defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>; 1689defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>; 1690defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>; 1691defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>; 1692defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>; 1693defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>; 1694defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>; 1695defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>; 1696defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>; 1697defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>; 1698defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>; 1699defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>; 1700defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>; 1701defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>; 1702defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>; 1703defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>; 1704defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>; 1705defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>; 1706defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>; 1707defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>; 1708defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>; 1709defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>; 1710defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>; 1711defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>; 1712defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>; 1713defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>; 1714defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>; 1715defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>; 1716defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>; 1717defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>; 1718defm GLOBAL_ATOMIC_UMAX : FLAT_Real_GlblAtomics_gfx10<0x038>; 1719defm GLOBAL_ATOMIC_AND : FLAT_Real_GlblAtomics_gfx10<0x039>; 1720defm GLOBAL_ATOMIC_OR : FLAT_Real_GlblAtomics_gfx10<0x03a>; 1721defm GLOBAL_ATOMIC_XOR : FLAT_Real_GlblAtomics_gfx10<0x03b>; 1722defm GLOBAL_ATOMIC_INC : FLAT_Real_GlblAtomics_gfx10<0x03c>; 1723defm GLOBAL_ATOMIC_DEC : FLAT_Real_GlblAtomics_gfx10<0x03d>; 1724defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x03e>; 1725defm GLOBAL_ATOMIC_FMIN : FLAT_Real_GlblAtomics_gfx10<0x03f>; 1726defm GLOBAL_ATOMIC_FMAX : FLAT_Real_GlblAtomics_gfx10<0x040>; 1727defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x050>; 1728defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x051>; 1729defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Real_GlblAtomics_gfx10<0x052>; 1730defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Real_GlblAtomics_gfx10<0x053>; 1731defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x055>; 1732defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x056>; 1733defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x057>; 1734defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x058>; 1735defm GLOBAL_ATOMIC_AND_X2 : FLAT_Real_GlblAtomics_gfx10<0x059>; 1736defm GLOBAL_ATOMIC_OR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05a>; 1737defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05b>; 1738defm GLOBAL_ATOMIC_INC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05c>; 1739defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>; 1740defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>; 1741defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>; 1742defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>; 1743defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x016>; 1744defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>; 1745 1746// ENC_FLAT_SCRATCH. 1747defm SCRATCH_LOAD_UBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x008>; 1748defm SCRATCH_LOAD_SBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x009>; 1749defm SCRATCH_LOAD_USHORT : FLAT_Real_ScratchAllAddr_gfx10<0x00a>; 1750defm SCRATCH_LOAD_SSHORT : FLAT_Real_ScratchAllAddr_gfx10<0x00b>; 1751defm SCRATCH_LOAD_DWORD : FLAT_Real_ScratchAllAddr_gfx10<0x00c>; 1752defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_ScratchAllAddr_gfx10<0x00d>; 1753defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_ScratchAllAddr_gfx10<0x00e>; 1754defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_ScratchAllAddr_gfx10<0x00f>; 1755defm SCRATCH_STORE_BYTE : FLAT_Real_ScratchAllAddr_gfx10<0x018>; 1756defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x019>; 1757defm SCRATCH_STORE_SHORT : FLAT_Real_ScratchAllAddr_gfx10<0x01a>; 1758defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>; 1759defm SCRATCH_STORE_DWORD : FLAT_Real_ScratchAllAddr_gfx10<0x01c>; 1760defm SCRATCH_STORE_DWORDX2 : FLAT_Real_ScratchAllAddr_gfx10<0x01d>; 1761defm SCRATCH_STORE_DWORDX4 : FLAT_Real_ScratchAllAddr_gfx10<0x01e>; 1762defm SCRATCH_STORE_DWORDX3 : FLAT_Real_ScratchAllAddr_gfx10<0x01f>; 1763defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x020>; 1764defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x021>; 1765defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x022>; 1766defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x023>; 1767defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x024>; 1768defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x025>; 1769