//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Complex patterns for the offset-only addressing forms. Each one names its
// matching routine by string.
def FlatOffset    : ComplexPattern<iPTR, 2, "SelectFlatOffset", [], [SDNPWantRoot], -10>;
def GlobalOffset  : ComplexPattern<iPTR, 2, "SelectGlobalOffset", [], [SDNPWantRoot], -10>;
def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [SDNPWantRoot], -10>;

// Complex patterns for the addressing forms that involve a scalar address.
def GlobalSAddr   : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>;
def ScratchSAddr  : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>;
def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [SDNPWantRoot], -10>;

//===----------------------------------------------------------------------===//
// FLAT classes
//===----------------------------------------------------------------------===//

// Base class of every FLAT-family pseudo instruction.
//
// Besides the usual instruction flags it records the mnemonic, the asm
// operand string, and a set of has_* / *Value bits. FLAT_Real reads several
// of those bits to decide which operand, constant or don't-care value goes
// into each encoding field.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern = []> :
    InstSI<outs, ins, "", pattern>,
    SIMCInstr<opName, SIEncodingFamily.NONE> {

  let isPseudo      = 1;
  let isCodeGenOnly = 1;

  let FLAT = 1;

  let UseNamedOperandTable = 1;
  let hasSideEffects       = 0;
  let SchedRW              = [WriteVMEM];

  string Mnemonic    = opName;
  string AsmOperands = asmOps;

  // Segment selectors; both clear means a plain flat-segment instruction.
  bits<1> is_flat_global  = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // Having an saddr field and enabling it are tracked separately: saddr
  // is only valid for scratch and global instructions. Before gfx9 these
  // bits were reserved, so for the original flat segment instructions we
  // do not necessarily want to write the "disabled" value into them.
  bits<1> has_saddr     = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value   = 0;
  bits<1> has_vaddr     = 1;

  bits<1> has_data  = 1;
  bits<1> has_glc   = 1;
  bits<1> glcValue  = 0;
  bits<1> has_dlc   = 1;
  bits<1> dlcValue  = 0;
  bits<1> has_sccb  = 1;
  bits<1> sccbValue = 0;
  bits<1> has_sve   = 0; // Scratch VGPR Enable
  bits<1> lds       = 0;
  bits<1> sve       = 0;

  // Global and scratch forms each have their own predicate; everything
  // else only needs the flat address space.
  let SubtargetPredicate =
    !if(is_flat_global, HasFlatGlobalInsts,
      !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a
  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
  // and are not considered done until both have been decremented.
  // LGKM_CNT is cleared here for the global and scratch forms.
  let VM_CNT   = 1;
  let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch));

  let FlatGlobal = is_flat_global;

  let FlatScratch = is_flat_scratch;
}

// Encoded (non-pseudo) FLAT instruction derived from the pseudo `ps`.
//
//   op     - 7-bit opcode placed in Inst{24-18}.
//   ps     - pseudo whose operand lists, flags and has_* bits are copied/read.
//   opName - mnemonic used in the asm string; defaults to ps.Mnemonic.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
    InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
    Enc64 {

  let isPseudo      = 0;
  let isCodeGenOnly = 0;

  let FLAT = 1;

  // copy relevant pseudo op flags
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let OtherPredicates      = ps.OtherPredicates;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let VM_CNT               = ps.VM_CNT;
  let LGKM_CNT             = ps.LGKM_CNT;
  let VALU                 = ps.VALU;

  // encoding fields
  bits<8>  vaddr;
  bits<10> vdata;
  bits<7>  saddr;
  bits<10> vdst;

  bits<5> cpol;

  // Only valid on gfx9
  bits<1> lds = ps.lds; // LDS DMA for global and scratch

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                  !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. Highest bit ignored for flat and treated as 12-bit
  // unsigned for flat accesses.
  bits<13> offset;
  // GFX90A+ only: instruction uses AccVGPR for data.
  // Taken from bit 9 of vdst when there is a vdst, otherwise from bit 9 of
  // vdata when there is data, otherwise 0.
  bits<1> acc = !if(ps.has_vdst, vdst{9}, !if(ps.has_data, vdata{9}, 0));

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Only valid on GFX9+
  let Inst{12-0}  = offset;
  // Bit 13 carries the pseudo's sve value when has_sve is set, else lds.
  let Inst{13}    = !if(ps.has_sve, ps.sve, lds);
  let Inst{15-14} = seg;

  let Inst{16}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
  let Inst{17}    = cpol{CPolBit.SLC};
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata{7-0}, ?);
  // Bits 54-48 hold saddr (0x7f when saddr is present but disabled). For
  // pseudos without saddr they are written as 0; per the note in
  // FLAT_Pseudo these bits were reserved before gfx9.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  let Inst{55}    = acc; // nv on GFX9+, TFE before. AccVGPR for data on GFX90A.
  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, ?);
}

// Mixin that tags a record with whether it is the saddr form (IsSaddr) and a
// name (SaddrOp) shared by the saddr / non-saddr variants of one operation.
// NOTE(review): presumably consumed by an instruction-mapping table defined
// elsewhere - confirm.
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  bit IsSaddr = is_saddr;
  string SaddrOp = Name;
}

// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).

// Load pseudo shared by the flat and global segments.
//
//   HasTiedOutput - add a $vdst_in input tied to $vdst.
//   HasSaddr      - the instruction has an saddr field; the asm string then
//                   prints either ", $saddr" or ", off".
//   EnableSaddr   - use the SReg_64 $saddr + VGPR_32 $vaddr operand form
//                   instead of a single VReg_64 $vaddr.
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
                        bit HasTiedOutput = 0,
                        bit HasSaddr = 0, bit EnableSaddr = 0,
                        RegisterOperand vdata_op = getLdStRegisterOperand<regClass>.ret> :
    FLAT_Pseudo<
      opName,
      (outs vdata_op:$vdst),
      !con(
        !con(
          !if(EnableSaddr,
            (ins SReg_64:$saddr, VGPR_32:$vaddr),
            (ins VReg_64:$vaddr)),
          (ins flat_offset:$offset)),
        // FIXME: Operands with default values do not work with following non-optional operands.
        !if(HasTiedOutput, (ins CPol:$cpol, vdata_op:$vdst_in),
                           (ins CPol_0:$cpol))),
      " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
  let has_data      = 0;
  let mayLoad       = 1;
  let has_saddr     = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr   = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
  let maybeAtomic   = 1;

  let Constraints     = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Store pseudo shared by the flat and global segments. HasSaddr and
// EnableSaddr have the same meaning as in FLAT_Load_Pseudo.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
                         bit HasSaddr = 0, bit EnableSaddr = 0> :
    FLAT_Pseudo<
      opName,
      (outs),
      !con(
        !if(EnableSaddr,
          (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr),
          (ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)),
        (ins flat_offset:$offset, CPol_0:$cpol)),
      " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
  let mayLoad       = 0;
  let mayStore      = 1;
  let has_vdst      = 0;
  let has_saddr     = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr   = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
  let maybeAtomic   = 1;
}

// Emits the two global load variants: the plain record (saddr printed as
// "off") and the _SADDR record.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    def ""     : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
                 GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
                 GlobalSaddrTable<1, opName>;
  }
}

// Global load with no vaddr operand: only an optional $saddr, the offset and
// cpol appear in the operand list.
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
                                      bit HasTiedOutput = 0, bit EnableSaddr = 0> :
    FLAT_Pseudo<
      opName,
      (outs regClass:$vdst),
      !con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
           (ins flat_offset:$offset, CPol_0:$cpol),
           !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
      " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let is_flat_global = 1;
  let has_data       = 0;
  let mayLoad        = 1;
  let has_vaddr      = 0;
  let has_saddr      = 1;
  let enabled_saddr  = EnableSaddr;
  let maybeAtomic    = 1;
  let PseudoInstr    = opName#!if(EnableSaddr, "_SADDR", "");

  let Constraints     = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Plain and _SADDR variants of FLAT_Global_Load_AddTid_Pseudo.
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
                                          bit HasTiedOutput = 0> {
  def ""     : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput>,
               GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, 1>,
               GlobalSaddrTable<1, opName>;
}

// Emits the two global store variants: plain and _SADDR.
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    def ""     : FLAT_Store_Pseudo<opName, regClass, 1>,
                 GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
                 GlobalSaddrTable<1, opName>;
  }
}

// Global load with the lds bit set. It has neither vdst nor vdata, is marked
// as both loading and storing, uses M0 in addition to EXEC, and forces
// LGKM_CNT on.
class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> :
    FLAT_Pseudo<
      opName,
      (outs ),
      !con(
        !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)),
        (ins flat_offset:$offset, CPol_0:$cpol)),
      " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> {
  let LGKM_CNT       = 1;
  let is_flat_global = 1;
  let lds            = 1;
  let has_data       = 0;
  let has_vdst       = 0;
  let mayLoad        = 1;
  let mayStore       = 1;
  let has_saddr      = 1;
  let enabled_saddr  = EnableSaddr;
  let VALU           = 1;
  let PseudoInstr    = opName#!if(EnableSaddr, "_SADDR", "");
  let Uses           = [M0, EXEC];
  let SchedRW        = [WriteVMEM, WriteLDS];
}

// Plain and _SADDR variants of FLAT_Global_Load_LDS_Pseudo.
multiclass FLAT_Global_Load_LDS_Pseudo<string opName> {
  def ""     : FLAT_Global_Load_LDS_Pseudo<opName>,
               GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>,
               GlobalSaddrTable<1, opName>;
}

// Global store with no vaddr operand: $vdata, an optional $saddr, the offset
// and cpol.
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
                                       bit EnableSaddr = 0> :
    FLAT_Pseudo<
      opName,
      (outs),
      !con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)),
           (ins flat_offset:$offset, CPol:$cpol)),
      " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let is_flat_global = 1;
  let mayLoad        = 0;
  let mayStore       = 1;
  let has_vdst       = 0;
  let has_vaddr      = 0;
  let has_saddr      = 1;
  let enabled_saddr  = EnableSaddr;
  let maybeAtomic    = 1;
  let PseudoInstr    = opName#!if(EnableSaddr, "_SADDR", "");
}

// Plain and _SADDR variants of FLAT_Global_Store_AddTid_Pseudo.
multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass> {
  def ""     : FLAT_Global_Store_AddTid_Pseudo<opName, regClass>,
               GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, 1>,
               GlobalSaddrTable<1, opName>;
}

// Mixin that tags a scratch record with the shared operation name (SVOp) and
// its addressing mode string; the modes used in this file are "SV", "SS",
// "SVS" and "ST".
class FlatScratchInst <string sv_op, string mode> {
  string SVOp = sv_op;
  string Mode = mode;
}

// Scratch load pseudo.
//
//   EnableSaddr - take an SGPR $saddr operand.
//   EnableSVE   - take both $vaddr and $saddr (the "_SVS" form).
//   EnableVaddr - take a VGPR $vaddr operand; by default set when EnableSVE
//                 is set or EnableSaddr is clear. With neither address
//                 operand the record gets the "_ST" suffix.
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
                                bit HasTiedOutput = 0,
                                bit EnableSaddr = 0,
                                bit EnableSVE = 0,
                                bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> :
    FLAT_Pseudo<
      opName,
      (outs getLdStRegisterOperand<regClass>.ret:$vdst),
      !con(
        !if(EnableSVE,
          (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
          !if(EnableSaddr,
            (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
            !if(EnableVaddr,
              (ins VGPR_32:$vaddr, flat_offset:$offset),
              (ins flat_offset:$offset)))),
        !if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in),
                           (ins CPol_0:$cpol))),
      " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let has_data      = 0;
  let mayLoad       = 1;
  let has_saddr     = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr     = EnableVaddr;
  let has_sve       = EnableSVE;
  let sve           = EnableVaddr;
  let PseudoInstr   = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
  let maybeAtomic   = 1;

  let Constraints     = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Scratch store pseudo; the Enable* parameters select the operand form the
// same way as in FLAT_Scratch_Load_Pseudo.
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass,
                                 bit EnableSaddr = 0,
                                 bit EnableSVE = 0,
                                 bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr)),
                                 RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret> :
    FLAT_Pseudo<
      opName,
      (outs),
      !if(EnableSVE,
        (ins vdata_op:$vdata, VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
        !if(EnableSaddr,
          (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
          !if(EnableVaddr,
            (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
            (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol)))),
      " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let mayLoad       = 0;
  let mayStore      = 1;
  let has_vdst      = 0;
  let has_saddr     = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr     = EnableVaddr;
  let has_sve       = EnableSVE;
  let sve           = EnableVaddr;
  let PseudoInstr   = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
  let maybeAtomic   = 1;
}

// Emits the four scratch load variants (SV, SS, SVS, ST). The SVS and ST
// records carry their own subtarget predicate.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> {
  let is_flat_scratch = 1 in {
    def ""     : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput>,
                 FlatScratchInst<opName, "SV">;
    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>,
                 FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSVSMode in
    def _SVS   : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1, 1>,
                 FlatScratchInst<opName, "SVS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST    : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0, 0>,
                 FlatScratchInst<opName, "ST">;
  }
}

// Emits the four scratch store variants (SV, SS, SVS, ST). The SVS and ST
// records carry their own subtarget predicate.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def ""     : FLAT_Scratch_Store_Pseudo<opName, regClass>,
                 FlatScratchInst<opName, "SV">;
    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>,
                 FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSVSMode in
    def _SVS   : FLAT_Scratch_Store_Pseudo<opName, regClass, 1, 1>,
                 FlatScratchInst<opName, "SVS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST    : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0, 0>,
                 FlatScratchInst<opName, "ST">;
  }
}

// Scratch load with the lds bit set: no vdst or vdata, marked as both loading
// and storing, uses M0 in addition to EXEC, and forces LGKM_CNT on.
class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0,
                                    bit EnableSVE = 0,
                                    bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> :
    FLAT_Pseudo<
      opName,
      (outs ),
      !if(EnableSVE,
        (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
        !if(EnableSaddr,
          (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
          !if(EnableVaddr,
            (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol),
            (ins flat_offset:$offset, CPol:$cpol)))),
      " "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {

  let LGKM_CNT        = 1;
  let is_flat_scratch = 1;
  let lds             = 1;
  let has_data        = 0;
  let has_vdst        = 0;
  let mayLoad         = 1;
  let mayStore        = 1;
  let has_saddr       = 1;
  let enabled_saddr   = EnableSaddr;
  let has_vaddr       = EnableVaddr;
  let has_sve         = EnableSVE;
  let sve             = EnableVaddr;
  let VALU            = 1;
  let PseudoInstr     = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
  let Uses            = [M0, EXEC];
  let SchedRW         = [WriteVMEM, WriteLDS];
}

// Emits the four scratch LDS-load variants (SV, SS, SVS, ST).
multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> {
  def ""     : FLAT_Scratch_Load_LDS_Pseudo<opName>,
               FlatScratchInst<opName, "SV">;
  def _SADDR : FLAT_Scratch_Load_LDS_Pseudo<opName, 1>,
               FlatScratchInst<opName, "SS">;
  def _SVS   : FLAT_Scratch_Load_LDS_Pseudo<opName, 1, 1>,
               FlatScratchInst<opName, "SVS">;
  def _ST    : FLAT_Scratch_Load_LDS_Pseudo<opName, 0, 0, 0>,
               FlatScratchInst<opName, "ST">;
}

// Atomic pseudo without a returned value: no vdst, and the glc encoding bit
// is fixed to glcValue (0) instead of being taken from the cpol operand.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                              string asm, list<dag> pattern = []> :
    FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  let mayLoad       = 1;
  let mayStore      = 1;
  let has_glc       = 0;
  let glcValue      = 0;
  let has_vdst      = 0;
  let has_sccb      = 1;
  let sccbValue     = 0;
  let maybeAtomic   = 1;
  let IsAtomicNoRet = 1;
}

// Atomic pseudo that returns the value: re-enables vdst, fixes the glc
// encoding bit to 1 and appends "_RTN" to the pseudo name.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []> :
    FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let hasPostISelHook = 1;
  let has_vdst        = 1;
  let glcValue        = 1;
  let sccbValue       = 0;
  let IsAtomicNoRet   = 0;
  let IsAtomicRet     = 1;
  let PseudoInstr     = NAME # "_RTN";
}

// Flat-segment atomic: emits the no-return record and its _RTN counterpart.
//
//   vdst_rc / vt      - register class and value type of the result.
//   data_vt / data_rc - type and register class of the data operand; they
//                       default to the result's.
//   isFP              - derived from data_vt, copied into FPAtomic.
multiclass FLAT_Atomic_Pseudo<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    bit isFP = isFloatType<data_vt>.ret,
    RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  def "" : FLAT_AtomicNoRet_Pseudo <opName,
             (outs),
             (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
             " $vaddr, $vdata$offset$cpol">,
           GlobalSaddrTable<0, opName>,
           AtomicNoRet <opName, 0> {
    let PseudoInstr     = NAME;
    let FPAtomic        = isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
               (outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
               (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
               " $vdst, $vaddr, $vdata$offset$cpol">,
             GlobalSaddrTable<0, opName#"_rtn">,
             AtomicNoRet <opName, 1> {
    let FPAtomic        = isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }
}

// Global atomic, no-return forms: the plain record (saddr printed as "off")
// and the _SADDR record.
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    bit isFP = isFloatType<data_vt>.ret,
    RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo <opName,
             (outs),
             (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
             " $vaddr, $vdata, off$offset$cpol">,
           GlobalSaddrTable<0, opName>,
           AtomicNoRet <opName, 0> {
    let has_saddr   = 1;
    let PseudoInstr = NAME;
    let FPAtomic    = isFP;
  }

  def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
                 (outs),
                 (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol),
                 " $vaddr, $vdata, $saddr$offset$cpol">,
               GlobalSaddrTable<1, opName>,
               AtomicNoRet <opName#"_saddr", 0> {
    let has_saddr     = 1;
    let enabled_saddr = 1;
    let PseudoInstr   = NAME#"_SADDR";
    let FPAtomic      = isFP;
  }
}

// Global atomic, returning forms: _RTN and _SADDR_RTN.
multiclass FLAT_Global_Atomic_Pseudo_RTN<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    bit isFP = isFloatType<data_vt>.ret,
    RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret,
    RegisterOperand vdst_op = getLdStRegisterOperand<vdst_rc>.ret> {

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
               (outs vdst_op:$vdst),
               (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
               " $vdst, $vaddr, $vdata, off$offset$cpol">,
             GlobalSaddrTable<0, opName#"_rtn">,
             AtomicNoRet <opName, 1> {
    let has_saddr = 1;
    let FPAtomic  = isFP;
  }

  def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
                     (outs vdst_op:$vdst),
                     (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
                     " $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
                   GlobalSaddrTable<1, opName#"_rtn">,
                   AtomicNoRet <opName#"_saddr", 1> {
    let has_saddr     = 1;
    let enabled_saddr = 1;
    let PseudoInstr   = NAME#"_SADDR_RTN";
    let FPAtomic      = isFP;
  }
}

// Global atomic with all four records: the no-return pair followed by the
// returning pair.
multiclass FLAT_Global_Atomic_Pseudo<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
    defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
  }
}

//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//

// Flat-segment loads.
def FLAT_LOAD_UBYTE    : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
def FLAT_LOAD_SBYTE    : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
def FLAT_LOAD_USHORT   : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
def FLAT_LOAD_SSHORT   : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
def FLAT_LOAD_DWORD    : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
def FLAT_LOAD_DWORDX2  : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4  : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3  : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;

// Flat-segment stores.
def FLAT_STORE_BYTE    : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
def FLAT_STORE_SHORT   : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
def FLAT_STORE_DWORD   : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;

// D16 loads (each with a tied $vdst_in input) and D16_HI stores.
let SubtargetPredicate = HasD16LoadStore in {
def FLAT_LOAD_UBYTE_D16     : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>;
def FLAT_LOAD_UBYTE_D16_HI  : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16     : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16_HI  : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16     : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16_HI  : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;

def FLAT_STORE_BYTE_D16_HI  : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>;
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
}

// Flat-segment integer atomics, 32-bit forms.
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
                                                  VGPR_32, i32, v2i32, VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
                                                  VReg_64, i64, v2i64, VReg_128>;
defm FLAT_ATOMIC_SWAP       : FLAT_Atomic_Pseudo <"flat_atomic_swap", VGPR_32, i32>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2", VReg_64, i64>;
defm FLAT_ATOMIC_ADD        : FLAT_Atomic_Pseudo <"flat_atomic_add", VGPR_32, i32>;
defm FLAT_ATOMIC_SUB        : FLAT_Atomic_Pseudo <"flat_atomic_sub", VGPR_32, i32>;
defm FLAT_ATOMIC_SMIN       : FLAT_Atomic_Pseudo <"flat_atomic_smin", VGPR_32, i32>;
defm FLAT_ATOMIC_UMIN       : FLAT_Atomic_Pseudo <"flat_atomic_umin", VGPR_32, i32>;
defm FLAT_ATOMIC_SMAX       : FLAT_Atomic_Pseudo <"flat_atomic_smax", VGPR_32, i32>;
defm FLAT_ATOMIC_UMAX       : FLAT_Atomic_Pseudo <"flat_atomic_umax", VGPR_32, i32>;
defm FLAT_ATOMIC_AND        : FLAT_Atomic_Pseudo <"flat_atomic_and", VGPR_32, i32>;
defm FLAT_ATOMIC_OR         : FLAT_Atomic_Pseudo <"flat_atomic_or", VGPR_32, i32>;
defm FLAT_ATOMIC_XOR        : FLAT_Atomic_Pseudo <"flat_atomic_xor", VGPR_32, i32>;
defm FLAT_ATOMIC_INC        : FLAT_Atomic_Pseudo <"flat_atomic_inc", VGPR_32, i32>;
defm FLAT_ATOMIC_DEC        : FLAT_Atomic_Pseudo <"flat_atomic_dec", VGPR_32, i32>;

// Flat-segment integer atomics, 64-bit (_X2) forms.
defm FLAT_ATOMIC_ADD_X2     : FLAT_Atomic_Pseudo <"flat_atomic_add_x2", VReg_64, i64>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2", VReg_64, i64>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2", VReg_64, i64>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2", VReg_64, i64>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2", VReg_64, i64>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2", VReg_64, i64>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Atomic_Pseudo <"flat_atomic_and_x2", VReg_64, i64>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Atomic_Pseudo <"flat_atomic_or_x2", VReg_64, i64>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2", VReg_64, i64>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2", VReg_64, i64>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2", VReg_64, i64>;

// GFX7-, GFX10-only flat instructions.
let SubtargetPredicate = isGFX7GFX10 in {

// 64-bit floating-point flat atomics.
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
                                                   VReg_64, f64, v2f64, VReg_128>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2", VReg_64, f64>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", VReg_64, f64>;

} // End SubtargetPredicate = isGFX7GFX10

// f64 add/min/max atomics, flat and global.
let SubtargetPredicate = isGFX90APlus in {
  defm FLAT_ATOMIC_ADD_F64   : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
  defm FLAT_ATOMIC_MIN_F64   : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>;
  defm FLAT_ATOMIC_MAX_F64   : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>;
  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
} // End SubtargetPredicate = isGFX90APlus

// Packed 16-bit add atomics; all three are declared with the v2f16 type.
let SubtargetPredicate = isGFX940Plus in {
  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>;
  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2f16>;
  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2f16>;
} // End SubtargetPredicate = isGFX940Plus

// GFX7-, GFX10-, GFX11-only flat instructions.
let SubtargetPredicate = isGFX7GFX10GFX11 in {

defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
                                                VGPR_32, f32, v2f32, VReg_64>;
defm FLAT_ATOMIC_FMIN     : FLAT_Atomic_Pseudo <"flat_atomic_fmin", VGPR_32, f32>;
defm FLAT_ATOMIC_FMAX     : FLAT_Atomic_Pseudo <"flat_atomic_fmax", VGPR_32, f32>;

} // End SubtargetPredicate = isGFX7GFX10GFX11

// GFX940-, GFX11-only flat instructions.
731let SubtargetPredicate = isGFX940GFX11Plus in { 732 defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>; 733} // End SubtargetPredicate = isGFX940GFX11Plus 734 735defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; 736defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>; 737defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>; 738defm GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>; 739defm GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>; 740defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>; 741defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>; 742defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>; 743 744defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>; 745defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>; 746defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>; 747defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>; 748defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>; 749defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>; 750let OtherPredicates = [HasGFX10_BEncoding] in 751defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>; 752 753defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>; 754defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>; 755defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>; 756defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>; 757defm 
GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>; 758defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>; 759let OtherPredicates = [HasGFX10_BEncoding] in 760defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>; 761 762defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>; 763defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>; 764 765let is_flat_global = 1 in { 766defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap", 767 VGPR_32, i32, v2i32, VReg_64>; 768 769defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2", 770 VReg_64, i64, v2i64, VReg_128>; 771 772defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap", 773 VGPR_32, i32>; 774 775defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2", 776 VReg_64, i64>; 777 778defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add", 779 VGPR_32, i32>; 780 781defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub", 782 VGPR_32, i32>; 783 784defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin", 785 VGPR_32, i32>; 786 787defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin", 788 VGPR_32, i32>; 789 790defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax", 791 VGPR_32, i32>; 792 793defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax", 794 VGPR_32, i32>; 795 796defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and", 797 VGPR_32, i32>; 798 799defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or", 800 VGPR_32, i32>; 801 802defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor", 803 VGPR_32, i32>; 804 805defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo 
<"global_atomic_inc", 806 VGPR_32, i32>; 807 808defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec", 809 VGPR_32, i32>; 810 811defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2", 812 VReg_64, i64>; 813 814defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2", 815 VReg_64, i64>; 816 817defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2", 818 VReg_64, i64>; 819 820defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2", 821 VReg_64, i64>; 822 823defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2", 824 VReg_64, i64>; 825 826defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2", 827 VReg_64, i64>; 828 829defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2", 830 VReg_64, i64>; 831 832defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2", 833 VReg_64, i64>; 834 835defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2", 836 VReg_64, i64>; 837 838defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", 839 VReg_64, i64>; 840 841defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", 842 VReg_64, i64>; 843 844let SubtargetPredicate = HasGFX10_BEncoding in 845defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_csub", 846 VGPR_32, i32>; 847 848defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">; 849defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">; 850defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">; 851defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">; 852defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">; 853 854} // End is_flat_global = 1 855 856 857 858let SubtargetPredicate = HasFlatScratchInsts in { 
// Scratch (private segment) load pseudos.
defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;

// D16 scratch loads; these pass 1 as the third template argument of
// FLAT_Scratch_Load_Pseudo.
defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>;
defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>;
defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>;
defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>;

// Scratch store pseudos.
defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;

defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;

// Scratch load-to-LDS pseudos.
defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">;
defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">;
defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">;
defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">;
defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">;

// Closes the `let SubtargetPredicate = HasFlatScratchInsts in {` region that
// is opened immediately before the first scratch load above.
} // End SubtargetPredicate = HasFlatScratchInsts

// Floating-point global atomics guarded by isGFX10Plus. The two compare-swap
// forms pass a distinct data type and data register class (v2f32/VReg_64 and
// v2f64/VReg_128) in addition to the result type and class.
let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
  defm GLOBAL_ATOMIC_FCMPSWAP :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;
  defm GLOBAL_ATOMIC_FMIN :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMAX :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FCMPSWAP_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
  defm GLOBAL_ATOMIC_FMIN_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMAX_X2 :
    FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1

// Global floating-point add atomics. The no-return and returning pseudos of
// the same instruction are instantiated separately because each carries its
// own OtherPredicates list.
let is_flat_global = 1 in {
  let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
  defm GLOBAL_ATOMIC_ADD_F32 :
    FLAT_Global_Atomic_Pseudo_NO_RTN <"global_atomic_add_f32", VGPR_32, f32>;

  let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in
  defm GLOBAL_ATOMIC_PK_ADD_F16 :
    FLAT_Global_Atomic_Pseudo_NO_RTN <"global_atomic_pk_add_f16", VGPR_32, v2f16>;

  let OtherPredicates = [HasAtomicFaddRtnInsts] in
  defm GLOBAL_ATOMIC_ADD_F32 :
    FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_add_f32", VGPR_32, f32>;

  let OtherPredicates = [isGFX90APlus] in
  defm GLOBAL_ATOMIC_PK_ADD_F16 :
    FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_pk_add_f16", VGPR_32, v2f16>;
} // End is_flat_global = 1

//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//

// Load through a FlatOffset-matched address (64-bit vaddr plus offset).
class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (vt (node (FlatOffset i64:$vaddr, i16:$offset))),
      (inst $vaddr, $offset)
    >;

// D16 load through a FlatOffset-matched address. $in is the incoming value
// operand of the node and is forwarded as the last instruction operand; the
// literal 0 fills the operand between $offset and $in.
class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (node (FlatOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
      (inst $vaddr, $offset, 0, $in)
    >;

// Same as FlatLoadPat_D16, but the address is matched with GlobalOffset.
class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (node (GlobalOffset (i64 VReg_64:$vaddr), i16:$offset), vt:$in),
      (inst $vaddr, $offset, 0, $in)
    >;

// D16 load whose address is matched by GlobalSAddr: a 64-bit SGPR base, a
// 32-bit VGPR offset and an immediate offset.
class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset),
                vt:$in)),
      (inst $saddr, $voffset, $offset, 0, $in)
    >;

// Load through a GlobalOffset-matched address.
class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i16:$offset))),
      (inst $vaddr, $offset)
    >;

// Load through a GlobalSAddr-matched address.
class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset))),
      (inst $saddr, $voffset, $offset, 0)
    >;

// Store through a GlobalSAddr-matched address; the node takes the data first.
class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (node vt:$data,
            (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset)),
      (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
    >;

// Atomic store through a GlobalSAddr-matched address; here the node takes the
// address first and the data second.
class GlobalAtomicStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset),
            vt:$data),
      (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
    >;

// Value-producing atomic through a GlobalSAddr-matched address. data_vt may
// differ from the result type vt (it defaults to vt).
class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node,
                            ValueType vt, ValueType data_vt = vt>
  : GCNPat <
      (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset),
                data_vt:$data)),
      (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset)
    >;

// Atomic without a typed result through a GlobalSAddr-matched address.
class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i16:$offset),
            vt:$data),
      (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
    >;

// Store through a FlatOffset-matched address.
class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (node vt:$data, (FlatOffset i64:$vaddr, i16:$offset)),
      (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
    >;

// Store through a GlobalOffset-matched address.
class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (node vt:$data, (GlobalOffset i64:$vaddr, i16:$offset)),
      (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
    >;

// Atomic store through a FlatOffset-matched address. An atomic store follows
// the atomic binop operand convention, so the address comes first.
class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (node (FlatOffset i64:$vaddr, i16:$offset), vt:$data),
      (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
    >;

// Atomic store through a GlobalOffset-matched address. An atomic store follows
// the atomic binop operand convention, so the address comes first.
class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node,
                                ValueType vt, ValueType data_vt = vt>
  : GCNPat <
      (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data),
      (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
    >;

// Atomic without a typed result through a FlatOffset-matched address.
class FlatAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (node (FlatOffset i64:$vaddr, i16:$offset), vt:$data),
      (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
    >;

// Emits two patterns for an atomic addressed through FlatOffset:
//   * <node>_<size>       -> <inst>_RTN
//   * <node>_noret_<size> -> <inst>, with AddedComplexity = 1
// where <size> is vt.Size. Both inst and node are given by name and resolved
// with !cast.
multiclass FlatAtomicPat <string inst, string node, ValueType vt,
                          ValueType data_vt = vt> {
  defvar retFrag = !cast<PatFrags>(node#"_"#vt.Size);
  defvar noRetFrag = !cast<PatFrags>(node#"_noret_"#vt.Size);

  def : GCNPat <
    (vt (retFrag (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)),
    (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr,
                                     getVregSrcForVT<data_vt>.ret:$data, $offset)
  >;

  let AddedComplexity = 1 in
  def : GCNPat <
    (vt (noRetFrag (FlatOffset i64:$vaddr, i16:$offset), data_vt:$data)),
    (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr,
                              getVregSrcForVT<data_vt>.ret:$data, $offset)
  >;
}

// GlobalOffset counterpart of FlatAtomicPat with two extra knobs:
//   * complexity - AddedComplexity of the returning pattern; the no-return
//                  pattern gets complexity + 1.
//   * isIntr     - when set, no "_<size>" suffix is appended to the node
//                  names (node and node#"_noret" are used as given).
multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt,
                                ValueType data_vt = vt, int complexity = 0,
                                bit isIntr = 0> {
  defvar retOp = !cast<SDPatternOperator>(node # !if(isIntr, "", "_" # vt.Size));
  defvar noRetOp = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size));

  let AddedComplexity = complexity in
  def : GCNPat <
    (vt (retOp (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
    (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr,
                                     getVregSrcForVT<data_vt>.ret:$data, $offset)
  >;

  let AddedComplexity = !add(complexity, 1) in
  def : GCNPat <
    (vt (noRetOp (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
    (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr,
                              getVregSrcForVT<data_vt>.ret:$data, $offset)
  >;
}

// FlatSignedAtomicPat with complexity 0 and isIntr set.
multiclass FlatSignedAtomicIntrPat <string inst, string node, ValueType vt,
                                    ValueType data_vt = vt> {
  defm : FlatSignedAtomicPat<inst, node, vt, data_vt, /* complexity */ 0, /* isIntr */ 1>;
}

// Atomic without a typed result through a GlobalOffset-matched address.
class FlatSignedAtomicPatNoRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (node (GlobalOffset i64:$vaddr, i16:$offset), vt:$data),
      (inst VReg_64:$vaddr, getVregSrcForVT<vt>.ret:$data, $offset)
    >;

// Value-producing atomic through a GlobalOffset-matched address.
class FlatSignedAtomicPatRtn <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
                              ValueType data_vt = vt>
  : GCNPat <
      (vt (node (GlobalOffset i64:$vaddr, i16:$offset), data_vt:$data)),
      (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)
    >;

// Scratch load addressed by a 32-bit VGPR plus offset (ScratchOffset).
class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset))),
      (inst $vaddr, $offset)
    >;

// D16 scratch load addressed through ScratchOffset.
class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (node (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset), vt:$in),
      (inst $vaddr, $offset, 0, $in)
    >;

// Scratch store addressed through ScratchOffset.
class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i16:$offset)),
      (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset)
    >;

// Scratch load addressed by a 32-bit SGPR plus offset (ScratchSAddr).
class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset))),
      (inst $saddr, $offset)
    >;

// D16 scratch load addressed through ScratchSAddr.
class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset), vt:$in)),
      (inst $saddr, $offset, 0, $in)
    >;

// Scratch store addressed through ScratchSAddr.
class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i16:$offset)),
      (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset)
    >;

// Scratch load addressed by a VGPR, an SGPR and an offset (ScratchSVAddr).
class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset))),
      (inst $vaddr, $saddr, $offset, 0)
    >;

// Scratch store addressed through ScratchSVAddr.
class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (node vt:$data,
            (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset)),
      (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset)
    >;

// D16 scratch load addressed through ScratchSVAddr.
class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt>
  : GCNPat <
      (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i16:$offset),
                vt:$in)),
      (inst $vaddr, $saddr, $offset, 0, $in)
    >;

// Selection patterns for the FLAT_* pseudos.
let OtherPredicates = [HasFlatAddressSpace] in {

  // Sub-dword atomic loads and extending loads.
  def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>;
  def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>;
  def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>;
  def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>;
  def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>;
  def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>;
  def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>;
  def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>;
  def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>;
  def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>;
  def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>;
  def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>;
  def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>;
  def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>;
  def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>;

  def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>;
  def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>;

  def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>;
  def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>;

  // Full-width loads and stores, one pattern per value type of each width.
  foreach vt = Reg32Types.types in {
    def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>;
    def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>;
  }

  foreach vt = VReg_64.RegTypes in {
    def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>;
    def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>;
  }

  def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>;

  foreach vt = VReg_128.RegTypes in {
    def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>;
    def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>;
  }

  def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>;
  def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>;
  def : FlatStoreAtomicPat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>;
  def : FlatStoreAtomicPat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>;
  def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>;
  def : FlatStoreAtomicPat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>;

  // Integer read-modify-write atomics. The FLAT_* pseudos are matched for both
  // the "flat" and the "global" flavour of each atomic node.
  foreach as = [ "flat", "global" ] in {
    defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_inc_"#as, i32>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_dec_"#as, i32>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>;

    defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_inc_"#as, i64>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_dec_"#as, i64>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>;
    defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>;
  } // end foreach as

  def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>;
  def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>;

  let OtherPredicates = [HasD16LoadStore] in {
    def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>;
    def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>;
  }

  let OtherPredicates = [D16PreservesUnusedBits] in {
    def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>;
    def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>;
    def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>;
    def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>;
    def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>;
    def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>;

    def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>;
    def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>;
    def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>;
    def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>;
    def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>;
    def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>;
  }

} // End OtherPredicates = [HasFlatAddressSpace]


// Each GlobalFLAT* multiclass below emits a VGPR-address pattern for `inst`
// and a pattern for the pseudo named <inst>_SADDR that takes an SGPR base.
// The SADDR pattern is given the higher AddedComplexity.

multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatLoadSignedPat <inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}

multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatSignedLoadPat_D16 <inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}

multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node,
                               ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatStoreSignedPat <inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}

// Deal with swapped operands for atomic_store vs. regular store
multiclass GlobalFLATAtomicStorePats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatStoreSignedAtomicPat <inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalAtomicStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}

// Returning-only atomic: takes the name of the no-return instruction and
// matches its <name>_RTN and <name>_SADDR_RTN pseudos.
multiclass GlobalFLATAtomicPatsRtn<string nortn_inst_name, SDPatternOperator node,
                                   ValueType vt, ValueType data_vt = vt> {
  let AddedComplexity = 10 in
  def : FlatSignedAtomicPatRtn <!cast<FLAT_Pseudo>(nortn_inst_name#"_RTN"), node, vt, data_vt>;

  let AddedComplexity = 11 in
  def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(nortn_inst_name#"_SADDR_RTN"), node, vt, data_vt>;
}

// Full set of patterns for a global atomic: the VGPR-address returning and
// no-return forms come from FlatSignedAtomicPat at complexity 10 (so 10 and
// 11), the SADDR forms are added here at 12 (returning) and 13 (no-return).
multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt,
                                ValueType data_vt = vt, bit isIntr = 0> {
  defvar retOp = !cast<SDPatternOperator>(node # !if(isIntr, "", "_" # vt.Size));
  defvar noRetOp = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size));

  defm : FlatSignedAtomicPat <inst, node, vt, data_vt, /* complexity */ 10, isIntr>;

  let AddedComplexity = 13 in
  def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), noRetOp, vt, data_vt>;

  let AddedComplexity = 12 in
  def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), retOp, vt, data_vt>;
}

// GlobalFLATAtomicPats with isIntr set.
multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt,
                                    ValueType data_vt = vt> {
  defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>;
}

multiclass GlobalFLATNoRtnAtomicPats<FLAT_Pseudo inst, SDPatternOperator node,
                                     ValueType vt> {
  let AddedComplexity = 10 in
  def : FlatSignedAtomicPatNoRtn <inst, node, vt>;

  let AddedComplexity = 11 in
  def : GlobalAtomicNoRtnSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt>;
}

1295multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1296 def : ScratchLoadSignedPat <inst, node, vt> { 1297 let AddedComplexity = 25; 1298 } 1299 1300 def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1301 let AddedComplexity = 26; 1302 } 1303 1304 def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1305 let SubtargetPredicate = HasFlatScratchSVSMode; 1306 let AddedComplexity = 27; 1307 } 1308} 1309 1310multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node, 1311 ValueType vt> { 1312 def : ScratchStoreSignedPat <inst, node, vt> { 1313 let AddedComplexity = 25; 1314 } 1315 1316 def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1317 let AddedComplexity = 26; 1318 } 1319 1320 def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1321 let SubtargetPredicate = HasFlatScratchSVSMode; 1322 let AddedComplexity = 27; 1323 } 1324} 1325 1326multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1327 def : ScratchLoadSignedPat_D16 <inst, node, vt> { 1328 let AddedComplexity = 25; 1329 } 1330 1331 def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1332 let AddedComplexity = 26; 1333 } 1334 1335 def : ScratchLoadSVaddrPat_D16 <!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1336 let SubtargetPredicate = HasFlatScratchSVSMode; 1337 let AddedComplexity = 27; 1338 } 1339} 1340 1341let OtherPredicates = [HasFlatGlobalInsts] in { 1342 1343defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>; 1344defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i16>; 1345defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i32>; 1346defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i16>; 1347defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, 
extloadi8_global, i32>; 1348defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>; 1349defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>; 1350defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>; 1351defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>; 1352defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>; 1353defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>; 1354defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>; 1355defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>; 1356defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>; 1357 1358foreach vt = Reg32Types.types in { 1359defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>; 1360defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>; 1361} 1362 1363foreach vt = VReg_64.RegTypes in { 1364defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, load_global, vt>; 1365defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>; 1366} 1367 1368defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX3, load_global, v3i32>; 1369 1370foreach vt = VReg_128.RegTypes in { 1371defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX4, load_global, vt>; 1372defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>; 1373} 1374 1375// There is no distinction for atomic load lowering during selection; 1376// the memory legalizer will set the cache bits and insert the 1377// appropriate waits. 
1378defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>; 1379defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>; 1380 1381defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>; 1382defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>; 1383defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>; 1384defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>; 1385defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>; 1386 1387let OtherPredicates = [HasD16LoadStore] in { 1388defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>; 1389defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>; 1390} 1391 1392let OtherPredicates = [D16PreservesUnusedBits] in { 1393defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>; 1394defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>; 1395defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>; 1396defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>; 1397defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>; 1398defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>; 1399 1400defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>; 1401defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>; 1402defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>; 1403defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>; 1404defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>; 1405defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>; 1406} 1407 1408defm : 
GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i32>; 1409defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>; 1410defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i32>; 1411defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i16>; 1412defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORD, atomic_store_32_global, i32>; 1413defm : GlobalFLATAtomicStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>; 1414 1415defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>; 1416defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>; 1417defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_inc_global", i32>; 1418defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_dec_global", i32>; 1419defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", "atomic_load_and_global", i32>; 1420defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", "atomic_load_max_global", i32>; 1421defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", "atomic_load_umax_global", i32>; 1422defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", "atomic_load_min_global", i32>; 1423defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", "atomic_load_umin_global", i32>; 1424defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", "atomic_load_or_global", i32>; 1425defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", "atomic_swap_global", i32>; 1426defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_global", i32, v2i32>; 1427defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>; 1428defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", int_amdgcn_global_atomic_csub, i32>; 1429 1430defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>; 1431defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>; 1432defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", 
"atomic_inc_global", i64>; 1433defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_dec_global", i64>; 1434defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", "atomic_load_and_global", i64>; 1435defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", "atomic_load_max_global", i64>; 1436defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", "atomic_load_umax_global", i64>; 1437defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", "atomic_load_min_global", i64>; 1438defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", "atomic_load_umin_global", i64>; 1439defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", "atomic_load_or_global", i64>; 1440defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", "atomic_swap_global", i64>; 1441defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>; 1442defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>; 1443 1444let OtherPredicates = [isGFX10Plus] in { 1445defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>; 1446defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>; 1447defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", "atomic_load_fmin_global", f64>; 1448defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", "atomic_load_fmax_global", f64>; 1449defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>; 1450defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>; 1451defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN_X2", "int_amdgcn_global_atomic_fmin", f64>; 1452defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>; 1453} 1454 1455let OtherPredicates = [HasAtomicFaddNoRtnInsts] in 1456defm : GlobalFLATNoRtnAtomicPats <GLOBAL_ATOMIC_ADD_F32, atomic_load_fadd_global_noret_32, f32>; 1457let OtherPredicates = [HasAtomicPkFaddNoRtnInsts] in 1458defm : GlobalFLATNoRtnAtomicPats 
<GLOBAL_ATOMIC_PK_ADD_F16, atomic_load_fadd_v2f16_global_noret_32, v2f16>; 1459 1460let OtherPredicates = [isGFX90APlus] in { 1461defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>; 1462defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_v2f16_global", v2f16>; 1463defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>; 1464defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>; 1465defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>; 1466defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", f32>; 1467defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", f64>; 1468defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", v2f16>; 1469defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>; 1470defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>; 1471defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>; 1472defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>; 1473defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>; 1474defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", f64>; 1475defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>; 1476defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>; 1477} 1478 1479let OtherPredicates = [isGFX940Plus] in { 1480defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>; 1481defm : FlatSignedAtomicPat <"FLAT_ATOMIC_PK_ADD_F16", "atomic_load_fadd_v2f16_flat", v2f16>; 1482defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", f32>; 1483defm : 
FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", v2f16>;
defm : FlatSignedAtomicIntrPat  <"FLAT_ATOMIC_PK_ADD_BF16",   "int_amdgcn_flat_atomic_fadd_v2bf16",   v2i16>;
defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>;
}

} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10

// Pattern instantiations that map *_private load/store operators onto the
// SCRATCH_* pseudos. The enclosing scope supplies
// OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch]; the two nested
// `let` scopes further down replace that list with one that keeps both
// predicates and adds a third.
let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in {

// Byte and short loads with an i32 or i16 result type.
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  extloadi8_private,   i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  zextloadi8_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE,  sextloadi8_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  extloadi8_private,   i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE,  zextloadi8_private,  i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE,  sextloadi8_private,  i16>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private,  i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>;
defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private,        i16>;

// DWORD / DWORDX2 / DWORDX4: one load pattern and one store pattern per
// element of the iterated value-type list.
foreach vt = Reg32Types.types in {
  defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORD,  load_private,  vt>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>;
}

foreach vt = VReg_64.RegTypes in {
  defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORDX2,  load_private,  vt>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>;
}

// DWORDX3 load is instantiated for v3i32 only (its store follows below).
defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>;

foreach vt = VReg_128.RegTypes in {
  defm : ScratchFLATLoadPats  <SCRATCH_LOAD_DWORDX4,  load_private,  vt>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>;
}

// Truncating / narrow stores and the DWORDX3 store.
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE,    truncstorei8_private,  i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE,    truncstorei8_private,  i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT,   truncstorei16_private, i32>;
defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT,   store_private,         i16>;
defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private,         v3i32>;

// *_D16_HI stores: additionally predicated on HasD16LoadStore.
let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in {
  defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>;
  defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI,  truncstorei8_hi16_private,  i32>;
}

// D16 loads: additionally predicated on D16PreservesUnusedBits. Each
// instruction/operator pair is instantiated for v2i16 and then v2f16.
let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in {
  // *_D16_HI variants paired with the *_d16_hi_private operators.
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private,   v2i16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private,   v2f16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private,         v2i16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private,         v2f16>;

  // *_D16 variants paired with the *_d16_lo_private operators.
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private,   v2i16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private,   v2f16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private,         v2i16>;
  defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private,         v2f16>;
}

} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch]

//===----------------------------------------------------------------------===//
// Target
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// CI
//===----------------------------------------------------------------------===//

// Real FLAT instruction in the SI encoding family. The assembler accepts it
// only under isGFX7Only and it is decoded in the "GFX7" namespace.
class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
  let DecoderNamespace   = "GFX7";
  let AssemblerPredicate = isGFX7Only;
}

// Loads.
def FLAT_LOAD_UBYTE_ci    : FLAT_Real_ci <0x08, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_ci    : FLAT_Real_ci <0x09, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_ci   : FLAT_Real_ci <0x0a, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_ci   : FLAT_Real_ci <0x0b, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_ci    : FLAT_Real_ci <0x0c, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_ci  : FLAT_Real_ci <0x0d, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_ci  : FLAT_Real_ci <0x0e, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_ci  : FLAT_Real_ci <0x0f, FLAT_LOAD_DWORDX3>;

// Stores.
def FLAT_STORE_BYTE_ci    : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_SHORT_ci   : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_DWORD_ci   : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;

// Emits the no-return (`_ci`) and returning (`_RTN_ci`) reals for one atomic.
// Both share `op`; the pseudos are looked up by name from ps.PseudoInstr.
multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
  def _ci     : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}

defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;

// CI Only flat instructions
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
defm FLAT_ATOMIC_FMIN        : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
defm FLAT_ATOMIC_FMAX        : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;


//===----------------------------------------------------------------------===//
// VI
//===----------------------------------------------------------------------===//

// Real FLAT instruction in the VI encoding family (assembler predicate
// isGFX8GFX9, decoder namespace "GFX8").
//
// has_sccb defaults to the pseudo's own flag and controls two things:
//  * Inst{25} is cpol's SCC bit when set, otherwise the pseudo's sccbValue;
//  * the "$sccb" placeholder is kept in the operand string when set and
//    substituted with the empty string when clear.
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace   = "GFX8";

  let Inst{25}  = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
  let AsmString = ps.Mnemonic #
                  !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
}

// Reals for the pseudo called NAME and for its NAME_SADDR sibling.
multiclass FLAT_Real_AllAddr_vi<bits<7> op,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
  def _vi       : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME),          has_sccb>;
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
}

// Real FLAT instruction in the GFX940 encoding family (assembler predicate
// isGFX940Plus, decoder namespace "GFX9"). Inst{13} takes the pseudo's sve
// value; Inst{25} is cpol's SCC bit or, without has_sccb, the fixed sccbValue.
class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
  let AssemblerPredicate = isGFX940Plus;
  let DecoderNamespace   = "GFX9";
  let Inst{13} = ps.sve;
  let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
}

// Reals for an instruction that has _SADDR, _SVS and _ST pseudo variants:
//  * `_vi` is restricted to isGFX8GFX9NotGFX940 via both predicate fields;
//  * `_SADDR_vi` is placed in the "GFX9" decoder namespace;
//  * three GFX940 reals are emitted for NAME, NAME_SVS and NAME_ST.
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
    let AssemblerPredicate = isGFX8GFX9NotGFX940;
    let OtherPredicates    = [isGFX8GFX9NotGFX940];
  }
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
    let DecoderNamespace = "GFX9";
  }
  let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in {
    def _VE_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
    def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
    def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
  }
}

// Reals for the *_LOAD_LDS_* pseudos.
//
// The `_vi` / `_SADDR_vi` reals use `pre_gfx940_op` and override AsmString:
// the mnemonic is the pseudo's PseudoInstr with "_lds" removed, and " lds" is
// appended after the operand string. The `_gfx940` / `_SADDR_gfx940` reals use
// `op` and keep the AsmString they get from FLAT_Real_gfx940.
multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
  string pre_gfx940_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr),
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {

  let OtherPredicates = [isGFX8GFX9NotGFX940] in {
    def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
      let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
    }
    def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
      let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
    }
  }

  let SubtargetPredicate = isGFX940Plus in {
    def _gfx940       : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
    def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
  }
}

// Everything FLAT_Real_AllAddr_LDS emits, plus GFX940 reals for the NAME_SVS
// and NAME_ST pseudos.
multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
  defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>;
  let SubtargetPredicate = isGFX940Plus in {
    def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
    def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
  }
}

// FLAT loads.
def FLAT_LOAD_UBYTE_vi   : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi   : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi  : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_vi  : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_vi   : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;

// FLAT stores.
def FLAT_STORE_BYTE_vi         : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
def FLAT_STORE_SHORT_vi        : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
def FLAT_STORE_DWORD_vi        : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;

// FLAT D16 loads.
def FLAT_LOAD_UBYTE_D16_vi    : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
def FLAT_LOAD_SBYTE_D16_vi    : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
def FLAT_LOAD_SHORT_D16_vi    : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;

// No-return and returning reals for one FLAT atomic. The pseudos are found by
// name through ps.PseudoInstr; has_sccb defaults to the flag of pseudo NAME.
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
  def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr),          has_sccb>;
  def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
}

// FLAT_Real_AllAddr_vi (NAME, NAME_SADDR) extended with the returning forms
// NAME_RTN and NAME_SADDR_RTN.
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
  FLAT_Real_AllAddr_vi<op, has_sccb> {
  def _RTN_vi       : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"),       has_sccb>;
  def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
}


// FLAT atomics.
defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;

// GLOBAL loads.
defm GLOBAL_LOAD_UBYTE   : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE   : FLAT_Real_AllAddr_vi <0x11>;
defm GLOBAL_LOAD_USHORT  : FLAT_Real_AllAddr_vi <0x12>;
defm GLOBAL_LOAD_SSHORT  : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD   : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;

// GLOBAL D16 loads.
defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_vi <0x20>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_vi <0x22>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Real_AllAddr_vi <0x24>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;

// GLOBAL stores.
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;

// GLOBAL loads to LDS: <gfx940 opcode, pre-gfx940 opcode>.
defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_LDS <0x26, 0x10>;
defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_LDS <0x27, 0x11>;
defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x28, 0x12>;
defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x29, 0x13>;
defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_LDS <0x2a, 0x14>;

// GLOBAL atomics.
defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
defm GLOBAL_ATOMIC_ADD        : FLAT_Global_Real_Atomics_vi <0x42>;
defm GLOBAL_ATOMIC_SUB        : FLAT_Global_Real_Atomics_vi <0x43>;
defm GLOBAL_ATOMIC_SMIN       : FLAT_Global_Real_Atomics_vi <0x44>;
defm GLOBAL_ATOMIC_UMIN       : FLAT_Global_Real_Atomics_vi <0x45>;
defm GLOBAL_ATOMIC_SMAX       : FLAT_Global_Real_Atomics_vi <0x46>;
defm GLOBAL_ATOMIC_UMAX       : FLAT_Global_Real_Atomics_vi <0x47>;
defm GLOBAL_ATOMIC_AND        : FLAT_Global_Real_Atomics_vi <0x48>;
defm GLOBAL_ATOMIC_OR         : FLAT_Global_Real_Atomics_vi <0x49>;
defm GLOBAL_ATOMIC_XOR        : FLAT_Global_Real_Atomics_vi <0x4a>;
defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi <0x4b>;
defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi <0x4c>;
defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi <0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi <0x62>;
defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi <0x63>;
defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi <0x64>;
defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi <0x65>;
defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi <0x66>;
defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi <0x67>;
defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi <0x68>;
defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi <0x69>;
defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;

// SCRATCH loads to LDS: <gfx940 opcode, pre-gfx940 opcode>.
defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_SVE_LDS <0x26, 0x10>;
defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_SVE_LDS <0x27, 0x11>;
defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_LDS <0x28, 0x12>;
defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_LDS <0x29, 0x13>;
defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_SVE_LDS <0x2a, 0x14>;

// SCRATCH loads and stores.
defm SCRATCH_LOAD_UBYTE   : FLAT_Real_AllAddr_SVE_vi <0x10>;
defm SCRATCH_LOAD_SBYTE   : FLAT_Real_AllAddr_SVE_vi <0x11>;
defm SCRATCH_LOAD_USHORT  : FLAT_Real_AllAddr_SVE_vi <0x12>;
defm SCRATCH_LOAD_SSHORT  : FLAT_Real_AllAddr_SVE_vi <0x13>;
defm SCRATCH_LOAD_DWORD   : FLAT_Real_AllAddr_SVE_vi <0x14>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x15>;
// SCRATCH_* reals instantiated through FLAT_Real_AllAddr_SVE_vi (continued).
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_SVE_vi <0x16>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_SVE_vi <0x17>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_SVE_vi <0x18>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x19>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_SVE_vi <0x20>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x21>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_SVE_vi <0x22>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x23>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_SVE_vi <0x24>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x25>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_SVE_vi <0x1a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x1b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_SVE_vi <0x1c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_SVE_vi <0x1d>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_SVE_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_SVE_vi <0x1f>;

let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
  // These instructions are encoded differently on gfx90* and gfx940. This is
  // the gfx90* flavour: the *_vi reals are instantiated with has_sccb = 0
  // (the trailing argument).
  defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Global_Real_Atomics_vi <0x4d, 0>;
  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x4e, 0>;
}

// F64 atomics, *_vi reals with has_sccb = 0, limited to isGFX90AOnly.
let SubtargetPredicate = isGFX90AOnly in {
  defm FLAT_ATOMIC_ADD_F64   : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>;
  defm FLAT_ATOMIC_MIN_F64   : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>;
  defm FLAT_ATOMIC_MAX_F64   : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>;
  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>;
  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>;
  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
} // End SubtargetPredicate = isGFX90AOnly

// GFX940 reals for the pseudo called NAME and for its NAME_SADDR sibling.
multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> {
  def _gfx940       : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
  def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}

// GFX940 no-return and returning reals for one FLAT atomic; the pseudos are
// looked up by name from ps.PseudoInstr.
multiclass FLAT_Real_Atomics_gfx940 <bits<7> op, FLAT_Pseudo ps> {
  def _gfx940     : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}

// FLAT_Real_AllAddr_gfx940 (NAME, NAME_SADDR) extended with the returning
// forms NAME_RTN and NAME_SADDR_RTN.
multiclass FLAT_Global_Real_Atomics_gfx940<bits<7> op> :
  FLAT_Real_AllAddr_gfx940<op> {
  def _RTN_gfx940       : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
  def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}

let SubtargetPredicate = isGFX940Plus in {
  // These instructions are encoded differently on gfx90* and gfx940. This is
  // the gfx940 flavour, built from FLAT_Real_gfx940.
  defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Global_Real_Atomics_gfx940 <0x4d>;
  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940 <0x4e>;

  defm FLAT_ATOMIC_ADD_F64   : FLAT_Real_Atomics_gfx940<0x4f, FLAT_ATOMIC_ADD_F64>;
  defm FLAT_ATOMIC_MIN_F64   : FLAT_Real_Atomics_gfx940<0x50, FLAT_ATOMIC_MIN_F64>;
  defm FLAT_ATOMIC_MAX_F64   : FLAT_Real_Atomics_gfx940<0x51, FLAT_ATOMIC_MAX_F64>;
  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;

  // The remaining entries use the *_vi multiclasses with has_sccb left at its
  // default.
  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi<0x4d, FLAT_ATOMIC_ADD_F32>;
  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi<0x4e, FLAT_ATOMIC_PK_ADD_F16>;
  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi<0x52, FLAT_ATOMIC_PK_ADD_BF16>;
  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
} // End SubtargetPredicate = isGFX940Plus

//===----------------------------------------------------------------------===//
// GFX10.
//===----------------------------------------------------------------------===//

// Real FLAT instruction in the GFX10 encoding family (assembler predicate
// isGFX10Only, decoder namespace "GFX10").
//  * Inst{11-0}  : low 12 bits of `offset`.
//  * Inst{12}    : cpol's DLC bit, or the pseudo's dlcValue without has_dlc.
//  * Inst{54-48} : `saddr` only when the pseudo both has and enables saddr;
//                  0x7d in every other case.
//  * Inst{55}    : always 0.
class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> :
    FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> {
  let AssemblerPredicate = isGFX10Only;
  let DecoderNamespace   = "GFX10";

  let Inst{11-0}  = offset{11-0};
  let Inst{12}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d);
  let Inst{55}    = 0;
}


// Single-variant building blocks: each emits one GFX10 real for the pseudo
// named NAME plus the suffix shown in the !cast.
multiclass FLAT_Real_Base_gfx10<bits<7> op> {
  def _gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>;
}

multiclass FLAT_Real_RTN_gfx10<bits<7> op> {
  def _RTN_gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
}

multiclass FLAT_Real_SADDR_gfx10<bits<7> op> {
  def _SADDR_gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}

multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> {
  def _SADDR_RTN_gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}

// The _ST real overrides Inst{54-48} with EXEC_HI's hardware encoding and is
// predicated on HasFlatScratchSTMode.
multiclass FLAT_Real_ST_gfx10<bits<7> op> {
  def _ST_gfx10 : FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")> {
    let Inst{54-48}     = !cast<int>(EXEC_HI.HWEncoding);
    let OtherPredicates = [HasFlatScratchSTMode];
  }
}

// Combinations of the building blocks above.
multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> :
  FLAT_Real_Base_gfx10<op>,
  FLAT_Real_SADDR_gfx10<op>;

multiclass FLAT_Real_Atomics_gfx10<bits<7> op> :
  FLAT_Real_Base_gfx10<op>,
  FLAT_Real_RTN_gfx10<op>;

multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> :
  FLAT_Real_AllAddr_gfx10<op>,
  FLAT_Real_RTN_gfx10<op>,
  FLAT_Real_SADDR_RTN_gfx10<op>;

multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> :
  FLAT_Real_RTN_gfx10<op>,
  FLAT_Real_SADDR_RTN_gfx10<op>;

multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> :
  FLAT_Real_Base_gfx10<op>,
  FLAT_Real_SADDR_gfx10<op>,
  FLAT_Real_ST_gfx10<op>;

// Reals for the *_LOAD_LDS_* pseudos. AsmString is overridden for each
// variant: `opname` (by default the pseudo's PseudoInstr with "_lds" removed),
// then that variant's operand string, then " lds".
multiclass FLAT_Real_AllAddr_LDS_gfx10<bits<7> op,
  string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> {
  let AsmString = opname # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in {
    defm "" : FLAT_Real_Base_gfx10<op>;
  }

  let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in {
    defm "" : FLAT_Real_SADDR_gfx10<op>;
  }
}

// As FLAT_Real_AllAddr_LDS_gfx10, plus the _ST variant with the same
// AsmString treatment.
multiclass FLAT_Real_ScratchAllAddr_LDS_gfx10<bits<7> op,
  string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> {
  defm "" : FLAT_Real_AllAddr_LDS_gfx10<op>;

  let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_ST").AsmOperands # " lds" in {
    defm "" : FLAT_Real_ST_gfx10<op>;
  }
}

// ENC_FLAT.
defm FLAT_LOAD_UBYTE         : FLAT_Real_Base_gfx10<0x008>;
defm FLAT_LOAD_SBYTE         : FLAT_Real_Base_gfx10<0x009>;
defm FLAT_LOAD_USHORT        : FLAT_Real_Base_gfx10<0x00a>;
defm FLAT_LOAD_SSHORT        : FLAT_Real_Base_gfx10<0x00b>;
defm FLAT_LOAD_DWORD         : FLAT_Real_Base_gfx10<0x00c>;
defm FLAT_LOAD_DWORDX2       : FLAT_Real_Base_gfx10<0x00d>;
defm FLAT_LOAD_DWORDX4       : FLAT_Real_Base_gfx10<0x00e>;
defm FLAT_LOAD_DWORDX3       : FLAT_Real_Base_gfx10<0x00f>;
defm FLAT_STORE_BYTE         : FLAT_Real_Base_gfx10<0x018>;
defm FLAT_STORE_BYTE_D16_HI  : FLAT_Real_Base_gfx10<0x019>;
defm FLAT_STORE_SHORT        : FLAT_Real_Base_gfx10<0x01a>;
defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>;
defm FLAT_STORE_DWORD        : FLAT_Real_Base_gfx10<0x01c>;
defm FLAT_STORE_DWORDX2      : FLAT_Real_Base_gfx10<0x01d>;
defm FLAT_STORE_DWORDX4      : FLAT_Real_Base_gfx10<0x01e>;
defm FLAT_STORE_DWORDX3      : FLAT_Real_Base_gfx10<0x01f>;
defm FLAT_LOAD_UBYTE_D16     : FLAT_Real_Base_gfx10<0x020>;
defm FLAT_LOAD_UBYTE_D16_HI  : FLAT_Real_Base_gfx10<0x021>;
defm FLAT_LOAD_SBYTE_D16     : FLAT_Real_Base_gfx10<0x022>;
defm FLAT_LOAD_SBYTE_D16_HI  : FLAT_Real_Base_gfx10<0x023>;
defm FLAT_LOAD_SHORT_D16     : FLAT_Real_Base_gfx10<0x024>;
defm FLAT_LOAD_SHORT_D16_HI  : FLAT_Real_Base_gfx10<0x025>;
defm FLAT_ATOMIC_SWAP        : FLAT_Real_Atomics_gfx10<0x030>;
defm FLAT_ATOMIC_CMPSWAP     : FLAT_Real_Atomics_gfx10<0x031>;
defm FLAT_ATOMIC_ADD         : FLAT_Real_Atomics_gfx10<0x032>;
defm FLAT_ATOMIC_SUB         : FLAT_Real_Atomics_gfx10<0x033>;
defm FLAT_ATOMIC_SMIN        : FLAT_Real_Atomics_gfx10<0x035>;
defm FLAT_ATOMIC_UMIN        : FLAT_Real_Atomics_gfx10<0x036>;
defm FLAT_ATOMIC_SMAX        : FLAT_Real_Atomics_gfx10<0x037>;
defm FLAT_ATOMIC_UMAX        : FLAT_Real_Atomics_gfx10<0x038>;
defm FLAT_ATOMIC_AND         : FLAT_Real_Atomics_gfx10<0x039>;
defm FLAT_ATOMIC_OR          : FLAT_Real_Atomics_gfx10<0x03a>;
defm FLAT_ATOMIC_XOR         : FLAT_Real_Atomics_gfx10<0x03b>;
defm FLAT_ATOMIC_INC         : FLAT_Real_Atomics_gfx10<0x03c>;
defm FLAT_ATOMIC_DEC         : FLAT_Real_Atomics_gfx10<0x03d>;
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Real_Atomics_gfx10<0x03e>;
defm FLAT_ATOMIC_FMIN        : FLAT_Real_Atomics_gfx10<0x03f>;
defm FLAT_ATOMIC_FMAX        : FLAT_Real_Atomics_gfx10<0x040>;
defm FLAT_ATOMIC_SWAP_X2     : FLAT_Real_Atomics_gfx10<0x050>;
defm FLAT_ATOMIC_CMPSWAP_X2  : FLAT_Real_Atomics_gfx10<0x051>;
defm FLAT_ATOMIC_ADD_X2      : FLAT_Real_Atomics_gfx10<0x052>;
defm FLAT_ATOMIC_SUB_X2      : FLAT_Real_Atomics_gfx10<0x053>;
defm FLAT_ATOMIC_SMIN_X2     : FLAT_Real_Atomics_gfx10<0x055>;
defm FLAT_ATOMIC_UMIN_X2     : FLAT_Real_Atomics_gfx10<0x056>;
defm FLAT_ATOMIC_SMAX_X2     : FLAT_Real_Atomics_gfx10<0x057>;
defm FLAT_ATOMIC_UMAX_X2     : FLAT_Real_Atomics_gfx10<0x058>;
defm FLAT_ATOMIC_AND_X2      : FLAT_Real_Atomics_gfx10<0x059>;
defm FLAT_ATOMIC_OR_X2       : FLAT_Real_Atomics_gfx10<0x05a>;
defm FLAT_ATOMIC_XOR_X2      : FLAT_Real_Atomics_gfx10<0x05b>;
defm FLAT_ATOMIC_INC_X2      : FLAT_Real_Atomics_gfx10<0x05c>;
defm FLAT_ATOMIC_DEC_X2      : FLAT_Real_Atomics_gfx10<0x05d>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_gfx10<0x05f>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_gfx10<0x060>;


// ENC_FLAT_GLBL.
defm GLOBAL_LOAD_UBYTE         : FLAT_Real_AllAddr_gfx10<0x008>;
defm GLOBAL_LOAD_SBYTE         : FLAT_Real_AllAddr_gfx10<0x009>;
defm GLOBAL_LOAD_USHORT        : FLAT_Real_AllAddr_gfx10<0x00a>;
defm GLOBAL_LOAD_SSHORT        : FLAT_Real_AllAddr_gfx10<0x00b>;
defm GLOBAL_LOAD_DWORD         : FLAT_Real_AllAddr_gfx10<0x00c>;
defm GLOBAL_LOAD_DWORDX2       : FLAT_Real_AllAddr_gfx10<0x00d>;
defm GLOBAL_LOAD_DWORDX4       : FLAT_Real_AllAddr_gfx10<0x00e>;
defm GLOBAL_LOAD_DWORDX3       : FLAT_Real_AllAddr_gfx10<0x00f>;
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_gfx10<0x018>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x019>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_gfx10<0x01a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_gfx10<0x01c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_gfx10<0x01d>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_gfx10<0x01e>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_gfx10<0x01f>;
defm GLOBAL_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x020>;
defm GLOBAL_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x021>;
defm GLOBAL_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_gfx10<0x022>;
defm GLOBAL_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_gfx10<0x023>;
defm GLOBAL_LOAD_SHORT_D16     : FLAT_Real_AllAddr_gfx10<0x024>;
defm GLOBAL_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_gfx10<0x025>;
defm GLOBAL_ATOMIC_SWAP        : FLAT_Real_GlblAtomics_gfx10<0x030>;
defm GLOBAL_ATOMIC_CMPSWAP     : FLAT_Real_GlblAtomics_gfx10<0x031>;
defm GLOBAL_ATOMIC_ADD         : FLAT_Real_GlblAtomics_gfx10<0x032>;
defm GLOBAL_ATOMIC_SUB         : FLAT_Real_GlblAtomics_gfx10<0x033>;
defm GLOBAL_ATOMIC_CSUB        : FLAT_Real_GlblAtomics_RTN_gfx10<0x034>;
defm GLOBAL_ATOMIC_SMIN        : FLAT_Real_GlblAtomics_gfx10<0x035>;
defm GLOBAL_ATOMIC_UMIN        : FLAT_Real_GlblAtomics_gfx10<0x036>;
defm GLOBAL_ATOMIC_SMAX        : FLAT_Real_GlblAtomics_gfx10<0x037>;
defm GLOBAL_ATOMIC_UMAX        : FLAT_Real_GlblAtomics_gfx10<0x038>;
defm GLOBAL_ATOMIC_AND         : FLAT_Real_GlblAtomics_gfx10<0x039>;
defm GLOBAL_ATOMIC_OR          : FLAT_Real_GlblAtomics_gfx10<0x03a>;
defm GLOBAL_ATOMIC_XOR         : FLAT_Real_GlblAtomics_gfx10<0x03b>;
defm GLOBAL_ATOMIC_INC         : FLAT_Real_GlblAtomics_gfx10<0x03c>;
defm GLOBAL_ATOMIC_DEC         : FLAT_Real_GlblAtomics_gfx10<0x03d>;
defm GLOBAL_ATOMIC_FCMPSWAP    : FLAT_Real_GlblAtomics_gfx10<0x03e>;
defm GLOBAL_ATOMIC_FMIN        : FLAT_Real_GlblAtomics_gfx10<0x03f>;
defm GLOBAL_ATOMIC_FMAX        : FLAT_Real_GlblAtomics_gfx10<0x040>;
defm GLOBAL_ATOMIC_SWAP_X2     : FLAT_Real_GlblAtomics_gfx10<0x050>;
defm GLOBAL_ATOMIC_CMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x051>;
defm GLOBAL_ATOMIC_ADD_X2      : FLAT_Real_GlblAtomics_gfx10<0x052>;
defm GLOBAL_ATOMIC_SUB_X2      : FLAT_Real_GlblAtomics_gfx10<0x053>;
defm GLOBAL_ATOMIC_SMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x055>;
defm GLOBAL_ATOMIC_UMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x056>;
defm GLOBAL_ATOMIC_SMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x057>;
defm GLOBAL_ATOMIC_UMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x058>;
defm GLOBAL_ATOMIC_AND_X2      : FLAT_Real_GlblAtomics_gfx10<0x059>;
defm GLOBAL_ATOMIC_OR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05a>;
defm GLOBAL_ATOMIC_XOR_X2      : FLAT_Real_GlblAtomics_gfx10<0x05b>;
defm GLOBAL_ATOMIC_INC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05c>;
defm GLOBAL_ATOMIC_DEC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x060>;
defm GLOBAL_LOAD_DWORD_ADDTID  : FLAT_Real_AllAddr_gfx10<0x016>;
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>;

// GLOBAL loads to LDS.
defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_LDS_gfx10 <0x008>;
defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_LDS_gfx10 <0x009>;
defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00a>;
defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00b>;
defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_LDS_gfx10 <0x00c>;

// ENC_FLAT_SCRATCH.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x008>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x009>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00a>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00b>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_ScratchAllAddr_gfx10<0x00c>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_ScratchAllAddr_gfx10<0x00d>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_ScratchAllAddr_gfx10<0x00e>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_ScratchAllAddr_gfx10<0x00f>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x018>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x019>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x01a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_ScratchAllAddr_gfx10<0x01c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_ScratchAllAddr_gfx10<0x01d>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_ScratchAllAddr_gfx10<0x01e>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_ScratchAllAddr_gfx10<0x01f>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x020>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x021>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x022>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x023>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x024>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x025>;

// SCRATCH loads to LDS.
defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x008>;
defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x009>;
defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00a>;
defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00b>;
defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00c>;

//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//

// Real FLAT instruction in the GFX11 encoding family (assembler predicate
// isGFX11Plus, decoder namespace "GFX11"). `opName` replaces the pseudo's
// mnemonic in the assembly string and defaults to ps.Mnemonic.
//  * Inst{13}    : cpol's DLC bit, or the pseudo's dlcValue without has_dlc.
//  * Inst{14}    : cpol's GLC bit, or the pseudo's glcValue without has_glc.
//  * Inst{15}    : cpol's SLC bit.
//  * Inst{17-16} : `seg`.
//  * Inst{55}    : the pseudo's sve value.
class FLAT_Real_gfx11 <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> :
  FLAT_Real <op, ps, opName>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX11> {
  let AssemblerPredicate = isGFX11Plus;
  let DecoderNamespace   = "GFX11";

  let Inst{13}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
  let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
  let Inst{15}    = cpol{CPolBit.SLC};
  let Inst{17-16} = seg;
  let Inst{55}    = ps.sve;
}

// GFX11 real for the pseudo named by the string `ps`, with Inst{54-48} fixed
// to SGPR_NULL_gfx11plus's hardware encoding. When `renamed` is set, a
// MnemonicAlias from the pseudo's mnemonic to `opName` is also emitted.
multiclass FLAT_Real_Base_gfx11<bits<7> op, string ps, string opName, int renamed = false> {
  def _gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps), opName> {
    let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
  }
  if renamed then
    def _renamed_gfx11 : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>, Requires<[isGFX11Plus]>;
}

// As the base real, for the `ps`_RTN pseudo.
multiclass FLAT_Real_RTN_gfx11<bits<7> op, string ps, string opName> {
  def _RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_RTN"), opName> {
    let Inst{54-48} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
  }
}

// Reals for the `ps`_SADDR and `ps`_SADDR_RTN pseudos; Inst{54-48} is not
// overridden here.
multiclass FLAT_Real_SADDR_gfx11<bits<7> op, string ps, string opName> {
  def _SADDR_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SADDR"), opName>;
}

multiclass FLAT_Real_SADDR_RTN_gfx11<bits<7> op, string ps, string opName> {
  def _SADDR_RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SADDR_RTN"), opName>;
}

// Emits "<NAME>_ST_gfx11" from the "<ps>_ST" pseudo, with Inst{54-48} fixed to
// the hardware encoding of SGPR_NULL_gfx11plus. Gated on HasFlatScratchSTMode.
multiclass FLAT_Real_ST_gfx11<bits<7> op, string ps, string opName> {
  def _ST_gfx11
      : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps # "_ST"), opName> {
    let Inst{54-48}     = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
    let OtherPredicates = [HasFlatScratchSTMode];
  }
}

// Emits "<NAME>_SVS_gfx11" from the "<ps>_SVS" pseudo. Gated on
// HasFlatScratchSVSMode; Inst{54-48} is not overridden here.
multiclass FLAT_Real_SVS_gfx11<bits<7> op, string ps, string opName> {
  def _SVS_gfx11
      : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps # "_SVS"), opName> {
    let OtherPredicates = [HasFlatScratchSVSMode];
  }
}

// Base + SADDR variants.
multiclass FLAT_Real_AllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false>
    : FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
      FLAT_Real_SADDR_gfx11<op, ps, opName>;

// Base + RTN variants.
multiclass FLAT_Real_Atomics_gfx11<bits<7> op, string ps, string opName, int renamed = false>
    : FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
      FLAT_Real_RTN_gfx11<op, ps, opName>;

// Base + SADDR + RTN + SADDR_RTN variants.
multiclass FLAT_Real_GlblAtomics_gfx11<bits<7> op, string ps, string opName, int renamed = false>
    : FLAT_Real_AllAddr_gfx11<op, ps, opName, renamed>,
      FLAT_Real_RTN_gfx11<op, ps, opName>,
      FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;

// RTN + SADDR_RTN variants only (no non-returning forms).
multiclass FLAT_Real_GlblAtomics_RTN_gfx11<bits<7> op, string ps, string opName>
    : FLAT_Real_RTN_gfx11<op, ps, opName>,
      FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;

// Base + SADDR + ST + SVS variants.
multiclass FLAT_Real_ScratchAllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false>
    : FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
      FLAT_Real_SADDR_gfx11<op, ps, opName>,
      FLAT_Real_ST_gfx11<op, ps, opName>,
      FLAT_Real_SVS_gfx11<op, ps, opName>;

// ENC_FLAT.
// GFX11 real instructions for the FLAT segment. Each entry gives the opcode,
// the name of the pseudo it is derived from, and the GFX11 mnemonic; a
// trailing 'true' additionally requests a MnemonicAlias from the pseudo's
// mnemonic to the new one.

// Loads.
defm FLAT_LOAD_U8            : FLAT_Real_Base_gfx11<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>;
defm FLAT_LOAD_I8            : FLAT_Real_Base_gfx11<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>;
defm FLAT_LOAD_U16           : FLAT_Real_Base_gfx11<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>;
defm FLAT_LOAD_I16           : FLAT_Real_Base_gfx11<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>;
defm FLAT_LOAD_B32           : FLAT_Real_Base_gfx11<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>;
defm FLAT_LOAD_B64           : FLAT_Real_Base_gfx11<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>;
defm FLAT_LOAD_B96           : FLAT_Real_Base_gfx11<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>;
defm FLAT_LOAD_B128          : FLAT_Real_Base_gfx11<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>;

// Stores.
defm FLAT_STORE_B8           : FLAT_Real_Base_gfx11<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>;
defm FLAT_STORE_B16          : FLAT_Real_Base_gfx11<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>;
defm FLAT_STORE_B32          : FLAT_Real_Base_gfx11<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>;
defm FLAT_STORE_B64          : FLAT_Real_Base_gfx11<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>;
defm FLAT_STORE_B96          : FLAT_Real_Base_gfx11<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>;
defm FLAT_STORE_B128         : FLAT_Real_Base_gfx11<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>;

// D16 loads and stores (no mnemonic alias requested).
defm FLAT_LOAD_D16_U8        : FLAT_Real_Base_gfx11<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">;
defm FLAT_LOAD_D16_I8        : FLAT_Real_Base_gfx11<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">;
defm FLAT_LOAD_D16_B16       : FLAT_Real_Base_gfx11<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">;
defm FLAT_LOAD_D16_HI_U8     : FLAT_Real_Base_gfx11<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">;
defm FLAT_LOAD_D16_HI_I8     : FLAT_Real_Base_gfx11<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">;
defm FLAT_LOAD_D16_HI_B16    : FLAT_Real_Base_gfx11<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">;
defm FLAT_STORE_D16_HI_B8    : FLAT_Real_Base_gfx11<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">;
defm FLAT_STORE_D16_HI_B16   : FLAT_Real_Base_gfx11<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">;

// Atomics derived from the non-_X2 pseudos.
defm FLAT_ATOMIC_SWAP_B32    : FLAT_Real_Atomics_gfx11<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>;
defm FLAT_ATOMIC_CMPSWAP_B32 : FLAT_Real_Atomics_gfx11<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>;
defm FLAT_ATOMIC_ADD_U32     : FLAT_Real_Atomics_gfx11<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>;
defm FLAT_ATOMIC_SUB_U32     : FLAT_Real_Atomics_gfx11<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>;
defm FLAT_ATOMIC_MIN_I32     : FLAT_Real_Atomics_gfx11<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>;
defm FLAT_ATOMIC_MIN_U32     : FLAT_Real_Atomics_gfx11<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>;
defm FLAT_ATOMIC_MAX_I32     : FLAT_Real_Atomics_gfx11<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>;
defm FLAT_ATOMIC_MAX_U32     : FLAT_Real_Atomics_gfx11<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>;
defm FLAT_ATOMIC_AND_B32     : FLAT_Real_Atomics_gfx11<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>;
defm FLAT_ATOMIC_OR_B32      : FLAT_Real_Atomics_gfx11<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>;
defm FLAT_ATOMIC_XOR_B32     : FLAT_Real_Atomics_gfx11<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>;
defm FLAT_ATOMIC_INC_U32     : FLAT_Real_Atomics_gfx11<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>;
defm FLAT_ATOMIC_DEC_U32     : FLAT_Real_Atomics_gfx11<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>;

// Atomics derived from the _X2 pseudos.
defm FLAT_ATOMIC_SWAP_B64    : FLAT_Real_Atomics_gfx11<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>;
defm FLAT_ATOMIC_CMPSWAP_B64 : FLAT_Real_Atomics_gfx11<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>;
defm FLAT_ATOMIC_ADD_U64     : FLAT_Real_Atomics_gfx11<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>;
defm FLAT_ATOMIC_SUB_U64     : FLAT_Real_Atomics_gfx11<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>;
defm FLAT_ATOMIC_MIN_I64     : FLAT_Real_Atomics_gfx11<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>;
defm FLAT_ATOMIC_MIN_U64     : FLAT_Real_Atomics_gfx11<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>;
defm FLAT_ATOMIC_MAX_I64     : FLAT_Real_Atomics_gfx11<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>;
defm FLAT_ATOMIC_MAX_U64     : FLAT_Real_Atomics_gfx11<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>;
defm FLAT_ATOMIC_AND_B64     : FLAT_Real_Atomics_gfx11<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>;
defm FLAT_ATOMIC_OR_B64      : FLAT_Real_Atomics_gfx11<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>;
defm FLAT_ATOMIC_XOR_B64     : FLAT_Real_Atomics_gfx11<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>;
defm FLAT_ATOMIC_INC_U64     : FLAT_Real_Atomics_gfx11<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>;
defm FLAT_ATOMIC_DEC_U64     : FLAT_Real_Atomics_gfx11<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>;

// F32 atomics (no mnemonic alias requested).
defm FLAT_ATOMIC_CMPSWAP_F32 : FLAT_Real_Atomics_gfx11<0x050, "FLAT_ATOMIC_FCMPSWAP", "flat_atomic_cmpswap_f32">;
defm FLAT_ATOMIC_MIN_F32     : FLAT_Real_Atomics_gfx11<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_f32">;
defm FLAT_ATOMIC_MAX_F32     : FLAT_Real_Atomics_gfx11<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_f32">;
defm FLAT_ATOMIC_ADD_F32     : FLAT_Real_Atomics_gfx11<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">;

// ENC_FLAT_GLBL.
// GFX11 real instructions for the GLOBAL segment. Each entry gives the opcode,
// the name of the pseudo it is derived from, and the GFX11 mnemonic; a
// trailing 'true' additionally requests a MnemonicAlias from the pseudo's
// mnemonic to the new one.

// Loads.
defm GLOBAL_LOAD_U8            : FLAT_Real_AllAddr_gfx11<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>;
defm GLOBAL_LOAD_I8            : FLAT_Real_AllAddr_gfx11<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>;
defm GLOBAL_LOAD_U16           : FLAT_Real_AllAddr_gfx11<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>;
defm GLOBAL_LOAD_I16           : FLAT_Real_AllAddr_gfx11<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>;
defm GLOBAL_LOAD_B32           : FLAT_Real_AllAddr_gfx11<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>;
defm GLOBAL_LOAD_B64           : FLAT_Real_AllAddr_gfx11<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>;
defm GLOBAL_LOAD_B96           : FLAT_Real_AllAddr_gfx11<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>;
defm GLOBAL_LOAD_B128          : FLAT_Real_AllAddr_gfx11<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>;

// Stores.
defm GLOBAL_STORE_B8           : FLAT_Real_AllAddr_gfx11<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>;
defm GLOBAL_STORE_B16          : FLAT_Real_AllAddr_gfx11<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>;
defm GLOBAL_STORE_B32          : FLAT_Real_AllAddr_gfx11<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>;
defm GLOBAL_STORE_B64          : FLAT_Real_AllAddr_gfx11<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>;
defm GLOBAL_STORE_B96          : FLAT_Real_AllAddr_gfx11<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>;
defm GLOBAL_STORE_B128         : FLAT_Real_AllAddr_gfx11<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>;

// D16 loads and stores (no mnemonic alias requested).
defm GLOBAL_LOAD_D16_U8        : FLAT_Real_AllAddr_gfx11<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">;
defm GLOBAL_LOAD_D16_I8        : FLAT_Real_AllAddr_gfx11<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">;
defm GLOBAL_LOAD_D16_B16       : FLAT_Real_AllAddr_gfx11<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">;
defm GLOBAL_LOAD_D16_HI_U8     : FLAT_Real_AllAddr_gfx11<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">;
defm GLOBAL_LOAD_D16_HI_I8     : FLAT_Real_AllAddr_gfx11<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">;
defm GLOBAL_LOAD_D16_HI_B16    : FLAT_Real_AllAddr_gfx11<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">;
defm GLOBAL_STORE_D16_HI_B8    : FLAT_Real_AllAddr_gfx11<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">;
defm GLOBAL_STORE_D16_HI_B16   : FLAT_Real_AllAddr_gfx11<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">;

// ADDTID load/store (no mnemonic alias requested).
defm GLOBAL_LOAD_ADDTID_B32    : FLAT_Real_AllAddr_gfx11<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">;
defm GLOBAL_STORE_ADDTID_B32   : FLAT_Real_AllAddr_gfx11<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">;

// Atomics derived from the non-_X2 pseudos. CSUB only instantiates the
// returning (_RTN) forms.
defm GLOBAL_ATOMIC_SWAP_B32    : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>;
defm GLOBAL_ATOMIC_CMPSWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>;
defm GLOBAL_ATOMIC_ADD_U32     : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>;
defm GLOBAL_ATOMIC_SUB_U32     : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>;
defm GLOBAL_ATOMIC_CSUB_U32    : FLAT_Real_GlblAtomics_RTN_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32">;
defm GLOBAL_ATOMIC_MIN_I32     : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>;
defm GLOBAL_ATOMIC_MIN_U32     : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>;
defm GLOBAL_ATOMIC_MAX_I32     : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>;
defm GLOBAL_ATOMIC_MAX_U32     : FLAT_Real_GlblAtomics_gfx11<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>;
defm GLOBAL_ATOMIC_AND_B32     : FLAT_Real_GlblAtomics_gfx11<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>;
defm GLOBAL_ATOMIC_OR_B32      : FLAT_Real_GlblAtomics_gfx11<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>;
defm GLOBAL_ATOMIC_XOR_B32     : FLAT_Real_GlblAtomics_gfx11<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>;
defm GLOBAL_ATOMIC_INC_U32     : FLAT_Real_GlblAtomics_gfx11<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>;
defm GLOBAL_ATOMIC_DEC_U32     : FLAT_Real_GlblAtomics_gfx11<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>;

// Atomics derived from the _X2 pseudos.
defm GLOBAL_ATOMIC_SWAP_B64    : FLAT_Real_GlblAtomics_gfx11<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>;
defm GLOBAL_ATOMIC_CMPSWAP_B64 : FLAT_Real_GlblAtomics_gfx11<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>;
defm GLOBAL_ATOMIC_ADD_U64     : FLAT_Real_GlblAtomics_gfx11<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>;
defm GLOBAL_ATOMIC_SUB_U64     : FLAT_Real_GlblAtomics_gfx11<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>;
defm GLOBAL_ATOMIC_MIN_I64     : FLAT_Real_GlblAtomics_gfx11<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>;
defm GLOBAL_ATOMIC_MIN_U64     : FLAT_Real_GlblAtomics_gfx11<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>;
defm GLOBAL_ATOMIC_MAX_I64     : FLAT_Real_GlblAtomics_gfx11<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>;
defm GLOBAL_ATOMIC_MAX_U64     : FLAT_Real_GlblAtomics_gfx11<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>;
defm GLOBAL_ATOMIC_AND_B64     : FLAT_Real_GlblAtomics_gfx11<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>;
defm GLOBAL_ATOMIC_OR_B64      : FLAT_Real_GlblAtomics_gfx11<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>;
defm GLOBAL_ATOMIC_XOR_B64     : FLAT_Real_GlblAtomics_gfx11<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>;
defm GLOBAL_ATOMIC_INC_U64     : FLAT_Real_GlblAtomics_gfx11<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>;
defm GLOBAL_ATOMIC_DEC_U64     : FLAT_Real_GlblAtomics_gfx11<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>;

// F32 atomics (no mnemonic alias requested).
defm GLOBAL_ATOMIC_CMPSWAP_F32 : FLAT_Real_GlblAtomics_gfx11<0x050, "GLOBAL_ATOMIC_FCMPSWAP", "global_atomic_cmpswap_f32">;
defm GLOBAL_ATOMIC_MIN_F32     : FLAT_Real_GlblAtomics_gfx11<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_f32">;
defm GLOBAL_ATOMIC_MAX_F32     : FLAT_Real_GlblAtomics_gfx11<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_f32">;
defm GLOBAL_ATOMIC_ADD_F32     : FLAT_Real_GlblAtomics_gfx11<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">;

// ENC_FLAT_SCRATCH.
// Opcodes are written with three hex digits to match the tables above.

// Loads.
defm SCRATCH_LOAD_U8           : FLAT_Real_ScratchAllAddr_gfx11<0x010, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>;
defm SCRATCH_LOAD_I8           : FLAT_Real_ScratchAllAddr_gfx11<0x011, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>;
defm SCRATCH_LOAD_U16          : FLAT_Real_ScratchAllAddr_gfx11<0x012, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>;
defm SCRATCH_LOAD_I16          : FLAT_Real_ScratchAllAddr_gfx11<0x013, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>;
defm SCRATCH_LOAD_B32          : FLAT_Real_ScratchAllAddr_gfx11<0x014, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>;
defm SCRATCH_LOAD_B64          : FLAT_Real_ScratchAllAddr_gfx11<0x015, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>;
defm SCRATCH_LOAD_B96          : FLAT_Real_ScratchAllAddr_gfx11<0x016, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>;
defm SCRATCH_LOAD_B128         : FLAT_Real_ScratchAllAddr_gfx11<0x017, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>;

// Stores.
defm SCRATCH_STORE_B8          : FLAT_Real_ScratchAllAddr_gfx11<0x018, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>;
defm SCRATCH_STORE_B16         : FLAT_Real_ScratchAllAddr_gfx11<0x019, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>;
defm SCRATCH_STORE_B32         : FLAT_Real_ScratchAllAddr_gfx11<0x01a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>;
defm SCRATCH_STORE_B64         : FLAT_Real_ScratchAllAddr_gfx11<0x01b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>;
defm SCRATCH_STORE_B96         : FLAT_Real_ScratchAllAddr_gfx11<0x01c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>;
defm SCRATCH_STORE_B128        : FLAT_Real_ScratchAllAddr_gfx11<0x01d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>;

// D16 loads and stores (no mnemonic alias requested).
defm SCRATCH_LOAD_D16_U8       : FLAT_Real_ScratchAllAddr_gfx11<0x01e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">;
defm SCRATCH_LOAD_D16_I8       : FLAT_Real_ScratchAllAddr_gfx11<0x01f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">;
defm SCRATCH_LOAD_D16_B16      : FLAT_Real_ScratchAllAddr_gfx11<0x020, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">;
defm SCRATCH_LOAD_D16_HI_U8    : FLAT_Real_ScratchAllAddr_gfx11<0x021, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">;
defm SCRATCH_LOAD_D16_HI_I8    : FLAT_Real_ScratchAllAddr_gfx11<0x022, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">;
defm SCRATCH_LOAD_D16_HI_B16   : FLAT_Real_ScratchAllAddr_gfx11<0x023, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">;
defm SCRATCH_STORE_D16_HI_B8   : FLAT_Real_ScratchAllAddr_gfx11<0x024, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">;
defm SCRATCH_STORE_D16_HI_B16  : FLAT_Real_ScratchAllAddr_gfx11<0x025, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">;