1//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9def FlatOffset : ComplexPattern<iPTR, 2, "SelectFlatOffset", [], [SDNPWantRoot], -10>; 10def GlobalOffset : ComplexPattern<iPTR, 2, "SelectGlobalOffset", [], [SDNPWantRoot], -10>; 11def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [SDNPWantRoot], -10>; 12 13def GlobalSAddr : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>; 14def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>; 15def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [SDNPWantRoot], -10>; 16 17//===----------------------------------------------------------------------===// 18// FLAT classes 19//===----------------------------------------------------------------------===// 20 21class FLAT_Pseudo<string opName, dag outs, dag ins, 22 string asmOps, list<dag> pattern=[]> : 23 InstSI<outs, ins, "", pattern>, 24 SIMCInstr<NAME, SIEncodingFamily.NONE> { 25 26 let isPseudo = 1; 27 let isCodeGenOnly = 1; 28 29 let FLAT = 1; 30 31 let UseNamedOperandTable = 1; 32 let hasSideEffects = 0; 33 let SchedRW = [WriteVMEM]; 34 35 string Mnemonic = opName; 36 string AsmOperands = asmOps; 37 38 bits<1> is_flat_global = 0; 39 bits<1> is_flat_scratch = 0; 40 41 bits<1> has_vdst = 1; 42 43 // We need to distinguish having saddr and enabling saddr because 44 // saddr is only valid for scratch and global instructions. Pre-gfx9 45 // these bits were reserved, so we also don't necessarily want to 46 // set these bits to the disabled value for the original flat 47 // segment instructions. 48 bits<1> has_saddr = 0; 49 bits<1> enabled_saddr = 0; 50 bits<7> saddr_value = 0; 51 bits<1> has_vaddr = 1; 52 53 bits<1> has_data = 1; 54 bits<1> has_glc = 1; 55 bits<1> glcValue = 0; 56 bits<1> has_dlc = 1; 57 bits<1> dlcValue = 0; 58 bits<1> has_sccb = 1; 59 bits<1> sccbValue = 0; 60 bits<1> has_sve = 0; // Scratch VGPR Enable 61 bits<1> lds = 0; 62 bits<1> sve = 0; 63 bits<1> has_offset = 1; 64 65 let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts, 66 !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace)); 67 68 // TODO: M0 if it could possibly access LDS (before gfx9? only)? 69 let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]); 70 71 // Internally, FLAT instruction are executed as both an LDS and a 72 // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT 73 // and are not considered done until both have been decremented. 74 let VM_CNT = 1; 75 let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch)); 76 77 let FlatGlobal = is_flat_global; 78 79 let FlatScratch = is_flat_scratch; 80} 81 82class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : 83 InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>, 84 Enc64 { 85 86 let isPseudo = 0; 87 let isCodeGenOnly = 0; 88 89 let FLAT = 1; 90 91 // copy relevant pseudo op flags 92 let SubtargetPredicate = ps.SubtargetPredicate; 93 let AsmMatchConverter = ps.AsmMatchConverter; 94 let OtherPredicates = ps.OtherPredicates; 95 let TSFlags = ps.TSFlags; 96 let UseNamedOperandTable = ps.UseNamedOperandTable; 97 let SchedRW = ps.SchedRW; 98 let mayLoad = ps.mayLoad; 99 let mayStore = ps.mayStore; 100 let IsAtomicRet = ps.IsAtomicRet; 101 let IsAtomicNoRet = ps.IsAtomicNoRet; 102 let VM_CNT = ps.VM_CNT; 103 let LGKM_CNT = ps.LGKM_CNT; 104 let VALU = ps.VALU; 105 let Uses = ps.Uses; 106 let Defs = ps.Defs; 107 108 // encoding fields 109 bits<8> vaddr; 110 bits<10> vdata; 111 bits<7> saddr; 112 bits<10> vdst; 113 114 bits<5> cpol; 115 116 // Only valid on gfx9 117 bits<1> lds = ps.lds; // LDS DMA for global and scratch 118 119 // Segment, 00=flat, 01=scratch, 10=global, 11=reserved 120 bits<2> seg = {ps.is_flat_global, ps.is_flat_scratch}; 121 122 // Signed offset. Highest bit ignored for flat and treated as 12-bit 123 // unsigned for flat accesses. 124 bits<13> offset; 125 // GFX90A+ only: instruction uses AccVGPR for data 126 bits<1> acc = !if(ps.has_vdst, vdst{9}, !if(ps.has_data, vdata{9}, 0)); 127 128 // We don't use tfe right now, and it was removed in gfx9. 129 bits<1> tfe = 0; 130 131 // Only valid on GFX9+ 132 let Inst{12-0} = offset; 133 let Inst{13} = !if(ps.has_sve, ps.sve, lds); 134 let Inst{15-14} = seg; 135 136 let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue); 137 let Inst{17} = cpol{CPolBit.SLC}; 138 let Inst{24-18} = op; 139 let Inst{31-26} = 0x37; // Encoding. 140 let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); 141 let Inst{47-40} = !if(ps.has_data, vdata{7-0}, ?); 142 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0); 143 144 // 54-48 is reserved. 145 let Inst{55} = acc; // nv on GFX9+, TFE before. AccVGPR for data on GFX90A. 146 let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, ?); 147} 148 149class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : 150 InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>, 151 Enc96 { 152 153 let FLAT = 1; 154 155 // copy relevant pseudo op flags 156 let SubtargetPredicate = ps.SubtargetPredicate; 157 let WaveSizePredicate = ps.WaveSizePredicate; 158 let AsmMatchConverter = ps.AsmMatchConverter; 159 let OtherPredicates = ps.OtherPredicates; 160 let TSFlags = ps.TSFlags; 161 let UseNamedOperandTable = ps.UseNamedOperandTable; 162 let SchedRW = ps.SchedRW; 163 let mayLoad = ps.mayLoad; 164 let mayStore = ps.mayStore; 165 let IsAtomicRet = ps.IsAtomicRet; 166 let IsAtomicNoRet = ps.IsAtomicNoRet; 167 let VM_CNT = ps.VM_CNT; 168 let LGKM_CNT = ps.LGKM_CNT; 169 let VALU = ps.VALU; 170 let Uses = ps.Uses; 171 let Defs = ps.Defs; 172 173 bits<7> saddr; 174 bits<8> vdst; 175 bits<6> cpol; 176 bits<8> vdata; // vsrc 177 bits<8> vaddr; 178 bits<24> offset; 179 180 let Inst{6-0} = !if(ps.enabled_saddr, saddr, SGPR_NULL_gfx11plus.Index); 181 let Inst{21-14} = op; 182 let Inst{31-26} = 0x3b; 183 let Inst{39-32} = !if(ps.has_vdst, vdst, ?); 184 let Inst{49} = ps.sve; 185 let Inst{54-53} = cpol{2-1}; // th{2-1} 186 let Inst{52} = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0} 187 let Inst{51-50} = cpol{4-3}; // scope 188 let Inst{62-55} = !if(ps.has_data, vdata{7-0}, ?); 189 let Inst{71-64} = !if(ps.has_vaddr, vaddr, ?); 190 let Inst{95-72} = !if(ps.has_offset, offset, ?); 191} 192 193class GlobalSaddrTable <bit is_saddr, string Name = ""> { 194 bit IsSaddr = is_saddr; 195 string SaddrOp = Name; 196} 197 198// TODO: Is exec allowed for saddr? The disabled value 0x7f is the 199// same encoding value as exec_hi, so it isn't possible to use that if 200// saddr is 32-bit (which isn't handled here yet). 201class FLAT_Load_Pseudo <string opName, RegisterClass regClass, 202 bit HasTiedOutput = 0, 203 bit HasSaddr = 0, bit EnableSaddr = 0, 204 RegisterOperand vdata_op = getLdStRegisterOperand<regClass>.ret> : FLAT_Pseudo< 205 opName, 206 (outs vdata_op:$vdst), 207 !con( 208 !con( 209 !if(EnableSaddr, 210 (ins SReg_64:$saddr, VGPR_32:$vaddr), 211 (ins VReg_64:$vaddr)), 212 (ins flat_offset:$offset)), 213 // FIXME: Operands with default values do not work with following non-optional operands. 214 !if(HasTiedOutput, (ins CPol:$cpol, vdata_op:$vdst_in), 215 (ins CPol_0:$cpol))), 216 " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { 217 let has_data = 0; 218 let mayLoad = 1; 219 let has_saddr = HasSaddr; 220 let enabled_saddr = EnableSaddr; 221 222 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 223 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 224} 225 226class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass, 227 bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo< 228 opName, 229 (outs), 230 !con( 231 !if(EnableSaddr, 232 (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr), 233 (ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)), 234 (ins flat_offset:$offset, CPol_0:$cpol)), 235 " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { 236 let mayLoad = 0; 237 let mayStore = 1; 238 let has_vdst = 0; 239 let has_saddr = HasSaddr; 240 let enabled_saddr = EnableSaddr; 241} 242 243multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> { 244 let is_flat_global = 1 in { 245 def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>, 246 GlobalSaddrTable<0, opName>; 247 def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>, 248 GlobalSaddrTable<1, opName>; 249 } 250} 251 252class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass, 253 bit HasTiedOutput = 0, bit EnableSaddr = 0> : FLAT_Pseudo< 254 opName, 255 (outs regClass:$vdst), 256 !con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)), 257 (ins flat_offset:$offset, CPol_0:$cpol), 258 !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))), 259 " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 260 let is_flat_global = 1; 261 let has_data = 0; 262 let mayLoad = 1; 263 let has_vaddr = 0; 264 let has_saddr = 1; 265 let enabled_saddr = EnableSaddr; 266 267 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 268 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 269} 270 271multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass, 272 bit HasTiedOutput = 0> { 273 def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput>, 274 GlobalSaddrTable<0, opName>; 275 def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, 1>, 276 GlobalSaddrTable<1, opName>; 277} 278 279multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> { 280 let is_flat_global = 1 in { 281 def "" : FLAT_Store_Pseudo<opName, regClass, 1>, 282 GlobalSaddrTable<0, opName>; 283 def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>, 284 GlobalSaddrTable<1, opName>; 285 } 286} 287 288class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo< 289 opName, 290 (outs ), 291 !con( 292 !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)), 293 (ins flat_offset:$offset, CPol_0:$cpol)), 294 " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> { 295 let LGKM_CNT = 1; 296 let is_flat_global = 1; 297 let lds = 1; 298 let has_data = 0; 299 let has_vdst = 0; 300 let mayLoad = 1; 301 let mayStore = 1; 302 let has_saddr = 1; 303 let enabled_saddr = EnableSaddr; 304 let VALU = 1; 305 let Uses = [M0, EXEC]; 306 let SchedRW = [WriteVMEM, WriteLDS]; 307} 308 309multiclass FLAT_Global_Load_LDS_Pseudo<string opName> { 310 def "" : FLAT_Global_Load_LDS_Pseudo<opName>, 311 GlobalSaddrTable<0, opName>; 312 def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>, 313 GlobalSaddrTable<1, opName>; 314} 315 316class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass, 317 bit EnableSaddr = 0> : FLAT_Pseudo< 318 opName, 319 (outs), 320 !con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)), 321 (ins flat_offset:$offset, CPol:$cpol)), 322 " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 323 let is_flat_global = 1; 324 let mayLoad = 0; 325 let mayStore = 1; 326 let has_vdst = 0; 327 let has_vaddr = 0; 328 let has_saddr = 1; 329 let enabled_saddr = EnableSaddr; 330} 331 332multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass> { 333 def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass>, 334 GlobalSaddrTable<0, opName>; 335 def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, 1>, 336 GlobalSaddrTable<1, opName>; 337} 338 339class FLAT_Global_Invalidate_Writeback<string opName, SDPatternOperator node = null_frag> : 340 FLAT_Pseudo<opName, (outs), (ins CPol:$cpol), "$cpol", [(node)]> { 341 342 let AsmMatchConverter = ""; 343 344 let hasSideEffects = 1; 345 let mayLoad = 0; 346 let mayStore = 0; 347 let is_flat_global = 1; 348 349 let has_offset = 0; 350 let has_saddr = 0; 351 let enabled_saddr = 0; 352 let saddr_value = 0; 353 let has_vdst = 0; 354 let has_data = 0; 355 let has_vaddr = 0; 356 let has_glc = 0; 357 let has_dlc = 0; 358 let glcValue = 0; 359 let dlcValue = 0; 360 let has_sccb = 0; 361 let sccbValue = 0; 362 let has_sve = 0; 363 let lds = 0; 364 let sve = 0; 365} 366 367class FlatScratchInst <string sv_op, string mode> { 368 string SVOp = sv_op; 369 string Mode = mode; 370} 371 372class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass, 373 bit HasTiedOutput = 0, 374 bit EnableSaddr = 0, 375 bit EnableSVE = 0, 376 bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> 377 : FLAT_Pseudo< 378 opName, 379 (outs getLdStRegisterOperand<regClass>.ret:$vdst), 380 !con( 381 !if(EnableSVE, 382 (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset), 383 !if(EnableSaddr, 384 (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset), 385 !if(EnableVaddr, 386 (ins VGPR_32:$vaddr, flat_offset:$offset), 387 (ins flat_offset:$offset)))), 388 !if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in), 389 (ins CPol_0:$cpol))), 390 " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 391 let is_flat_scratch = 1; 392 let has_data = 0; 393 let mayLoad = 1; 394 let has_saddr = 1; 395 let enabled_saddr = EnableSaddr; 396 let has_vaddr = EnableVaddr; 397 let has_sve = EnableSVE; 398 let sve = EnableVaddr; 399 400 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 401 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 402} 403 404class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0, 405 bit EnableSVE = 0, 406 bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr)), 407 RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret> : FLAT_Pseudo< 408 opName, 409 (outs), 410 !if(EnableSVE, 411 (ins vdata_op:$vdata, VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), 412 !if(EnableSaddr, 413 (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), 414 !if(EnableVaddr, 415 (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol), 416 (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol)))), 417 " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 418 let is_flat_scratch = 1; 419 let mayLoad = 0; 420 let mayStore = 1; 421 let has_vdst = 0; 422 let has_saddr = 1; 423 let enabled_saddr = EnableSaddr; 424 let has_vaddr = EnableVaddr; 425 let has_sve = EnableSVE; 426 let sve = EnableVaddr; 427} 428 429multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> { 430 def "" : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput>, 431 FlatScratchInst<opName, "SV">; 432 def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>, 433 FlatScratchInst<opName, "SS">; 434 435 let SubtargetPredicate = HasFlatScratchSVSMode in 436 def _SVS : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1, 1>, 437 FlatScratchInst<opName, "SVS">; 438 439 let SubtargetPredicate = HasFlatScratchSTMode in 440 def _ST : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0, 0>, 441 FlatScratchInst<opName, "ST">; 442} 443 444multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> { 445 def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>, 446 FlatScratchInst<opName, "SV">; 447 def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>, 448 FlatScratchInst<opName, "SS">; 449 450 let SubtargetPredicate = HasFlatScratchSVSMode in 451 def _SVS : FLAT_Scratch_Store_Pseudo<opName, regClass, 1, 1>, 452 FlatScratchInst<opName, "SVS">; 453 454 let SubtargetPredicate = HasFlatScratchSTMode in 455 def _ST : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0, 0>, 456 FlatScratchInst<opName, "ST">; 457} 458 459class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0, 460 bit EnableSVE = 0, 461 bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> : FLAT_Pseudo< 462 opName, 463 (outs ), 464 !if(EnableSVE, 465 (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol), 466 !if(EnableSaddr, 467 (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol), 468 !if(EnableVaddr, 469 (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol), 470 (ins flat_offset:$offset, CPol:$cpol)))), 471 " "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 472 473 let LGKM_CNT = 1; 474 let is_flat_scratch = 1; 475 let lds = 1; 476 let has_data = 0; 477 let has_vdst = 0; 478 let mayLoad = 1; 479 let mayStore = 1; 480 let has_saddr = 1; 481 let enabled_saddr = EnableSaddr; 482 let has_vaddr = EnableVaddr; 483 let has_sve = EnableSVE; 484 let sve = EnableVaddr; 485 let VALU = 1; 486 let Uses = [M0, EXEC]; 487 let SchedRW = [WriteVMEM, WriteLDS]; 488} 489 490multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> { 491 def "" : FLAT_Scratch_Load_LDS_Pseudo<opName>, 492 FlatScratchInst<opName, "SV">; 493 def _SADDR : FLAT_Scratch_Load_LDS_Pseudo<opName, 1>, 494 FlatScratchInst<opName, "SS">; 495 def _SVS : FLAT_Scratch_Load_LDS_Pseudo<opName, 1, 1>, 496 FlatScratchInst<opName, "SVS">; 497 def _ST : FLAT_Scratch_Load_LDS_Pseudo<opName, 0, 0, 0>, 498 FlatScratchInst<opName, "ST">; 499} 500 501class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins, 502 string asm, list<dag> pattern = []> : 503 FLAT_Pseudo<opName, outs, ins, asm, pattern> { 504 let mayLoad = 1; 505 let mayStore = 1; 506 let has_glc = 0; 507 let glcValue = 0; 508 let has_vdst = 0; 509 let has_sccb = 1; 510 let sccbValue = 0; 511 let IsAtomicNoRet = 1; 512} 513 514class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins, 515 string asm, list<dag> pattern = []> 516 : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> { 517 let hasPostISelHook = 1; 518 let has_vdst = 1; 519 let glcValue = 1; 520 let sccbValue = 0; 521 let IsAtomicNoRet = 0; 522 let IsAtomicRet = 1; 523} 524 525multiclass FLAT_Atomic_Pseudo_NO_RTN< 526 string opName, 527 RegisterClass vdst_rc, 528 ValueType vt, 529 ValueType data_vt = vt, 530 RegisterClass data_rc = vdst_rc, 531 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 532 def "" : FLAT_AtomicNoRet_Pseudo <opName, 533 (outs), 534 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol), 535 " $vaddr, $vdata$offset$cpol">, 536 GlobalSaddrTable<0, opName> { 537 let FPAtomic = data_vt.isFP; 538 let AddedComplexity = -1; // Prefer global atomics if available 539 } 540} 541 542multiclass FLAT_Atomic_Pseudo_RTN< 543 string opName, 544 RegisterClass vdst_rc, 545 ValueType vt, 546 ValueType data_vt = vt, 547 RegisterClass data_rc = vdst_rc, 548 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 549 def _RTN : FLAT_AtomicRet_Pseudo <opName, 550 (outs getLdStRegisterOperand<vdst_rc>.ret:$vdst), 551 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), 552 " $vdst, $vaddr, $vdata$offset$cpol">, 553 GlobalSaddrTable<0, opName#"_rtn"> { 554 let FPAtomic = data_vt.isFP; 555 let AddedComplexity = -1; // Prefer global atomics if available 556 } 557} 558 559multiclass FLAT_Atomic_Pseudo< 560 string opName, 561 RegisterClass vdst_rc, 562 ValueType vt, 563 ValueType data_vt = vt, 564 RegisterClass data_rc = vdst_rc, 565 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 566 defm "" : FLAT_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc, data_op>; 567 defm "" : FLAT_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc, data_op>; 568} 569 570multiclass FLAT_Global_Atomic_Pseudo_NO_RTN< 571 string opName, 572 RegisterClass vdst_rc, 573 ValueType vt, 574 ValueType data_vt = vt, 575 RegisterClass data_rc = vdst_rc, 576 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 577 578 let is_flat_global = 1 in { 579 def "" : FLAT_AtomicNoRet_Pseudo <opName, 580 (outs), 581 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol), 582 " $vaddr, $vdata, off$offset$cpol">, 583 GlobalSaddrTable<0, opName> { 584 let has_saddr = 1; 585 let FPAtomic = data_vt.isFP; 586 } 587 588 def _SADDR : FLAT_AtomicNoRet_Pseudo <opName, 589 (outs), 590 (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol), 591 " $vaddr, $vdata, $saddr$offset$cpol">, 592 GlobalSaddrTable<1, opName> { 593 let has_saddr = 1; 594 let enabled_saddr = 1; 595 let FPAtomic = data_vt.isFP; 596 } 597 } 598} 599 600multiclass FLAT_Global_Atomic_Pseudo_RTN< 601 string opName, 602 RegisterClass vdst_rc, 603 ValueType vt, 604 ValueType data_vt = vt, 605 RegisterClass data_rc = vdst_rc, 606 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret, 607 RegisterOperand vdst_op = getLdStRegisterOperand<vdst_rc>.ret> { 608 609 let is_flat_global = 1 in { 610 def _RTN : FLAT_AtomicRet_Pseudo <opName, 611 (outs vdst_op:$vdst), 612 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), 613 " $vdst, $vaddr, $vdata, off$offset$cpol">, 614 GlobalSaddrTable<0, opName#"_rtn"> { 615 let has_saddr = 1; 616 let FPAtomic = data_vt.isFP; 617 } 618 619 def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName, 620 (outs vdst_op:$vdst), 621 (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol), 622 " $vdst, $vaddr, $vdata, $saddr$offset$cpol">, 623 GlobalSaddrTable<1, opName#"_rtn"> { 624 let has_saddr = 1; 625 let enabled_saddr = 1; 626 let FPAtomic = data_vt.isFP; 627 } 628 } 629} 630 631multiclass FLAT_Global_Atomic_Pseudo< 632 string opName, 633 RegisterClass vdst_rc, 634 ValueType vt, 635 ValueType data_vt = vt, 636 RegisterClass data_rc = vdst_rc> { 637 defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>; 638 defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc>; 639} 640 641//===----------------------------------------------------------------------===// 642// Flat Instructions 643//===----------------------------------------------------------------------===// 644 645def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>; 646def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>; 647def FLAT_LOAD_USHORT : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>; 648def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>; 649def FLAT_LOAD_DWORD : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>; 650def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>; 651def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>; 652def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>; 653 654def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>; 655def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>; 656def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>; 657def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>; 658def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>; 659def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>; 660 661let SubtargetPredicate = HasD16LoadStore in { 662let TiedSourceNotRead = 1 in { 663def FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>; 664def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>; 665def FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>; 666def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>; 667def FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>; 668def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>; 669} 670 671def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>; 672def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>; 673} 674 675defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap", 676 VGPR_32, i32, v2i32, VReg_64>; 677 678defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2", 679 VReg_64, i64, v2i64, VReg_128>; 680 681defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap", 682 VGPR_32, i32>; 683 684defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2", 685 VReg_64, i64>; 686 687defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add", 688 VGPR_32, i32>; 689 690defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub", 691 VGPR_32, i32>; 692 693defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin", 694 VGPR_32, i32>; 695 696defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin", 697 VGPR_32, i32>; 698 699defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax", 700 VGPR_32, i32>; 701 702defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax", 703 VGPR_32, i32>; 704 705defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and", 706 VGPR_32, i32>; 707 708defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or", 709 VGPR_32, i32>; 710 711defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor", 712 VGPR_32, i32>; 713 714defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc", 715 VGPR_32, i32>; 716 717defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec", 718 VGPR_32, i32>; 719 720defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2", 721 VReg_64, i64>; 722 723defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2", 724 VReg_64, i64>; 725 726defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2", 727 VReg_64, i64>; 728 729defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2", 730 VReg_64, i64>; 731 732defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2", 733 VReg_64, i64>; 734 735defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2", 736 VReg_64, i64>; 737 738defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2", 739 VReg_64, i64>; 740 741defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2", 742 VReg_64, i64>; 743 744defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2", 745 VReg_64, i64>; 746 747defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2", 748 VReg_64, i64>; 749 750defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2", 751 VReg_64, i64>; 752 753// GFX7-, GFX10-only flat instructions. 754let SubtargetPredicate = isGFX7GFX10 in { 755defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2", 756 VReg_64, f64, v2f64, VReg_128>; 757} // End SubtargetPredicate = isGFX7GFX10 758 759 760// The names may be flat_atomic_fmin_x2 on some subtargets, but we 761// choose this as the canonical name. 762let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in { 763defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo <"flat_atomic_min_f64", 764 VReg_64, f64>; 765 766defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo <"flat_atomic_max_f64", 767 VReg_64, f64>; 768} 769 770let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in { 771defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>; 772defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>; 773} 774 775let SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst in { 776 defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>; 777 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>; 778} // End SubtargetPredicate = HasFlatBufferGlobalAtomicFaddF64Inst 779 780let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in { 781 defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>; 782 let FPAtomic = 1 in 783 defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2i16>; 784} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts 785 786let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst, FPAtomic = 1 in 787 defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2i16>; 788 789// GFX7-, GFX10-, GFX11-only flat instructions. 790let SubtargetPredicate = isGFX7GFX10GFX11 in { 791 792defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", 793 VGPR_32, f32, v2f32, VReg_64>; 794 795defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin", 796 VGPR_32, f32>; 797 798defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax", 799 VGPR_32, f32>; 800 801} // End SubtargetPredicate = isGFX7GFX10GFX11 802 803// GFX940-, GFX11-only flat instructions. 804let SubtargetPredicate = HasFlatAtomicFaddF32Inst in { 805 defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>; 806} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst 807 808let SubtargetPredicate = isGFX12Plus in { 809 defm FLAT_ATOMIC_CSUB_U32 : FLAT_Atomic_Pseudo <"flat_atomic_csub_u32", VGPR_32, i32>; 810 defm FLAT_ATOMIC_COND_SUB_U32 : FLAT_Atomic_Pseudo <"flat_atomic_cond_sub_u32", VGPR_32, i32>; 811} // End SubtargetPredicate = isGFX12Plus 812 813defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; 814defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>; 815defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>; 816defm GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>; 817defm GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>; 818defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>; 819defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>; 820defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>; 821 822let TiedSourceNotRead = 1 in { 823defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>; 824defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>; 825defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>; 826defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>; 827defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>; 828defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>; 829} 830 831let OtherPredicates = [HasGFX10_BEncoding] in 832defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>; 833 834defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>; 835defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>; 836defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>; 837defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>; 838defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>; 839defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>; 840let OtherPredicates = [HasGFX10_BEncoding] in 841defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>; 842 843defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>; 844defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>; 845 846defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap", 847 VGPR_32, i32, v2i32, VReg_64>; 848 849defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2", 850 VReg_64, i64, v2i64, VReg_128>; 851 852defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap", 853 VGPR_32, i32>; 854 855defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2", 856 VReg_64, i64>; 857 858defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add", 859 VGPR_32, i32>; 860 861defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub", 862 VGPR_32, i32>; 863 864defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin", 865 VGPR_32, i32>; 866 867defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin", 868 VGPR_32, i32>; 869 870defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax", 871 VGPR_32, i32>; 872 873defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax", 874 VGPR_32, i32>; 875 876defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and", 877 VGPR_32, i32>; 878 879defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or", 880 VGPR_32, i32>; 881 882defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor", 883 VGPR_32, i32>; 884 885defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc", 886 VGPR_32, i32>; 887 888defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec", 889 VGPR_32, i32>; 890 891defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2", 892 VReg_64, i64>; 893 894defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2", 895 VReg_64, i64>; 896 897defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2", 898 VReg_64, i64>; 899 900defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2", 901 VReg_64, i64>; 902 903defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2", 904 VReg_64, i64>; 905 906defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2", 907 VReg_64, i64>; 908 909defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2", 910 VReg_64, i64>; 911 912defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2", 913 VReg_64, i64>; 914 915defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2", 916 VReg_64, i64>; 917 918defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", 919 VReg_64, i64>; 920 921defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", 922 VReg_64, i64>; 923 924let SubtargetPredicate = HasGFX10_BEncoding in { 925 defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo <"global_atomic_csub", 926 VGPR_32, i32>; 927} 928 929defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">; 930defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">; 931defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">; 932defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">; 933defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">; 934 935let SubtargetPredicate = isGFX12Plus in { 936 defm GLOBAL_ATOMIC_COND_SUB_U32 : FLAT_Global_Atomic_Pseudo <"global_atomic_cond_sub_u32", VGPR_32, i32>; 937 defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>; 938 939 def GLOBAL_INV : FLAT_Global_Invalidate_Writeback<"global_inv">; 940 def GLOBAL_WB : FLAT_Global_Invalidate_Writeback<"global_wb">; 941 def GLOBAL_WBINV : FLAT_Global_Invalidate_Writeback<"global_wbinv">; 942} // End SubtargetPredicate = isGFX12Plus 943 944defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>; 945defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>; 946defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>; 947defm SCRATCH_LOAD_SSHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>; 948defm SCRATCH_LOAD_DWORD : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>; 949defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>; 950defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>; 951defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>; 952 953let TiedSourceNotRead = 1 in { 954defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>; 955defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>; 956defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>; 957defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>; 958defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>; 959defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>; 960} 961 962defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>; 963defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>; 964defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>; 965defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>; 966defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>; 967defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>; 968 969defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>; 970defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>; 971 972defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">; 973defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">; 974defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">; 975defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">; 976defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">; 977 978let SubtargetPredicate = isGFX12Plus in { 979 let Uses = [EXEC, M0] in { 980 defm GLOBAL_LOAD_BLOCK : FLAT_Global_Load_Pseudo <"global_load_block", VReg_1024>; 981 defm GLOBAL_STORE_BLOCK : FLAT_Global_Store_Pseudo <"global_store_block", VReg_1024>; 982 } 983 let Uses = [EXEC, FLAT_SCR, M0] in { 984 defm SCRATCH_LOAD_BLOCK : FLAT_Scratch_Load_Pseudo <"scratch_load_block", VReg_1024>; 985 defm SCRATCH_STORE_BLOCK : FLAT_Scratch_Store_Pseudo <"scratch_store_block", VReg_1024>; 986 } 987 988 let WaveSizePredicate = isWave32 in { 989 let Mnemonic = "global_load_tr_b128" in 990 defm GLOBAL_LOAD_TR_B128_w32 : FLAT_Global_Load_Pseudo <"global_load_tr_b128_w32", VReg_128>; 991 let Mnemonic = "global_load_tr_b64" in 992 defm GLOBAL_LOAD_TR_B64_w32 : FLAT_Global_Load_Pseudo <"global_load_tr_b64_w32", VReg_64>; 993 } 994 let WaveSizePredicate = isWave64 in { 995 let Mnemonic = "global_load_tr_b128" in 996 defm GLOBAL_LOAD_TR_B128_w64 : FLAT_Global_Load_Pseudo <"global_load_tr_b128_w64", VReg_64>; 997 let Mnemonic = "global_load_tr_b64" in 998 defm GLOBAL_LOAD_TR_B64_w64 : FLAT_Global_Load_Pseudo <"global_load_tr_b64_w64", VGPR_32>; 999 } 1000} // End SubtargetPredicate = isGFX12Plus 1001 1002let SubtargetPredicate = isGFX10Plus in { 1003 defm GLOBAL_ATOMIC_FCMPSWAP : 1004 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>; 1005 defm GLOBAL_ATOMIC_FMIN : 1006 FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>; 1007 defm GLOBAL_ATOMIC_FMAX : 1008 FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>; 1009 defm GLOBAL_ATOMIC_FCMPSWAP_X2 : 1010 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>; 1011} // End SubtargetPredicate = isGFX10Plus 1012 1013let OtherPredicates = [HasAtomicFaddNoRtnInsts] in 1014 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < 1015 "global_atomic_add_f32", VGPR_32, f32 1016 >; 1017let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in 1018 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < 1019 "global_atomic_pk_add_f16", VGPR_32, v2f16 1020 >; 1021let OtherPredicates = [HasAtomicFaddRtnInsts] in 1022 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN < 1023 "global_atomic_add_f32", VGPR_32, f32 1024 >; 1025let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in 1026 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN < 1027 "global_atomic_pk_add_f16", VGPR_32, v2f16 1028 >; 1029 1030//===----------------------------------------------------------------------===// 1031// Flat Patterns 1032//===----------------------------------------------------------------------===// 1033 1034// Patterns for global loads with no offset. 1035class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1036 (vt (node (FlatOffset i64:$vaddr, i32:$offset))), 1037 (inst $vaddr, $offset) 1038>; 1039 1040class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1041 (node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in), 1042 (inst $vaddr, $offset, 0, $in) 1043>; 1044 1045class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1046 (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in), 1047 (inst $vaddr, $offset, 0, $in) 1048>; 1049 1050class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1051 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)), 1052 (inst $saddr, $voffset, $offset, 0, $in) 1053>; 1054 1055class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1056 (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))), 1057 (inst $vaddr, $offset) 1058>; 1059 1060class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1061 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))), 1062 (inst $saddr, $voffset, $offset, 0) 1063>; 1064 1065class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1066 ValueType vt> : GCNPat < 1067 (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset)), 1068 (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 1069>; 1070 1071class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1072 ValueType vt, ValueType data_vt = vt> : GCNPat < 1073 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), data_vt:$data)), 1074 (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset) 1075>; 1076 1077class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1078 ValueType vt> : GCNPat < 1079 (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$data), 1080 (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 1081>; 1082 1083class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1084 (node vt:$data, (FlatOffset i64:$vaddr, i32:$offset)), 1085 (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 1086>; 1087 1088class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1089 (node vt:$data, (GlobalOffset i64:$vaddr, i32:$offset)), 1090 (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 1091>; 1092 1093class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, 1094 ValueType vt, ValueType data_vt = vt> : GCNPat < 1095 // atomic store follows atomic binop convention so the address comes 1096 // first. 1097 (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data), 1098 (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset) 1099>; 1100 1101multiclass FlatAtomicNoRtnPatBase <string inst, string node, ValueType vt, 1102 ValueType data_vt = vt> { 1103 1104 defvar noRtnNode = !cast<PatFrags>(node); 1105 1106 let AddedComplexity = 1 in 1107 def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)), 1108 (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>; 1109} 1110 1111multiclass FlatAtomicNoRtnPatWithAddrSpace<string inst, string node, string addrSpaceSuffix, 1112 ValueType vt> : 1113 FlatAtomicNoRtnPatBase<inst, node # "_noret_" # addrSpaceSuffix, vt, vt>; 1114 1115multiclass FlatAtomicNoRtnPat <string inst, string node, ValueType vt, 1116 ValueType data_vt = vt, bit isIntr = 0> : 1117 FlatAtomicNoRtnPatBase<inst, node # "_noret" # !if(isIntr, "", "_"#vt), vt, data_vt>; 1118 1119 1120multiclass FlatAtomicRtnPatBase <string inst, string node, ValueType vt, 1121 ValueType data_vt = vt> { 1122 1123 defvar rtnNode = !cast<SDPatternOperator>(node); 1124 1125 def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)), 1126 (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>; 1127} 1128 1129multiclass FlatAtomicRtnPatWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1130 ValueType vt> : 1131 FlatAtomicRtnPatBase<inst, intr # "_" # addrSpaceSuffix, vt, vt>; 1132 1133multiclass FlatAtomicRtnPat <string inst, string node, ValueType vt, 1134 ValueType data_vt = vt, bit isIntr = 0> : 1135 FlatAtomicRtnPatBase<inst, node # !if(isIntr, "", "_"#vt), vt, data_vt>; 1136 1137 1138multiclass FlatAtomicPat <string inst, string node, ValueType vt, 1139 ValueType data_vt = vt, bit isIntr = 0> : 1140 FlatAtomicRtnPat<inst, node, vt, data_vt, isIntr>, 1141 FlatAtomicNoRtnPat<inst, node, vt, data_vt, isIntr>; 1142 1143multiclass FlatAtomicIntrNoRtnPat <string inst, string node, ValueType vt, 1144 ValueType data_vt = vt> { 1145 defm : FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>; 1146} 1147 1148multiclass FlatAtomicIntrRtnPat <string inst, string node, ValueType vt, 1149 ValueType data_vt = vt> { 1150 defm : FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>; 1151} 1152 1153multiclass FlatAtomicIntrPat <string inst, string node, ValueType vt, 1154 ValueType data_vt = vt> : 1155 FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>, 1156 FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>; 1157 1158class FlatSignedAtomicPatBase <FLAT_Pseudo inst, SDPatternOperator node, 1159 ValueType vt, ValueType data_vt = vt> : GCNPat < 1160 (vt (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data)), 1161 (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset) 1162>; 1163 1164multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt, 1165 ValueType data_vt = vt, int complexity = 0, 1166 bit isIntr = 0> { 1167 defvar rtnNode = !cast<SDPatternOperator>(node # !if(isIntr, "", "_" # vt)); 1168 defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt)); 1169 1170 let AddedComplexity = complexity in 1171 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>; 1172 1173 let AddedComplexity = !add(complexity, 1) in 1174 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>; 1175} 1176 1177class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1178 (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))), 1179 (inst $vaddr, $offset) 1180>; 1181 1182class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1183 (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset), vt:$in), 1184 (inst $vaddr, $offset, 0, $in) 1185>; 1186 1187class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1188 (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset)), 1189 (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset) 1190>; 1191 1192class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1193 (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))), 1194 (inst $saddr, $offset) 1195>; 1196 1197class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1198 (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset), vt:$in)), 1199 (inst $saddr, $offset, 0, $in) 1200>; 1201 1202class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1203 ValueType vt> : GCNPat < 1204 (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset)), 1205 (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 1206>; 1207 1208class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1209 (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset))), 1210 (inst $vaddr, $saddr, $offset, 0) 1211>; 1212 1213class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1214 ValueType vt> : GCNPat < 1215 (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset)), 1216 (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset) 1217>; 1218 1219class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1220 (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset), vt:$in)), 1221 (inst $vaddr, $saddr, $offset, 0, $in) 1222>; 1223 1224multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1225 def : FlatLoadSignedPat <inst, node, vt> { 1226 let AddedComplexity = 10; 1227 } 1228 1229 def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1230 let AddedComplexity = 11; 1231 } 1232} 1233 1234multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1235 def : FlatSignedLoadPat_D16 <inst, node, vt> { 1236 let AddedComplexity = 10; 1237 } 1238 1239 def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1240 let AddedComplexity = 11; 1241 } 1242} 1243 1244multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node, 1245 ValueType vt> { 1246 def : FlatStoreSignedPat <inst, node, vt> { 1247 let AddedComplexity = 10; 1248 } 1249 1250 def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1251 let AddedComplexity = 11; 1252 } 1253} 1254 1255multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt, 1256 ValueType data_vt = vt> { 1257 let AddedComplexity = 11 in 1258 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), !cast<SDPatternOperator>(node), vt, data_vt>; 1259 1260 let AddedComplexity = 13 in 1261 def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node), vt, data_vt>; 1262} 1263 1264multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt, 1265 ValueType data_vt = vt, bit isPatFrags = 0> { 1266 defvar rtnNode = !if(isPatFrags, !cast<PatFrags>(node), !cast<SDPatternOperator>(node)); 1267 1268 let AddedComplexity = 10 in 1269 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>; 1270 1271 let AddedComplexity = 12 in 1272 def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, vt, data_vt>; 1273} 1274 1275multiclass GlobalFLATAtomicPatsNoRtn<string inst, string node, ValueType vt, 1276 ValueType data_vt = vt, bit isIntr = 0> : 1277 GlobalFLATAtomicPatsNoRtnBase<inst, node # "_noret" # !if(isIntr, "", "_" # vt), vt, data_vt>; 1278 1279multiclass GlobalFLATAtomicPatsRtn<string inst, string node, ValueType vt, 1280 ValueType data_vt = vt, bit isIntr = 0> : 1281 GlobalFLATAtomicPatsRtnBase<inst, node # !if(isIntr, "", "_" # vt), vt, data_vt>; 1282 1283multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt, 1284 ValueType data_vt = vt, bit isIntr = 0> : 1285 GlobalFLATAtomicPatsNoRtn<inst, node, vt, data_vt, isIntr>, 1286 GlobalFLATAtomicPatsRtn<inst, node, vt, data_vt, isIntr>; 1287 1288multiclass GlobalFLATAtomicPatsNoRtnWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1289 ValueType vt, ValueType data_vt = vt> : 1290 GlobalFLATAtomicPatsNoRtnBase<inst, intr # "_noret_" # addrSpaceSuffix, vt, data_vt>; 1291 1292multiclass GlobalFLATAtomicPatsRtnWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1293 ValueType vt, ValueType data_vt = vt> : 1294 GlobalFLATAtomicPatsRtnBase<inst, intr # "_" # addrSpaceSuffix, vt, data_vt, /*isPatFrags*/ 1>; 1295 1296multiclass GlobalFLATAtomicPatsWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1297 ValueType vt, ValueType data_vt = vt> : 1298 GlobalFLATAtomicPatsNoRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>, 1299 GlobalFLATAtomicPatsRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>; 1300 1301multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt, 1302 ValueType data_vt = vt> { 1303 defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>; 1304} 1305 1306multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1307 def : ScratchLoadSignedPat <inst, node, vt> { 1308 let AddedComplexity = 25; 1309 } 1310 1311 def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1312 let AddedComplexity = 26; 1313 } 1314 1315 def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1316 let SubtargetPredicate = HasFlatScratchSVSMode; 1317 let AddedComplexity = 27; 1318 } 1319} 1320 1321multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node, 1322 ValueType vt> { 1323 def : ScratchStoreSignedPat <inst, node, vt> { 1324 let AddedComplexity = 25; 1325 } 1326 1327 def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1328 let AddedComplexity = 26; 1329 } 1330 1331 def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1332 let SubtargetPredicate = HasFlatScratchSVSMode; 1333 let AddedComplexity = 27; 1334 } 1335} 1336 1337multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1338 def : ScratchLoadSignedPat_D16 <inst, node, vt> { 1339 let AddedComplexity = 25; 1340 } 1341 1342 def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1343 let AddedComplexity = 26; 1344 } 1345 1346 def : ScratchLoadSVaddrPat_D16 <!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1347 let SubtargetPredicate = HasFlatScratchSVSMode; 1348 let AddedComplexity = 27; 1349 } 1350} 1351 1352let OtherPredicates = [HasFlatAddressSpace] in { 1353 1354def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>; 1355def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>; 1356def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>; 1357def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>; 1358def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>; 1359def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>; 1360def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>; 1361def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>; 1362def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>; 1363def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>; 1364def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>; 1365def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>; 1366def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>; 1367def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>; 1368def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>; 1369 1370def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>; 1371def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>; 1372 1373def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>; 1374def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>; 1375 1376foreach vt = Reg32Types.types in { 1377def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>; 1378def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>; 1379} 1380 1381foreach vt = VReg_64.RegTypes in { 1382def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>; 1383def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>; 1384} 1385 1386def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>; 1387 1388foreach vt = VReg_128.RegTypes in { 1389def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>; 1390def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>; 1391} 1392 1393def : FlatStorePat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>; 1394def : FlatStorePat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>; 1395def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>; 1396def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>; 1397def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>; 1398def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>; 1399 1400foreach as = [ "flat", "global" ] in { 1401defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>; 1402defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>; 1403defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>; 1404defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>; 1405defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>; 1406defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>; 1407defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>; 1408defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>; 1409defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>; 1410defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>; 1411defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>; 1412defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>; 1413defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>; 1414 1415defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>; 1416defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>; 1417defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>; 1418defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>; 1419defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>; 1420defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>; 1421defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>; 1422defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>; 1423defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>; 1424defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>; 1425defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>; 1426defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>; 1427defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>; 1428 1429let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in { 1430defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_"#as, f32>; 1431defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_"#as, f32>; 1432} 1433 1434let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in { 1435defm : FlatAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_"#as, f64>; 1436defm : FlatAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_"#as, f64>; 1437} 1438 1439} // end foreach as 1440 1441let SubtargetPredicate = isGFX12Plus in { 1442 defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >; 1443 1444 let OtherPredicates = [HasAtomicCSubNoRtnInsts] in 1445 defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>; 1446} 1447 1448def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>; 1449def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>; 1450 1451let OtherPredicates = [HasD16LoadStore] in { 1452def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>; 1453def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>; 1454} 1455 1456let OtherPredicates = [D16PreservesUnusedBits] in { 1457def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>; 1458def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>; 1459def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>; 1460def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>; 1461def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>; 1462def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>; 1463 1464def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>; 1465def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>; 1466def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>; 1467def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>; 1468def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>; 1469def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>; 1470} 1471 1472} // End OtherPredicates = [HasFlatAddressSpace] 1473 1474let OtherPredicates = [HasFlatGlobalInsts] in { 1475 1476defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>; 1477defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i16>; 1478defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i32>; 1479defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i16>; 1480defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>; 1481defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>; 1482defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>; 1483defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>; 1484defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>; 1485defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>; 1486defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>; 1487defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>; 1488defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>; 1489defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>; 1490 1491foreach vt = Reg32Types.types in { 1492defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>; 1493defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>; 1494} 1495 1496foreach vt = VReg_64.RegTypes in { 1497defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, load_global, vt>; 1498defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>; 1499} 1500 1501defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX3, load_global, v3i32>; 1502 1503foreach vt = VReg_128.RegTypes in { 1504defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX4, load_global, vt>; 1505defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>; 1506} 1507 1508// There is no distinction for atomic load lowering during selection; 1509// the memory legalizer will set the cache bits and insert the 1510// appropriate waits. 1511defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>; 1512defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>; 1513 1514defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>; 1515defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>; 1516defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>; 1517defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>; 1518defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>; 1519 1520let OtherPredicates = [HasD16LoadStore] in { 1521defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>; 1522defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>; 1523} 1524 1525let OtherPredicates = [D16PreservesUnusedBits] in { 1526defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>; 1527defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>; 1528defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>; 1529defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>; 1530defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>; 1531defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>; 1532 1533defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>; 1534defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>; 1535defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>; 1536defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>; 1537defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>; 1538defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>; 1539} 1540 1541defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i32>; 1542defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>; 1543defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i32>; 1544defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i16>; 1545defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, atomic_store_32_global, i32>; 1546defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>; 1547 1548defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>; 1549defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>; 1550defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_load_uinc_wrap_global", i32>; 1551defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_load_udec_wrap_global", i32>; 1552defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", "atomic_load_and_global", i32>; 1553defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", "atomic_load_max_global", i32>; 1554defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", "atomic_load_umax_global", i32>; 1555defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", "atomic_load_min_global", i32>; 1556defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", "atomic_load_umin_global", i32>; 1557defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", "atomic_load_or_global", i32>; 1558defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", "atomic_swap_global", i32>; 1559defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_global", i32, v2i32>; 1560defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>; 1561defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>; 1562 1563let OtherPredicates = [HasAtomicCSubNoRtnInsts] in 1564defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>; 1565 1566defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>; 1567defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>; 1568defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_load_uinc_wrap_global", i64>; 1569defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_load_udec_wrap_global", i64>; 1570defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", "atomic_load_and_global", i64>; 1571defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", "atomic_load_max_global", i64>; 1572defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", "atomic_load_umax_global", i64>; 1573defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", "atomic_load_min_global", i64>; 1574defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", "atomic_load_umin_global", i64>; 1575defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", "atomic_load_or_global", i64>; 1576defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", "atomic_swap_global", i64>; 1577defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>; 1578defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>; 1579 1580let SubtargetPredicate = isGFX12Plus in { 1581 defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "global_addrspace", i32>; 1582 1583 let OtherPredicates = [HasAtomicCSubNoRtnInsts] in 1584 defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "global_addrspace", i32>; 1585} 1586 1587let OtherPredicates = [isGFX12Plus] in { 1588 defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ORDERED_ADD_B64", "int_amdgcn_global_atomic_ordered_add_b64", i64, i64, /* isIntr */ 1>; 1589 1590 let WaveSizePredicate = isWave32 in { 1591 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w32, int_amdgcn_global_load_tr_b64, v2i32>; 1592 foreach vt = [v8i16, v8f16, v8bf16] in 1593 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32, int_amdgcn_global_load_tr_b128, vt>; 1594 } 1595 let WaveSizePredicate = isWave64 in { 1596 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w64, int_amdgcn_global_load_tr_b64, i32>; 1597 foreach vt = [v4i16, v4f16, v4bf16] in 1598 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64, int_amdgcn_global_load_tr_b128, vt>; 1599 } 1600} 1601 1602let SubtargetPredicate = HasAtomicFMinFMaxF32GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in { 1603defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>; 1604defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>; 1605defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>; 1606defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>; 1607} 1608 1609let SubtargetPredicate = HasAtomicFMinFMaxF32FlatInsts in { 1610defm : FlatAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>; 1611defm : FlatAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>; 1612defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>; 1613defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>; 1614} 1615 1616let OtherPredicates = [isGFX12Only] in { 1617 // FIXME: Remove these intrinsics 1618 defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin_num", f32>; 1619 defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax_num", f32>; 1620 defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin_num", f32>; 1621 defm : FlatAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax_num", f32>; 1622} 1623 1624let OtherPredicates = [HasAtomicFaddNoRtnInsts] in { 1625defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>; 1626defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>; 1627defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>; 1628} 1629 1630let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in { 1631defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>; 1632defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>; 1633defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>; 1634} 1635 1636let OtherPredicates = [HasAtomicFaddRtnInsts] in { 1637defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>; 1638defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>; 1639defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>; 1640} 1641 1642let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in { 1643defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>; 1644defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>; 1645defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_F16", "atomic_load_fadd_global", v2f16>; 1646} 1647 1648let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts, OtherPredicates = [HasFlatGlobalInsts] in { 1649defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>; 1650defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>; 1651defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>; 1652defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>; 1653} 1654 1655let SubtargetPredicate = HasAtomicFMinFMaxF64FlatInsts in { 1656defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>; 1657defm : FlatAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>; 1658} 1659 1660let OtherPredicates = [HasFlatBufferGlobalAtomicFaddF64Inst] in { 1661defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>; 1662defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>; 1663defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>; 1664defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>; 1665defm : FlatAtomicIntrPat <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", f64>; 1666} 1667 1668let OtherPredicates = [HasFlatAtomicFaddF32Inst] in { 1669defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>; 1670defm : FlatAtomicIntrPat <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", f32>; 1671} 1672 1673let OtherPredicates = [HasAtomicFlatPkAdd16Insts] in { 1674defm : FlatAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", v2f16>; 1675defm : FlatAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>; 1676defm : FlatAtomicPat <"FLAT_ATOMIC_PK_ADD_F16", "atomic_load_fadd_flat", v2f16>; 1677defm : FlatAtomicPat <"FLAT_ATOMIC_PK_ADD_BF16", "atomic_load_fadd_flat", v2bf16>; 1678} 1679 1680let OtherPredicates = [HasAtomicGlobalPkAddBF16Inst] in 1681defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>; 1682defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "atomic_load_fadd_global", v2bf16>; 1683} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 1684 1685let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in { 1686 1687defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i32>; 1688defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i32>; 1689defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i32>; 1690defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>; 1691defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>; 1692defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>; 1693defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>; 1694defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>; 1695defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>; 1696defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, i16>; 1697 1698foreach vt = Reg32Types.types in { 1699defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORD, load_private, vt>; 1700defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>; 1701} 1702 1703foreach vt = VReg_64.RegTypes in { 1704defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX2, load_private, vt>; 1705defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>; 1706} 1707 1708defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>; 1709 1710foreach vt = VReg_128.RegTypes in { 1711defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX4, load_private, vt>; 1712defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>; 1713} 1714 1715defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i32>; 1716defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>; 1717defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, truncstorei16_private, i32>; 1718defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, i16>; 1719defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private, v3i32>; 1720 1721let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in { 1722defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>; 1723defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI, truncstorei8_hi16_private, i32>; 1724} 1725 1726let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in { 1727defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>; 1728defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>; 1729defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2i16>; 1730defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2f16>; 1731defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2i16>; 1732defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2f16>; 1733 1734defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>; 1735defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>; 1736defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2i16>; 1737defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2f16>; 1738defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2i16>; 1739defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f16>; 1740} 1741 1742} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch] 1743 1744//===----------------------------------------------------------------------===// 1745// Target 1746//===----------------------------------------------------------------------===// 1747 1748//===----------------------------------------------------------------------===// 1749// CI 1750//===----------------------------------------------------------------------===// 1751 1752class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps, string asmName = ps.Mnemonic> : 1753 FLAT_Real <op, ps, asmName>, 1754 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> { 1755 let AssemblerPredicate = isGFX7Only; 1756 let DecoderNamespace="GFX7"; 1757} 1758 1759def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>; 1760def FLAT_LOAD_SBYTE_ci : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>; 1761def FLAT_LOAD_USHORT_ci : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>; 1762def FLAT_LOAD_SSHORT_ci : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>; 1763def FLAT_LOAD_DWORD_ci : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>; 1764def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>; 1765def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>; 1766def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>; 1767 1768def FLAT_STORE_BYTE_ci : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>; 1769def FLAT_STORE_SHORT_ci : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>; 1770def FLAT_STORE_DWORD_ci : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>; 1771def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>; 1772def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>; 1773def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>; 1774 1775multiclass FLAT_Real_Atomics_ci <bits<7> op, string opName = NAME, 1776 string asmName = !cast<FLAT_Pseudo>(opName).Mnemonic> { 1777 defvar ps = !cast<FLAT_Pseudo>(opName); 1778 defvar ps_rtn = !cast<FLAT_Pseudo>(opName#"_RTN"); 1779 1780 def _ci : FLAT_Real_ci<op, ps, asmName>; 1781 def _RTN_ci : FLAT_Real_ci<op, ps_rtn, asmName>; 1782} 1783 1784defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30>; 1785defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31>; 1786defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32>; 1787defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33>; 1788defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35>; 1789defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36>; 1790defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37>; 1791defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38>; 1792defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39>; 1793defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a>; 1794defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b>; 1795defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c>; 1796defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d>; 1797defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50>; 1798defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51>; 1799defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52>; 1800defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53>; 1801defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55>; 1802defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56>; 1803defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57>; 1804defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58>; 1805defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59>; 1806defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a>; 1807defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b>; 1808defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c>; 1809defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d>; 1810 1811// CI Only flat instructions 1812defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e>; 1813defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f>; 1814defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40>; 1815defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e>; 1816defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, "FLAT_ATOMIC_MIN_F64", "flat_atomic_fmin_x2">; 1817defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, "FLAT_ATOMIC_MAX_F64", "flat_atomic_fmax_x2">; 1818 1819 1820//===----------------------------------------------------------------------===// 1821// VI 1822//===----------------------------------------------------------------------===// 1823 1824class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : 1825 FLAT_Real <op, ps>, 1826 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> { 1827 let AssemblerPredicate = isGFX8GFX9; 1828 let DecoderNamespace = "GFX8"; 1829 1830 let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); 1831 let AsmString = ps.Mnemonic # 1832 !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands); 1833} 1834 1835multiclass FLAT_Real_AllAddr_vi<bits<7> op, 1836 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { 1837 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>; 1838 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>; 1839} 1840 1841class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> : 1842 FLAT_Real <op, ps>, 1843 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> { 1844 let AssemblerPredicate = isGFX940Plus; 1845 let DecoderNamespace = "GFX9"; 1846 let Inst{13} = ps.sve; 1847 let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); 1848} 1849 1850multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> { 1851 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> { 1852 let AssemblerPredicate = isGFX8GFX9NotGFX940; 1853 let OtherPredicates = [isGFX8GFX9NotGFX940]; 1854 } 1855 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> { 1856 let DecoderNamespace = "GFX9"; 1857 } 1858 let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in { 1859 def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>; 1860 def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>; 1861 def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>; 1862 } 1863} 1864 1865multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op, 1866 string pre_gfx940_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).Mnemonic), 1867 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { 1868 1869 let OtherPredicates = [isGFX8GFX9NotGFX940] in { 1870 def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> { 1871 let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds"; 1872 } 1873 def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> { 1874 let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds"; 1875 } 1876 } 1877 1878 let SubtargetPredicate = isGFX940Plus in { 1879 def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>; 1880 def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1881 } 1882} 1883 1884multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> { 1885 defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>; 1886 let SubtargetPredicate = isGFX940Plus in { 1887 def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>; 1888 def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>; 1889 } 1890} 1891 1892def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; 1893def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>; 1894def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>; 1895def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>; 1896def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>; 1897def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>; 1898def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>; 1899def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>; 1900 1901def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>; 1902def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>; 1903def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>; 1904def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; 1905def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>; 1906def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>; 1907def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>; 1908def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>; 1909 1910def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>; 1911def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; 1912def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>; 1913def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; 1914def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>; 1915def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; 1916 1917multiclass FLAT_Real_Atomics_vi <bits<7> op, 1918 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { 1919 defvar ps = !cast<FLAT_Pseudo>(NAME); 1920 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>; 1921 def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>; 1922} 1923 1924multiclass FLAT_Global_Real_Atomics_vi<bits<7> op, 1925 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> : 1926 FLAT_Real_AllAddr_vi<op, has_sccb> { 1927 def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>; 1928 def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>; 1929} 1930 1931 1932defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40>; 1933defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41>; 1934defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42>; 1935defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43>; 1936defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44>; 1937defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45>; 1938defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46>; 1939defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47>; 1940defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48>; 1941defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49>; 1942defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a>; 1943defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b>; 1944defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c>; 1945defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60>; 1946defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61>; 1947defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62>; 1948defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63>; 1949defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64>; 1950defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65>; 1951defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66>; 1952defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67>; 1953defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68>; 1954defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69>; 1955defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a>; 1956defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b>; 1957defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c>; 1958 1959defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>; 1960defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>; 1961defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>; 1962defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>; 1963defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>; 1964defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>; 1965defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>; 1966defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>; 1967 1968defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>; 1969defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>; 1970defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>; 1971defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>; 1972defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>; 1973defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>; 1974 1975defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>; 1976defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>; 1977defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>; 1978defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>; 1979defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>; 1980defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; 1981defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; 1982defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; 1983 1984defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_LDS <0x026, 0x10>; 1985defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_LDS <0x027, 0x11>; 1986defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>; 1987defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>; 1988defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS <0x02a, 0x14>; 1989 1990defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; 1991defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>; 1992defm GLOBAL_ATOMIC_ADD : FLAT_Global_Real_Atomics_vi <0x42>; 1993defm GLOBAL_ATOMIC_SUB : FLAT_Global_Real_Atomics_vi <0x43>; 1994defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Real_Atomics_vi <0x44>; 1995defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Real_Atomics_vi <0x45>; 1996defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Real_Atomics_vi <0x46>; 1997defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Real_Atomics_vi <0x47>; 1998defm GLOBAL_ATOMIC_AND : FLAT_Global_Real_Atomics_vi <0x48>; 1999defm GLOBAL_ATOMIC_OR : FLAT_Global_Real_Atomics_vi <0x49>; 2000defm GLOBAL_ATOMIC_XOR : FLAT_Global_Real_Atomics_vi <0x4a>; 2001defm GLOBAL_ATOMIC_INC : FLAT_Global_Real_Atomics_vi <0x4b>; 2002defm GLOBAL_ATOMIC_DEC : FLAT_Global_Real_Atomics_vi <0x4c>; 2003defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Real_Atomics_vi <0x60>; 2004defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>; 2005defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Real_Atomics_vi <0x62>; 2006defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Real_Atomics_vi <0x63>; 2007defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Real_Atomics_vi <0x64>; 2008defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Real_Atomics_vi <0x65>; 2009defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Real_Atomics_vi <0x66>; 2010defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Real_Atomics_vi <0x67>; 2011defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Real_Atomics_vi <0x68>; 2012defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Real_Atomics_vi <0x69>; 2013defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Real_Atomics_vi <0x6a>; 2014defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>; 2015defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>; 2016 2017defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_SVE_LDS <0x026, 0x10>; 2018defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_SVE_LDS <0x027, 0x11>; 2019defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_LDS <0x028, 0x12>; 2020defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_LDS <0x029, 0x13>; 2021defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_AllAddr_SVE_LDS <0x02a, 0x14>; 2022 2023defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_SVE_vi <0x10>; 2024defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_SVE_vi <0x11>; 2025defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_SVE_vi <0x12>; 2026defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_SVE_vi <0x13>; 2027defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_SVE_vi <0x14>; 2028defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x15>; 2029defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x16>; 2030defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x17>; 2031defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_SVE_vi <0x18>; 2032defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x19>; 2033defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_SVE_vi <0x20>; 2034defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x21>; 2035defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_SVE_vi <0x22>; 2036defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x23>; 2037defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_SVE_vi <0x24>; 2038defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x25>; 2039defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_SVE_vi <0x1a>; 2040defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x1b>; 2041defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_SVE_vi <0x1c>; 2042defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x1d>; 2043defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x1e>; 2044defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x1f>; 2045 2046let SubtargetPredicate = isGFX8GFX9NotGFX940 in { 2047 // These instructions are encoded differently on gfx90* and gfx940. 2048 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>; 2049 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; 2050} 2051 2052let SubtargetPredicate = isGFX90AOnly in { 2053 defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_vi<0x4f, 0>; 2054 defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_vi<0x50, 0>; 2055 defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_vi<0x51, 0>; 2056 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>; 2057 defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>; 2058 defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>; 2059} // End SubtargetPredicate = isGFX90AOnly 2060 2061multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> { 2062 def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>; 2063 def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 2064} 2065 2066multiclass FLAT_Real_Atomics_gfx940 <bits<7> op> { 2067 defvar ps = !cast<FLAT_Pseudo>(NAME); 2068 def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 2069 def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 2070} 2071 2072multiclass FLAT_Global_Real_Atomics_gfx940<bits<7> op> : 2073 FLAT_Real_AllAddr_gfx940<op> { 2074 def _RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 2075 def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 2076} 2077 2078let SubtargetPredicate = isGFX940Plus in { 2079 // These instructions are encoded differently on gfx90* and gfx940. 2080 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx940 <0x04d>; 2081 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940 <0x04e>; 2082 2083 defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx940<0x4f>; 2084 defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx940<0x50>; 2085 defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx940<0x51>; 2086 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>; 2087 defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>; 2088 defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>; 2089 defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>; 2090 defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>; 2091 defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>; 2092 defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>; 2093} // End SubtargetPredicate = isGFX940Plus 2094 2095//===----------------------------------------------------------------------===// 2096// GFX10. 2097//===----------------------------------------------------------------------===// 2098 2099class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : 2100 FLAT_Real<op, ps, opName>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> { 2101 let AssemblerPredicate = isGFX10Only; 2102 let DecoderNamespace = "GFX10"; 2103 2104 let Inst{11-0} = offset{11-0}; 2105 let Inst{12} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue); 2106 let Inst{54-48} = !cond(ps.enabled_saddr : saddr, 2107 !and(ps.is_flat_scratch, !not(ps.has_vaddr)) : EXEC_HI.Index{6-0}, // ST mode 2108 true : SGPR_NULL_gfxpre11.Index{6-0}); 2109 let Inst{55} = 0; 2110} 2111 2112multiclass FLAT_Real_Base_gfx10<bits<7> op, string psName = NAME, 2113 string asmName = !cast<FLAT_Pseudo>(psName).Mnemonic> { 2114 def _gfx10 : 2115 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName), asmName>; 2116} 2117 2118multiclass FLAT_Real_RTN_gfx10<bits<7> op, string psName = NAME, 2119 string asmName = !cast<FLAT_Pseudo>(psName).Mnemonic> { 2120 def _RTN_gfx10 : 2121 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName#"_RTN"), asmName>; 2122} 2123 2124multiclass FLAT_Real_SADDR_gfx10<bits<7> op, string psName = NAME, 2125 string asmName = !cast<FLAT_Pseudo>(psName#"_SADDR").Mnemonic> { 2126 def _SADDR_gfx10 : 2127 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName#"_SADDR"), asmName>; 2128} 2129 2130multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op, string psName = NAME, 2131 string asmName = !cast<FLAT_Pseudo>(psName#"_SADDR_RTN").Mnemonic> { 2132 def _SADDR_RTN_gfx10 : 2133 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(psName#"_SADDR_RTN"), asmName>; 2134} 2135 2136multiclass FLAT_Real_ST_gfx10<bits<7> op> { 2137 def _ST_gfx10 : 2138 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")>; 2139} 2140 2141multiclass FLAT_Real_AllAddr_gfx10<bits<7> op, string OpName = NAME, 2142 string asmName = !cast<FLAT_Pseudo>(OpName).Mnemonic> : 2143 FLAT_Real_Base_gfx10<op, OpName, asmName>, 2144 FLAT_Real_SADDR_gfx10<op, OpName, asmName>; 2145 2146multiclass FLAT_Real_Atomics_gfx10<bits<7> op, string OpName = NAME, 2147 string asmName = !cast<FLAT_Pseudo>(OpName).Mnemonic> : 2148 FLAT_Real_Base_gfx10<op, OpName, asmName>, 2149 FLAT_Real_RTN_gfx10<op, OpName, asmName>; 2150 2151multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op, string OpName = NAME, 2152 string asmName = !cast<FLAT_Pseudo>(OpName).Mnemonic> : 2153 FLAT_Real_AllAddr_gfx10<op, OpName, asmName>, 2154 FLAT_Real_RTN_gfx10<op, OpName, asmName>, 2155 FLAT_Real_SADDR_RTN_gfx10<op, OpName, asmName>; 2156 2157multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op, string OpName = NAME> : 2158 FLAT_Real_RTN_gfx10<op, OpName>, 2159 FLAT_Real_SADDR_RTN_gfx10<op, OpName>; 2160 2161multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> : 2162 FLAT_Real_Base_gfx10<op>, 2163 FLAT_Real_SADDR_gfx10<op>, 2164 FLAT_Real_ST_gfx10<op>; 2165 2166multiclass FLAT_Real_AllAddr_LDS_gfx10<bits<7> op, 2167 string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).Mnemonic)> { 2168 let AsmString = opname # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in 2169 defm "" : FLAT_Real_Base_gfx10<op>; 2170 2171 let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in 2172 defm "" : FLAT_Real_SADDR_gfx10<op>; 2173} 2174 2175multiclass FLAT_Real_ScratchAllAddr_LDS_gfx10<bits<7> op, 2176 string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).Mnemonic)> { 2177 defm "" : FLAT_Real_AllAddr_LDS_gfx10<op>; 2178 2179 let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_ST").AsmOperands # " lds" in 2180 defm "" : FLAT_Real_ST_gfx10<op>; 2181} 2182 2183// ENC_FLAT. 2184defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>; 2185defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>; 2186defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx10<0x00a>; 2187defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx10<0x00b>; 2188defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx10<0x00c>; 2189defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>; 2190defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>; 2191defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>; 2192defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx10<0x018>; 2193defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx10<0x019>; 2194defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx10<0x01a>; 2195defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>; 2196defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx10<0x01c>; 2197defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx10<0x01d>; 2198defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx10<0x01e>; 2199defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx10<0x01f>; 2200defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx10<0x020>; 2201defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>; 2202defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx10<0x022>; 2203defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>; 2204defm FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx10<0x024>; 2205defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>; 2206defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx10<0x030>; 2207defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx10<0x031>; 2208defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx10<0x032>; 2209defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx10<0x033>; 2210defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx10<0x035>; 2211defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx10<0x036>; 2212defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx10<0x037>; 2213defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx10<0x038>; 2214defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx10<0x039>; 2215defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx10<0x03a>; 2216defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_gfx10<0x03b>; 2217defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx10<0x03c>; 2218defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx10<0x03d>; 2219defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>; 2220defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx10<0x03f>; 2221defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx10<0x040>; 2222defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx10<0x050>; 2223defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x051>; 2224defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx10<0x052>; 2225defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx10<0x053>; 2226defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx10<0x055>; 2227defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx10<0x056>; 2228defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx10<0x057>; 2229defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx10<0x058>; 2230defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx10<0x059>; 2231defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx10<0x05a>; 2232defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>; 2233defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>; 2234defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>; 2235defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>; 2236defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f, "FLAT_ATOMIC_MIN_F64", "flat_atomic_fmin_x2">; 2237defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060, "FLAT_ATOMIC_MAX_F64", "flat_atomic_fmax_x2">; 2238 2239 2240// ENC_FLAT_GLBL. 2241defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>; 2242defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>; 2243defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>; 2244defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>; 2245defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>; 2246defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>; 2247defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>; 2248defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>; 2249defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>; 2250defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>; 2251defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>; 2252defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>; 2253defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>; 2254defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>; 2255defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>; 2256defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>; 2257defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>; 2258defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>; 2259defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>; 2260defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>; 2261defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>; 2262defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>; 2263defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>; 2264defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>; 2265defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>; 2266defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>; 2267defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_gfx10<0x034>; 2268defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>; 2269defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>; 2270defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>; 2271defm GLOBAL_ATOMIC_UMAX : FLAT_Real_GlblAtomics_gfx10<0x038>; 2272defm GLOBAL_ATOMIC_AND : FLAT_Real_GlblAtomics_gfx10<0x039>; 2273defm GLOBAL_ATOMIC_OR : FLAT_Real_GlblAtomics_gfx10<0x03a>; 2274defm GLOBAL_ATOMIC_XOR : FLAT_Real_GlblAtomics_gfx10<0x03b>; 2275defm GLOBAL_ATOMIC_INC : FLAT_Real_GlblAtomics_gfx10<0x03c>; 2276defm GLOBAL_ATOMIC_DEC : FLAT_Real_GlblAtomics_gfx10<0x03d>; 2277defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x03e>; 2278defm GLOBAL_ATOMIC_FMIN : FLAT_Real_GlblAtomics_gfx10<0x03f>; 2279defm GLOBAL_ATOMIC_FMAX : FLAT_Real_GlblAtomics_gfx10<0x040>; 2280defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x050>; 2281defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x051>; 2282defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Real_GlblAtomics_gfx10<0x052>; 2283defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Real_GlblAtomics_gfx10<0x053>; 2284defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x055>; 2285defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x056>; 2286defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x057>; 2287defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x058>; 2288defm GLOBAL_ATOMIC_AND_X2 : FLAT_Real_GlblAtomics_gfx10<0x059>; 2289defm GLOBAL_ATOMIC_OR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05a>; 2290defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05b>; 2291defm GLOBAL_ATOMIC_INC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05c>; 2292defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>; 2293defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>; 2294defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f, "GLOBAL_ATOMIC_MIN_F64", "global_atomic_fmin_x2">; 2295defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060, "GLOBAL_ATOMIC_MAX_F64", "global_atomic_fmax_x2">; 2296defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x016>; 2297defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>; 2298 2299defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_LDS_gfx10 <0x008>; 2300defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_LDS_gfx10 <0x009>; 2301defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00a>; 2302defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00b>; 2303defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS_gfx10 <0x00c>; 2304 2305// ENC_FLAT_SCRATCH. 2306defm SCRATCH_LOAD_UBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x008>; 2307defm SCRATCH_LOAD_SBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x009>; 2308defm SCRATCH_LOAD_USHORT : FLAT_Real_ScratchAllAddr_gfx10<0x00a>; 2309defm SCRATCH_LOAD_SSHORT : FLAT_Real_ScratchAllAddr_gfx10<0x00b>; 2310defm SCRATCH_LOAD_DWORD : FLAT_Real_ScratchAllAddr_gfx10<0x00c>; 2311defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_ScratchAllAddr_gfx10<0x00d>; 2312defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_ScratchAllAddr_gfx10<0x00e>; 2313defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_ScratchAllAddr_gfx10<0x00f>; 2314defm SCRATCH_STORE_BYTE : FLAT_Real_ScratchAllAddr_gfx10<0x018>; 2315defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x019>; 2316defm SCRATCH_STORE_SHORT : FLAT_Real_ScratchAllAddr_gfx10<0x01a>; 2317defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>; 2318defm SCRATCH_STORE_DWORD : FLAT_Real_ScratchAllAddr_gfx10<0x01c>; 2319defm SCRATCH_STORE_DWORDX2 : FLAT_Real_ScratchAllAddr_gfx10<0x01d>; 2320defm SCRATCH_STORE_DWORDX4 : FLAT_Real_ScratchAllAddr_gfx10<0x01e>; 2321defm SCRATCH_STORE_DWORDX3 : FLAT_Real_ScratchAllAddr_gfx10<0x01f>; 2322defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x020>; 2323defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x021>; 2324defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x022>; 2325defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x023>; 2326defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x024>; 2327defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x025>; 2328 2329defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x008>; 2330defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x009>; 2331defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00a>; 2332defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00b>; 2333defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00c>; 2334 2335//===----------------------------------------------------------------------===// 2336// GFX11 2337//===----------------------------------------------------------------------===// 2338 2339class get_FLAT_ps<string name> { 2340 string Mnemonic = !cast<FLAT_Pseudo>(name).Mnemonic; 2341} 2342 2343multiclass FLAT_Real_gfx11 <bits<7> op, 2344 string name = get_FLAT_ps<NAME>.Mnemonic> { 2345 defvar ps = !cast<FLAT_Pseudo>(NAME); 2346 def _gfx11 : FLAT_Real <op, ps, name>, 2347 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX11> { 2348 let AssemblerPredicate = isGFX11Only; 2349 let DecoderNamespace = "GFX11"; 2350 2351 let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue); 2352 let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue); 2353 let Inst{15} = cpol{CPolBit.SLC}; 2354 let Inst{17-16} = seg; 2355 let Inst{54-48} = !if(ps.enabled_saddr, saddr, SGPR_NULL_gfx11plus.Index); 2356 let Inst{55} = ps.sve; 2357 } 2358} 2359 2360multiclass FLAT_Aliases_gfx11<string name> { 2361 defvar ps = get_FLAT_ps<NAME>; 2362 if !ne(ps.Mnemonic, name) then 2363 def : AMDGPUMnemonicAlias<ps.Mnemonic, name> { 2364 let AssemblerPredicate = isGFX11Only; 2365 } 2366} 2367 2368multiclass FLAT_Real_Base_gfx11<bits<7> op, 2369 string name = get_FLAT_ps<NAME>.Mnemonic> : 2370 FLAT_Aliases_gfx11<name>, 2371 FLAT_Real_gfx11<op, name>; 2372 2373multiclass FLAT_Real_Atomics_gfx11<bits<7> op, 2374 string name = get_FLAT_ps<NAME>.Mnemonic> : 2375 FLAT_Real_Base_gfx11<op, name> { 2376 defm _RTN : FLAT_Real_gfx11<op, name>; 2377} 2378 2379multiclass GLOBAL_Real_AllAddr_gfx11<bits<7> op, 2380 string name = get_FLAT_ps<NAME>.Mnemonic> : 2381 FLAT_Real_Base_gfx11<op, name> { 2382 defm _SADDR : FLAT_Real_gfx11<op, name>; 2383} 2384 2385multiclass GLOBAL_Real_Atomics_gfx11<bits<7> op, 2386 string name = get_FLAT_ps<NAME>.Mnemonic> : 2387 GLOBAL_Real_AllAddr_gfx11<op, name> { 2388 defm _RTN : FLAT_Real_gfx11<op, name>; 2389 defm _SADDR_RTN : FLAT_Real_gfx11<op, name>; 2390} 2391 2392multiclass SCRATCH_Real_AllAddr_gfx11<bits<7> op, 2393 string name = get_FLAT_ps<NAME>.Mnemonic> : 2394 FLAT_Real_Base_gfx11<op, name> { 2395 defm _SADDR : FLAT_Real_gfx11<op, name>; 2396 defm _ST : FLAT_Real_gfx11<op, name>; 2397 defm _SVS : FLAT_Real_gfx11<op, name>; 2398} 2399 2400// ENC_FLAT. 2401defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx11<0x010, "flat_load_u8">; 2402defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx11<0x011, "flat_load_i8">; 2403defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx11<0x012, "flat_load_u16">; 2404defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx11<0x013, "flat_load_i16">; 2405defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx11<0x014, "flat_load_b32">; 2406defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx11<0x015, "flat_load_b64">; 2407defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx11<0x016, "flat_load_b96">; 2408defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx11<0x017, "flat_load_b128">; 2409defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx11<0x018, "flat_store_b8">; 2410defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx11<0x019, "flat_store_b16">; 2411defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx11<0x01a, "flat_store_b32">; 2412defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx11<0x01b, "flat_store_b64">; 2413defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx11<0x01c, "flat_store_b96">; 2414defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx11<0x01d, "flat_store_b128">; 2415defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx11<0x01e, "flat_load_d16_u8">; 2416defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx11<0x01f, "flat_load_d16_i8">; 2417defm FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx11<0x020, "flat_load_d16_b16">; 2418defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx11<0x021, "flat_load_d16_hi_u8">; 2419defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx11<0x022, "flat_load_d16_hi_i8">; 2420defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx11<0x023, "flat_load_d16_hi_b16">; 2421defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx11<0x024, "flat_store_d16_hi_b8">; 2422defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx11<0x025, "flat_store_d16_hi_b16">; 2423defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx11<0x033, "flat_atomic_swap_b32">; 2424defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx11<0x034, "flat_atomic_cmpswap_b32">; 2425defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx11<0x035, "flat_atomic_add_u32">; 2426defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx11<0x036, "flat_atomic_sub_u32">; 2427defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx11<0x038, "flat_atomic_min_i32">; 2428defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx11<0x039, "flat_atomic_min_u32">; 2429defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx11<0x03a, "flat_atomic_max_i32">; 2430defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx11<0x03b, "flat_atomic_max_u32">; 2431defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx11<0x03c, "flat_atomic_and_b32">; 2432defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx11<0x03d, "flat_atomic_or_b32">; 2433defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_gfx11<0x03e, "flat_atomic_xor_b32">; 2434defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx11<0x03f, "flat_atomic_inc_u32">; 2435defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx11<0x040, "flat_atomic_dec_u32">; 2436defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx11<0x041, "flat_atomic_swap_b64">; 2437defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx11<0x042, "flat_atomic_cmpswap_b64">; 2438defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx11<0x043, "flat_atomic_add_u64">; 2439defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx11<0x044, "flat_atomic_sub_u64">; 2440defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx11<0x045, "flat_atomic_min_i64">; 2441defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx11<0x046, "flat_atomic_min_u64">; 2442defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx11<0x047, "flat_atomic_max_i64">; 2443defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx11<0x048, "flat_atomic_max_u64">; 2444defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx11<0x049, "flat_atomic_and_b64">; 2445defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx11<0x04a, "flat_atomic_or_b64">; 2446defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx11<0x04b, "flat_atomic_xor_b64">; 2447defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx11<0x04c, "flat_atomic_inc_u64">; 2448defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx11<0x04d, "flat_atomic_dec_u64">; 2449defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx11<0x050, "flat_atomic_cmpswap_f32">; 2450defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx11<0x051, "flat_atomic_min_f32">; 2451defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx11<0x052, "flat_atomic_max_f32">; 2452defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_gfx11<0x056>; 2453 2454// ENC_FLAT_GLBL. 2455defm GLOBAL_LOAD_UBYTE : GLOBAL_Real_AllAddr_gfx11<0x010, "global_load_u8">; 2456defm GLOBAL_LOAD_SBYTE : GLOBAL_Real_AllAddr_gfx11<0x011, "global_load_i8">; 2457defm GLOBAL_LOAD_USHORT : GLOBAL_Real_AllAddr_gfx11<0x012, "global_load_u16">; 2458defm GLOBAL_LOAD_SSHORT : GLOBAL_Real_AllAddr_gfx11<0x013, "global_load_i16">; 2459defm GLOBAL_LOAD_DWORD : GLOBAL_Real_AllAddr_gfx11<0x014, "global_load_b32">; 2460defm GLOBAL_LOAD_DWORDX2 : GLOBAL_Real_AllAddr_gfx11<0x015, "global_load_b64">; 2461defm GLOBAL_LOAD_DWORDX3 : GLOBAL_Real_AllAddr_gfx11<0x016, "global_load_b96">; 2462defm GLOBAL_LOAD_DWORDX4 : GLOBAL_Real_AllAddr_gfx11<0x017, "global_load_b128">; 2463defm GLOBAL_STORE_BYTE : GLOBAL_Real_AllAddr_gfx11<0x018, "global_store_b8">; 2464defm GLOBAL_STORE_SHORT : GLOBAL_Real_AllAddr_gfx11<0x019, "global_store_b16">; 2465defm GLOBAL_STORE_DWORD : GLOBAL_Real_AllAddr_gfx11<0x01a, "global_store_b32">; 2466defm GLOBAL_STORE_DWORDX2 : GLOBAL_Real_AllAddr_gfx11<0x01b, "global_store_b64">; 2467defm GLOBAL_STORE_DWORDX3 : GLOBAL_Real_AllAddr_gfx11<0x01c, "global_store_b96">; 2468defm GLOBAL_STORE_DWORDX4 : GLOBAL_Real_AllAddr_gfx11<0x01d, "global_store_b128">; 2469defm GLOBAL_LOAD_UBYTE_D16 : GLOBAL_Real_AllAddr_gfx11<0x01e, "global_load_d16_u8">; 2470defm GLOBAL_LOAD_SBYTE_D16 : GLOBAL_Real_AllAddr_gfx11<0x01f, "global_load_d16_i8">; 2471defm GLOBAL_LOAD_SHORT_D16 : GLOBAL_Real_AllAddr_gfx11<0x020, "global_load_d16_b16">; 2472defm GLOBAL_LOAD_UBYTE_D16_HI : GLOBAL_Real_AllAddr_gfx11<0x021, "global_load_d16_hi_u8">; 2473defm GLOBAL_LOAD_SBYTE_D16_HI : GLOBAL_Real_AllAddr_gfx11<0x022, "global_load_d16_hi_i8">; 2474defm GLOBAL_LOAD_SHORT_D16_HI : GLOBAL_Real_AllAddr_gfx11<0x023, "global_load_d16_hi_b16">; 2475defm GLOBAL_STORE_BYTE_D16_HI : GLOBAL_Real_AllAddr_gfx11<0x024, "global_store_d16_hi_b8">; 2476defm GLOBAL_STORE_SHORT_D16_HI : GLOBAL_Real_AllAddr_gfx11<0x025, "global_store_d16_hi_b16">; 2477defm GLOBAL_LOAD_DWORD_ADDTID : GLOBAL_Real_AllAddr_gfx11<0x028, "global_load_addtid_b32">; 2478defm GLOBAL_STORE_DWORD_ADDTID : GLOBAL_Real_AllAddr_gfx11<0x029, "global_store_addtid_b32">; 2479defm GLOBAL_ATOMIC_SWAP : GLOBAL_Real_Atomics_gfx11<0x033, "global_atomic_swap_b32">; 2480defm GLOBAL_ATOMIC_CMPSWAP : GLOBAL_Real_Atomics_gfx11<0x034, "global_atomic_cmpswap_b32">; 2481defm GLOBAL_ATOMIC_ADD : GLOBAL_Real_Atomics_gfx11<0x035, "global_atomic_add_u32">; 2482defm GLOBAL_ATOMIC_SUB : GLOBAL_Real_Atomics_gfx11<0x036, "global_atomic_sub_u32">; 2483defm GLOBAL_ATOMIC_CSUB : GLOBAL_Real_Atomics_gfx11<0x037, "global_atomic_csub_u32">; 2484defm GLOBAL_ATOMIC_SMIN : GLOBAL_Real_Atomics_gfx11<0x038, "global_atomic_min_i32">; 2485defm GLOBAL_ATOMIC_UMIN : GLOBAL_Real_Atomics_gfx11<0x039, "global_atomic_min_u32">; 2486defm GLOBAL_ATOMIC_SMAX : GLOBAL_Real_Atomics_gfx11<0x03a, "global_atomic_max_i32">; 2487defm GLOBAL_ATOMIC_UMAX : GLOBAL_Real_Atomics_gfx11<0x03b, "global_atomic_max_u32">; 2488defm GLOBAL_ATOMIC_AND : GLOBAL_Real_Atomics_gfx11<0x03c, "global_atomic_and_b32">; 2489defm GLOBAL_ATOMIC_OR : GLOBAL_Real_Atomics_gfx11<0x03d, "global_atomic_or_b32">; 2490defm GLOBAL_ATOMIC_XOR : GLOBAL_Real_Atomics_gfx11<0x03e, "global_atomic_xor_b32">; 2491defm GLOBAL_ATOMIC_INC : GLOBAL_Real_Atomics_gfx11<0x03f, "global_atomic_inc_u32">; 2492defm GLOBAL_ATOMIC_DEC : GLOBAL_Real_Atomics_gfx11<0x040, "global_atomic_dec_u32">; 2493defm GLOBAL_ATOMIC_SWAP_X2 : GLOBAL_Real_Atomics_gfx11<0x041, "global_atomic_swap_b64">; 2494defm GLOBAL_ATOMIC_CMPSWAP_X2 : GLOBAL_Real_Atomics_gfx11<0x042, "global_atomic_cmpswap_b64">; 2495defm GLOBAL_ATOMIC_ADD_X2 : GLOBAL_Real_Atomics_gfx11<0x043, "global_atomic_add_u64">; 2496defm GLOBAL_ATOMIC_SUB_X2 : GLOBAL_Real_Atomics_gfx11<0x044, "global_atomic_sub_u64">; 2497defm GLOBAL_ATOMIC_SMIN_X2 : GLOBAL_Real_Atomics_gfx11<0x045, "global_atomic_min_i64">; 2498defm GLOBAL_ATOMIC_UMIN_X2 : GLOBAL_Real_Atomics_gfx11<0x046, "global_atomic_min_u64">; 2499defm GLOBAL_ATOMIC_SMAX_X2 : GLOBAL_Real_Atomics_gfx11<0x047, "global_atomic_max_i64">; 2500defm GLOBAL_ATOMIC_UMAX_X2 : GLOBAL_Real_Atomics_gfx11<0x048, "global_atomic_max_u64">; 2501defm GLOBAL_ATOMIC_AND_X2 : GLOBAL_Real_Atomics_gfx11<0x049, "global_atomic_and_b64">; 2502defm GLOBAL_ATOMIC_OR_X2 : GLOBAL_Real_Atomics_gfx11<0x04a, "global_atomic_or_b64">; 2503defm GLOBAL_ATOMIC_XOR_X2 : GLOBAL_Real_Atomics_gfx11<0x04b, "global_atomic_xor_b64">; 2504defm GLOBAL_ATOMIC_INC_X2 : GLOBAL_Real_Atomics_gfx11<0x04c, "global_atomic_inc_u64">; 2505defm GLOBAL_ATOMIC_DEC_X2 : GLOBAL_Real_Atomics_gfx11<0x04d, "global_atomic_dec_u64">; 2506defm GLOBAL_ATOMIC_FCMPSWAP : GLOBAL_Real_Atomics_gfx11<0x050, "global_atomic_cmpswap_f32">; 2507defm GLOBAL_ATOMIC_FMIN : GLOBAL_Real_Atomics_gfx11<0x051, "global_atomic_min_f32">; 2508defm GLOBAL_ATOMIC_FMAX : GLOBAL_Real_Atomics_gfx11<0x052, "global_atomic_max_f32">; 2509defm GLOBAL_ATOMIC_ADD_F32 : GLOBAL_Real_Atomics_gfx11<0x056>; 2510 2511// ENC_FLAT_SCRATCH. 2512defm SCRATCH_LOAD_UBYTE : SCRATCH_Real_AllAddr_gfx11<0x10, "scratch_load_u8">; 2513defm SCRATCH_LOAD_SBYTE : SCRATCH_Real_AllAddr_gfx11<0x11, "scratch_load_i8">; 2514defm SCRATCH_LOAD_USHORT : SCRATCH_Real_AllAddr_gfx11<0x12, "scratch_load_u16">; 2515defm SCRATCH_LOAD_SSHORT : SCRATCH_Real_AllAddr_gfx11<0x13, "scratch_load_i16">; 2516defm SCRATCH_LOAD_DWORD : SCRATCH_Real_AllAddr_gfx11<0x14, "scratch_load_b32">; 2517defm SCRATCH_LOAD_DWORDX2 : SCRATCH_Real_AllAddr_gfx11<0x15, "scratch_load_b64">; 2518defm SCRATCH_LOAD_DWORDX3 : SCRATCH_Real_AllAddr_gfx11<0x16, "scratch_load_b96">; 2519defm SCRATCH_LOAD_DWORDX4 : SCRATCH_Real_AllAddr_gfx11<0x17, "scratch_load_b128">; 2520defm SCRATCH_STORE_BYTE : SCRATCH_Real_AllAddr_gfx11<0x18, "scratch_store_b8">; 2521defm SCRATCH_STORE_SHORT : SCRATCH_Real_AllAddr_gfx11<0x19, "scratch_store_b16">; 2522defm SCRATCH_STORE_DWORD : SCRATCH_Real_AllAddr_gfx11<0x1a, "scratch_store_b32">; 2523defm SCRATCH_STORE_DWORDX2 : SCRATCH_Real_AllAddr_gfx11<0x1b, "scratch_store_b64">; 2524defm SCRATCH_STORE_DWORDX3 : SCRATCH_Real_AllAddr_gfx11<0x1c, "scratch_store_b96">; 2525defm SCRATCH_STORE_DWORDX4 : SCRATCH_Real_AllAddr_gfx11<0x1d, "scratch_store_b128">; 2526defm SCRATCH_LOAD_UBYTE_D16 : SCRATCH_Real_AllAddr_gfx11<0x1e, "scratch_load_d16_u8">; 2527defm SCRATCH_LOAD_SBYTE_D16 : SCRATCH_Real_AllAddr_gfx11<0x1f, "scratch_load_d16_i8">; 2528defm SCRATCH_LOAD_SHORT_D16 : SCRATCH_Real_AllAddr_gfx11<0x20, "scratch_load_d16_b16">; 2529defm SCRATCH_LOAD_UBYTE_D16_HI : SCRATCH_Real_AllAddr_gfx11<0x21, "scratch_load_d16_hi_u8">; 2530defm SCRATCH_LOAD_SBYTE_D16_HI : SCRATCH_Real_AllAddr_gfx11<0x22, "scratch_load_d16_hi_i8">; 2531defm SCRATCH_LOAD_SHORT_D16_HI : SCRATCH_Real_AllAddr_gfx11<0x23, "scratch_load_d16_hi_b16">; 2532defm SCRATCH_STORE_BYTE_D16_HI : SCRATCH_Real_AllAddr_gfx11<0x24, "scratch_store_d16_hi_b8">; 2533defm SCRATCH_STORE_SHORT_D16_HI : SCRATCH_Real_AllAddr_gfx11<0x25, "scratch_store_d16_hi_b16">; 2534 2535//===----------------------------------------------------------------------===// 2536// GFX12 2537//===----------------------------------------------------------------------===// 2538 2539multiclass VFLAT_Real_gfx12 <bits<8> op, string name = get_FLAT_ps<NAME>.Mnemonic> { 2540 defvar ps = !cast<FLAT_Pseudo>(NAME); 2541 def _gfx12 : VFLAT_Real <op, ps, name>, 2542 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX12> { 2543 let AssemblerPredicate = isGFX12Only; 2544 let DecoderNamespace = "GFX12"; 2545 2546 let Inst{25-24} = {ps.is_flat_global, ps.is_flat_scratch}; 2547 } 2548} 2549 2550multiclass VFLAT_Aliases_gfx12<string name, string alias = name> { 2551 defvar ps = get_FLAT_ps<NAME>; 2552 let AssemblerPredicate = isGFX12Only in { 2553 if !ne(ps.Mnemonic, name) then 2554 def : AMDGPUMnemonicAlias<ps.Mnemonic, name>; 2555 if !ne(alias, name) then 2556 def : AMDGPUMnemonicAlias<alias, name>; 2557 } 2558} 2559 2560multiclass VFLAT_Real_Base_gfx12<bits<8> op, 2561 string name = get_FLAT_ps<NAME>.Mnemonic, 2562 string alias = name> : 2563 VFLAT_Aliases_gfx12<name, alias>, 2564 VFLAT_Real_gfx12<op, name>; 2565 2566multiclass VFLAT_Real_Atomics_gfx12<bits<8> op, 2567 string name = get_FLAT_ps<NAME>.Mnemonic, 2568 string alias = name> : 2569 VFLAT_Real_Base_gfx12<op, name, alias> { 2570 defm _RTN : VFLAT_Real_gfx12<op, name>; 2571} 2572 2573multiclass VGLOBAL_Real_AllAddr_gfx12<bits<8> op, 2574 string name = get_FLAT_ps<NAME>.Mnemonic, 2575 string alias = name> : 2576 VFLAT_Real_Base_gfx12<op, name, alias> { 2577 defm _SADDR : VFLAT_Real_gfx12<op, name>; 2578} 2579 2580multiclass VGLOBAL_Real_AllAddr_gfx12_w64<bits<8> op, 2581 string name = get_FLAT_ps<NAME>.Mnemonic> : 2582 VFLAT_Aliases_gfx12<name> { 2583 let DecoderNamespace = "GFX12W64" in { 2584 defm "" : VFLAT_Real_gfx12<op, name>; 2585 defm _SADDR : VFLAT_Real_gfx12<op, name>; 2586 } 2587} 2588 2589multiclass VGLOBAL_Real_Atomics_gfx12<bits<8> op, 2590 string name = get_FLAT_ps<NAME>.Mnemonic, 2591 string alias = name> : 2592 VGLOBAL_Real_AllAddr_gfx12<op, name, alias> { 2593 defm _RTN : VFLAT_Real_gfx12<op, name>; 2594 defm _SADDR_RTN : VFLAT_Real_gfx12<op, name>; 2595} 2596 2597multiclass VSCRATCH_Real_AllAddr_gfx12<bits<8> op, 2598 string name = get_FLAT_ps<NAME>.Mnemonic> : 2599 VFLAT_Real_Base_gfx12<op, name> { 2600 defm _SADDR : VFLAT_Real_gfx12<op, name>; 2601 defm _ST : VFLAT_Real_gfx12<op, name>; 2602 defm _SVS : VFLAT_Real_gfx12<op, name>; 2603} 2604 2605// ENC_VFLAT. 2606defm FLAT_LOAD_UBYTE : VFLAT_Real_Base_gfx12<0x010, "flat_load_u8">; 2607defm FLAT_LOAD_SBYTE : VFLAT_Real_Base_gfx12<0x011, "flat_load_i8">; 2608defm FLAT_LOAD_USHORT : VFLAT_Real_Base_gfx12<0x012, "flat_load_u16">; 2609defm FLAT_LOAD_SSHORT : VFLAT_Real_Base_gfx12<0x013, "flat_load_i16">; 2610defm FLAT_LOAD_DWORD : VFLAT_Real_Base_gfx12<0x014, "flat_load_b32">; 2611defm FLAT_LOAD_DWORDX2 : VFLAT_Real_Base_gfx12<0x015, "flat_load_b64">; 2612defm FLAT_LOAD_DWORDX3 : VFLAT_Real_Base_gfx12<0x016, "flat_load_b96">; 2613defm FLAT_LOAD_DWORDX4 : VFLAT_Real_Base_gfx12<0x017, "flat_load_b128">; 2614defm FLAT_STORE_BYTE : VFLAT_Real_Base_gfx12<0x018, "flat_store_b8">; 2615defm FLAT_STORE_SHORT : VFLAT_Real_Base_gfx12<0x019, "flat_store_b16">; 2616defm FLAT_STORE_DWORD : VFLAT_Real_Base_gfx12<0x01a, "flat_store_b32">; 2617defm FLAT_STORE_DWORDX2 : VFLAT_Real_Base_gfx12<0x01b, "flat_store_b64">; 2618defm FLAT_STORE_DWORDX3 : VFLAT_Real_Base_gfx12<0x01c, "flat_store_b96">; 2619defm FLAT_STORE_DWORDX4 : VFLAT_Real_Base_gfx12<0x01d, "flat_store_b128">; 2620defm FLAT_LOAD_UBYTE_D16 : VFLAT_Real_Base_gfx12<0x01e, "flat_load_d16_u8">; 2621defm FLAT_LOAD_SBYTE_D16 : VFLAT_Real_Base_gfx12<0x01f, "flat_load_d16_i8">; 2622defm FLAT_LOAD_SHORT_D16 : VFLAT_Real_Base_gfx12<0x020, "flat_load_d16_b16">; 2623defm FLAT_LOAD_UBYTE_D16_HI : VFLAT_Real_Base_gfx12<0x021, "flat_load_d16_hi_u8">; 2624defm FLAT_LOAD_SBYTE_D16_HI : VFLAT_Real_Base_gfx12<0x022, "flat_load_d16_hi_i8">; 2625defm FLAT_LOAD_SHORT_D16_HI : VFLAT_Real_Base_gfx12<0x023, "flat_load_d16_hi_b16">; 2626defm FLAT_STORE_BYTE_D16_HI : VFLAT_Real_Base_gfx12<0x024, "flat_store_d16_hi_b8">; 2627defm FLAT_STORE_SHORT_D16_HI : VFLAT_Real_Base_gfx12<0x025, "flat_store_d16_hi_b16">; 2628defm FLAT_ATOMIC_SWAP : VFLAT_Real_Atomics_gfx12<0x033, "flat_atomic_swap_b32">; 2629defm FLAT_ATOMIC_CMPSWAP : VFLAT_Real_Atomics_gfx12<0x034, "flat_atomic_cmpswap_b32">; 2630defm FLAT_ATOMIC_ADD : VFLAT_Real_Atomics_gfx12<0x035, "flat_atomic_add_u32">; 2631defm FLAT_ATOMIC_SUB : VFLAT_Real_Atomics_gfx12<0x036, "flat_atomic_sub_u32">; 2632defm FLAT_ATOMIC_CSUB_U32 : VFLAT_Real_Atomics_gfx12<0x037, "flat_atomic_sub_clamp_u32">; 2633defm FLAT_ATOMIC_SMIN : VFLAT_Real_Atomics_gfx12<0x038, "flat_atomic_min_i32">; 2634defm FLAT_ATOMIC_UMIN : VFLAT_Real_Atomics_gfx12<0x039, "flat_atomic_min_u32">; 2635defm FLAT_ATOMIC_SMAX : VFLAT_Real_Atomics_gfx12<0x03a, "flat_atomic_max_i32">; 2636defm FLAT_ATOMIC_UMAX : VFLAT_Real_Atomics_gfx12<0x03b, "flat_atomic_max_u32">; 2637defm FLAT_ATOMIC_AND : VFLAT_Real_Atomics_gfx12<0x03c, "flat_atomic_and_b32">; 2638defm FLAT_ATOMIC_OR : VFLAT_Real_Atomics_gfx12<0x03d, "flat_atomic_or_b32">; 2639defm FLAT_ATOMIC_XOR : VFLAT_Real_Atomics_gfx12<0x03e, "flat_atomic_xor_b32">; 2640defm FLAT_ATOMIC_INC : VFLAT_Real_Atomics_gfx12<0x03f, "flat_atomic_inc_u32">; 2641defm FLAT_ATOMIC_DEC : VFLAT_Real_Atomics_gfx12<0x040, "flat_atomic_dec_u32">; 2642defm FLAT_ATOMIC_SWAP_X2 : VFLAT_Real_Atomics_gfx12<0x041, "flat_atomic_swap_b64">; 2643defm FLAT_ATOMIC_CMPSWAP_X2 : VFLAT_Real_Atomics_gfx12<0x042, "flat_atomic_cmpswap_b64">; 2644defm FLAT_ATOMIC_ADD_X2 : VFLAT_Real_Atomics_gfx12<0x043, "flat_atomic_add_u64">; 2645defm FLAT_ATOMIC_SUB_X2 : VFLAT_Real_Atomics_gfx12<0x044, "flat_atomic_sub_u64">; 2646defm FLAT_ATOMIC_SMIN_X2 : VFLAT_Real_Atomics_gfx12<0x045, "flat_atomic_min_i64">; 2647defm FLAT_ATOMIC_UMIN_X2 : VFLAT_Real_Atomics_gfx12<0x046, "flat_atomic_min_u64">; 2648defm FLAT_ATOMIC_SMAX_X2 : VFLAT_Real_Atomics_gfx12<0x047, "flat_atomic_max_i64">; 2649defm FLAT_ATOMIC_UMAX_X2 : VFLAT_Real_Atomics_gfx12<0x048, "flat_atomic_max_u64">; 2650defm FLAT_ATOMIC_AND_X2 : VFLAT_Real_Atomics_gfx12<0x049, "flat_atomic_and_b64">; 2651defm FLAT_ATOMIC_OR_X2 : VFLAT_Real_Atomics_gfx12<0x04a, "flat_atomic_or_b64">; 2652defm FLAT_ATOMIC_XOR_X2 : VFLAT_Real_Atomics_gfx12<0x04b, "flat_atomic_xor_b64">; 2653defm FLAT_ATOMIC_INC_X2 : VFLAT_Real_Atomics_gfx12<0x04c, "flat_atomic_inc_u64">; 2654defm FLAT_ATOMIC_DEC_X2 : VFLAT_Real_Atomics_gfx12<0x04d, "flat_atomic_dec_u64">; 2655defm FLAT_ATOMIC_COND_SUB_U32 : VFLAT_Real_Atomics_gfx12<0x050>; 2656defm FLAT_ATOMIC_FMIN : VFLAT_Real_Atomics_gfx12<0x051, "flat_atomic_min_num_f32", "flat_atomic_min_f32">; 2657defm FLAT_ATOMIC_FMAX : VFLAT_Real_Atomics_gfx12<0x052, "flat_atomic_max_num_f32", "flat_atomic_max_f32">; 2658defm FLAT_ATOMIC_ADD_F32 : VFLAT_Real_Atomics_gfx12<0x056>; 2659defm FLAT_ATOMIC_PK_ADD_F16 : VFLAT_Real_Atomics_gfx12<0x059>; 2660defm FLAT_ATOMIC_PK_ADD_BF16 : VFLAT_Real_Atomics_gfx12<0x05a>; 2661 2662// ENC_VGLOBAL. 2663defm GLOBAL_LOAD_UBYTE : VGLOBAL_Real_AllAddr_gfx12<0x010, "global_load_u8">; 2664defm GLOBAL_LOAD_SBYTE : VGLOBAL_Real_AllAddr_gfx12<0x011, "global_load_i8">; 2665defm GLOBAL_LOAD_USHORT : VGLOBAL_Real_AllAddr_gfx12<0x012, "global_load_u16">; 2666defm GLOBAL_LOAD_SSHORT : VGLOBAL_Real_AllAddr_gfx12<0x013, "global_load_i16">; 2667defm GLOBAL_LOAD_DWORD : VGLOBAL_Real_AllAddr_gfx12<0x014, "global_load_b32">; 2668defm GLOBAL_LOAD_DWORDX2 : VGLOBAL_Real_AllAddr_gfx12<0x015, "global_load_b64">; 2669defm GLOBAL_LOAD_DWORDX3 : VGLOBAL_Real_AllAddr_gfx12<0x016, "global_load_b96">; 2670defm GLOBAL_LOAD_DWORDX4 : VGLOBAL_Real_AllAddr_gfx12<0x017, "global_load_b128">; 2671defm GLOBAL_STORE_BYTE : VGLOBAL_Real_AllAddr_gfx12<0x018, "global_store_b8">; 2672defm GLOBAL_STORE_SHORT : VGLOBAL_Real_AllAddr_gfx12<0x019, "global_store_b16">; 2673defm GLOBAL_STORE_DWORD : VGLOBAL_Real_AllAddr_gfx12<0x01a, "global_store_b32">; 2674defm GLOBAL_STORE_DWORDX2 : VGLOBAL_Real_AllAddr_gfx12<0x01b, "global_store_b64">; 2675defm GLOBAL_STORE_DWORDX3 : VGLOBAL_Real_AllAddr_gfx12<0x01c, "global_store_b96">; 2676defm GLOBAL_STORE_DWORDX4 : VGLOBAL_Real_AllAddr_gfx12<0x01d, "global_store_b128">; 2677defm GLOBAL_LOAD_UBYTE_D16 : VGLOBAL_Real_AllAddr_gfx12<0x01e, "global_load_d16_u8">; 2678defm GLOBAL_LOAD_SBYTE_D16 : VGLOBAL_Real_AllAddr_gfx12<0x01f, "global_load_d16_i8">; 2679defm GLOBAL_LOAD_SHORT_D16 : VGLOBAL_Real_AllAddr_gfx12<0x020, "global_load_d16_b16">; 2680defm GLOBAL_LOAD_UBYTE_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x021, "global_load_d16_hi_u8">; 2681defm GLOBAL_LOAD_SBYTE_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x022, "global_load_d16_hi_i8">; 2682defm GLOBAL_LOAD_SHORT_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x023, "global_load_d16_hi_b16">; 2683defm GLOBAL_STORE_BYTE_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x024, "global_store_d16_hi_b8">; 2684defm GLOBAL_STORE_SHORT_D16_HI : VGLOBAL_Real_AllAddr_gfx12<0x025, "global_store_d16_hi_b16">; 2685defm GLOBAL_LOAD_DWORD_ADDTID : VGLOBAL_Real_AllAddr_gfx12<0x028, "global_load_addtid_b32">; 2686defm GLOBAL_STORE_DWORD_ADDTID : VGLOBAL_Real_AllAddr_gfx12<0x029, "global_store_addtid_b32">; 2687defm GLOBAL_LOAD_BLOCK : VGLOBAL_Real_AllAddr_gfx12<0x053>; 2688defm GLOBAL_STORE_BLOCK : VGLOBAL_Real_AllAddr_gfx12<0x054>; 2689 2690defm GLOBAL_ATOMIC_SWAP : VGLOBAL_Real_Atomics_gfx12<0x033, "global_atomic_swap_b32">; 2691defm GLOBAL_ATOMIC_CMPSWAP : VGLOBAL_Real_Atomics_gfx12<0x034, "global_atomic_cmpswap_b32">; 2692defm GLOBAL_ATOMIC_ADD : VGLOBAL_Real_Atomics_gfx12<0x035, "global_atomic_add_u32">; 2693defm GLOBAL_ATOMIC_SUB : VGLOBAL_Real_Atomics_gfx12<0x036, "global_atomic_sub_u32">; 2694defm GLOBAL_ATOMIC_CSUB : VGLOBAL_Real_Atomics_gfx12<0x037, "global_atomic_sub_clamp_u32", "global_atomic_csub_u32">; 2695defm GLOBAL_ATOMIC_SMIN : VGLOBAL_Real_Atomics_gfx12<0x038, "global_atomic_min_i32">; 2696defm GLOBAL_ATOMIC_UMIN : VGLOBAL_Real_Atomics_gfx12<0x039, "global_atomic_min_u32">; 2697defm GLOBAL_ATOMIC_SMAX : VGLOBAL_Real_Atomics_gfx12<0x03a, "global_atomic_max_i32">; 2698defm GLOBAL_ATOMIC_UMAX : VGLOBAL_Real_Atomics_gfx12<0x03b, "global_atomic_max_u32">; 2699defm GLOBAL_ATOMIC_AND : VGLOBAL_Real_Atomics_gfx12<0x03c, "global_atomic_and_b32">; 2700defm GLOBAL_ATOMIC_OR : VGLOBAL_Real_Atomics_gfx12<0x03d, "global_atomic_or_b32">; 2701defm GLOBAL_ATOMIC_XOR : VGLOBAL_Real_Atomics_gfx12<0x03e, "global_atomic_xor_b32">; 2702defm GLOBAL_ATOMIC_INC : VGLOBAL_Real_Atomics_gfx12<0x03f, "global_atomic_inc_u32">; 2703defm GLOBAL_ATOMIC_DEC : VGLOBAL_Real_Atomics_gfx12<0x040, "global_atomic_dec_u32">; 2704defm GLOBAL_ATOMIC_SWAP_X2 : VGLOBAL_Real_Atomics_gfx12<0x041, "global_atomic_swap_b64">; 2705defm GLOBAL_ATOMIC_CMPSWAP_X2 : VGLOBAL_Real_Atomics_gfx12<0x042, "global_atomic_cmpswap_b64">; 2706defm GLOBAL_ATOMIC_ADD_X2 : VGLOBAL_Real_Atomics_gfx12<0x043, "global_atomic_add_u64">; 2707defm GLOBAL_ATOMIC_SUB_X2 : VGLOBAL_Real_Atomics_gfx12<0x044, "global_atomic_sub_u64">; 2708defm GLOBAL_ATOMIC_SMIN_X2 : VGLOBAL_Real_Atomics_gfx12<0x045, "global_atomic_min_i64">; 2709defm GLOBAL_ATOMIC_UMIN_X2 : VGLOBAL_Real_Atomics_gfx12<0x046, "global_atomic_min_u64">; 2710defm GLOBAL_ATOMIC_SMAX_X2 : VGLOBAL_Real_Atomics_gfx12<0x047, "global_atomic_max_i64">; 2711defm GLOBAL_ATOMIC_UMAX_X2 : VGLOBAL_Real_Atomics_gfx12<0x048, "global_atomic_max_u64">; 2712defm GLOBAL_ATOMIC_AND_X2 : VGLOBAL_Real_Atomics_gfx12<0x049, "global_atomic_and_b64">; 2713defm GLOBAL_ATOMIC_OR_X2 : VGLOBAL_Real_Atomics_gfx12<0x04a, "global_atomic_or_b64">; 2714defm GLOBAL_ATOMIC_XOR_X2 : VGLOBAL_Real_Atomics_gfx12<0x04b, "global_atomic_xor_b64">; 2715defm GLOBAL_ATOMIC_INC_X2 : VGLOBAL_Real_Atomics_gfx12<0x04c, "global_atomic_inc_u64">; 2716defm GLOBAL_ATOMIC_DEC_X2 : VGLOBAL_Real_Atomics_gfx12<0x04d, "global_atomic_dec_u64">; 2717defm GLOBAL_ATOMIC_COND_SUB_U32 : VGLOBAL_Real_Atomics_gfx12<0x050>; 2718defm GLOBAL_ATOMIC_FMIN : VGLOBAL_Real_Atomics_gfx12<0x051, "global_atomic_min_num_f32", "global_atomic_min_f32">; 2719defm GLOBAL_ATOMIC_FMAX : VGLOBAL_Real_Atomics_gfx12<0x052, "global_atomic_max_num_f32", "global_atomic_max_f32">; 2720defm GLOBAL_ATOMIC_ADD_F32 : VGLOBAL_Real_Atomics_gfx12<0x056>; 2721 2722defm GLOBAL_LOAD_TR_B128_w32 : VGLOBAL_Real_AllAddr_gfx12<0x057>; 2723defm GLOBAL_LOAD_TR_B64_w32 : VGLOBAL_Real_AllAddr_gfx12<0x058>; 2724 2725defm GLOBAL_LOAD_TR_B128_w64 : VGLOBAL_Real_AllAddr_gfx12_w64<0x057>; 2726defm GLOBAL_LOAD_TR_B64_w64 : VGLOBAL_Real_AllAddr_gfx12_w64<0x058>; 2727 2728defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : VGLOBAL_Real_Atomics_gfx12<0x073>; 2729defm GLOBAL_ATOMIC_PK_ADD_F16 : VGLOBAL_Real_Atomics_gfx12<0x059>; 2730defm GLOBAL_ATOMIC_PK_ADD_BF16 : VGLOBAL_Real_Atomics_gfx12<0x05a>; 2731 2732defm GLOBAL_INV : VFLAT_Real_Base_gfx12<0x02b>; 2733defm GLOBAL_WB : VFLAT_Real_Base_gfx12<0x02c>; 2734defm GLOBAL_WBINV : VFLAT_Real_Base_gfx12<0x04f>; 2735 2736// ENC_VSCRATCH. 2737defm SCRATCH_LOAD_UBYTE : VSCRATCH_Real_AllAddr_gfx12<0x10, "scratch_load_u8">; 2738defm SCRATCH_LOAD_SBYTE : VSCRATCH_Real_AllAddr_gfx12<0x11, "scratch_load_i8">; 2739defm SCRATCH_LOAD_USHORT : VSCRATCH_Real_AllAddr_gfx12<0x12, "scratch_load_u16">; 2740defm SCRATCH_LOAD_SSHORT : VSCRATCH_Real_AllAddr_gfx12<0x13, "scratch_load_i16">; 2741defm SCRATCH_LOAD_DWORD : VSCRATCH_Real_AllAddr_gfx12<0x14, "scratch_load_b32">; 2742defm SCRATCH_LOAD_DWORDX2 : VSCRATCH_Real_AllAddr_gfx12<0x15, "scratch_load_b64">; 2743defm SCRATCH_LOAD_DWORDX3 : VSCRATCH_Real_AllAddr_gfx12<0x16, "scratch_load_b96">; 2744defm SCRATCH_LOAD_DWORDX4 : VSCRATCH_Real_AllAddr_gfx12<0x17, "scratch_load_b128">; 2745defm SCRATCH_STORE_BYTE : VSCRATCH_Real_AllAddr_gfx12<0x18, "scratch_store_b8">; 2746defm SCRATCH_STORE_SHORT : VSCRATCH_Real_AllAddr_gfx12<0x19, "scratch_store_b16">; 2747defm SCRATCH_STORE_DWORD : VSCRATCH_Real_AllAddr_gfx12<0x1a, "scratch_store_b32">; 2748defm SCRATCH_STORE_DWORDX2 : VSCRATCH_Real_AllAddr_gfx12<0x1b, "scratch_store_b64">; 2749defm SCRATCH_STORE_DWORDX3 : VSCRATCH_Real_AllAddr_gfx12<0x1c, "scratch_store_b96">; 2750defm SCRATCH_STORE_DWORDX4 : VSCRATCH_Real_AllAddr_gfx12<0x1d, "scratch_store_b128">; 2751defm SCRATCH_LOAD_UBYTE_D16 : VSCRATCH_Real_AllAddr_gfx12<0x1e, "scratch_load_d16_u8">; 2752defm SCRATCH_LOAD_SBYTE_D16 : VSCRATCH_Real_AllAddr_gfx12<0x1f, "scratch_load_d16_i8">; 2753defm SCRATCH_LOAD_SHORT_D16 : VSCRATCH_Real_AllAddr_gfx12<0x20, "scratch_load_d16_b16">; 2754defm SCRATCH_LOAD_UBYTE_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x21, "scratch_load_d16_hi_u8">; 2755defm SCRATCH_LOAD_SBYTE_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x22, "scratch_load_d16_hi_i8">; 2756defm SCRATCH_LOAD_SHORT_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x23, "scratch_load_d16_hi_b16">; 2757defm SCRATCH_STORE_BYTE_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x24, "scratch_store_d16_hi_b8">; 2758defm SCRATCH_STORE_SHORT_D16_HI : VSCRATCH_Real_AllAddr_gfx12<0x25, "scratch_store_d16_hi_b16">; 2759 2760defm SCRATCH_LOAD_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x53>; 2761defm SCRATCH_STORE_BLOCK : VSCRATCH_Real_AllAddr_gfx12<0x54>; 2762