//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Address-matching ComplexPatterns. Each one names the selector routine it
// dispatches to; the integer after iPTR is the operand count handed to
// ComplexPattern (2 or 3 here) and every pattern passes -10 as its last
// argument.
def FlatOffset    : ComplexPattern<iPTR, 2, "SelectFlatOffset",    [], [SDNPWantRoot], -10>;
def GlobalOffset  : ComplexPattern<iPTR, 2, "SelectGlobalOffset",  [], [SDNPWantRoot], -10>;
def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [SDNPWantRoot], -10>;

def GlobalSAddr   : ComplexPattern<iPTR, 3, "SelectGlobalSAddr",   [], [SDNPWantRoot], -10>;
def ScratchSAddr  : ComplexPattern<iPTR, 2, "SelectScratchSAddr",  [], [SDNPWantRoot], -10>;
def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [SDNPWantRoot], -10>;

//===----------------------------------------------------------------------===//
// FLAT classes
//===----------------------------------------------------------------------===//

// Common base of every FLAT-family pseudo instruction in this file.
//
//   opName  - mnemonic; stored in Mnemonic and forwarded to SIMCInstr.
//   outs    - output operand dag.
//   ins     - input operand dag.
//   asmOps  - operand portion of the assembly string; stored in AsmOperands
//             (the pseudo itself is given an empty asm string).
//   pattern - optional selection pattern list.
//
// The has_* / enabled_* / *Value / lds / sve fields declared here are the
// knobs that FLAT_Real and VFLAT_Real read back through `ps.` when they lay
// out encoding bits; subclasses adjust them with `let`.
class FLAT_Pseudo<string opName, dag outs, dag ins,
                  string asmOps, list<dag> pattern = []>
    : InstSI<outs, ins, "", pattern>,
      SIMCInstr<opName, SIEncodingFamily.NONE> {

  let isPseudo      = 1;
  let isCodeGenOnly = 1;

  let FLAT = 1;

  let UseNamedOperandTable = 1;
  let hasSideEffects       = 0;
  let SchedRW              = [WriteVMEM];

  string Mnemonic    = opName;
  string AsmOperands = asmOps;

  // Segment selectors; both zero means a plain flat access.
  bits<1> is_flat_global  = 0;
  bits<1> is_flat_scratch = 0;

  bits<1> has_vdst = 1;

  // We need to distinguish having saddr and enabling saddr because
  // saddr is only valid for scratch and global instructions. Pre-gfx9
  // these bits were reserved, so we also don't necessarily want to
  // set these bits to the disabled value for the original flat
  // segment instructions.
  bits<1> has_saddr     = 0;
  bits<1> enabled_saddr = 0;
  bits<7> saddr_value   = 0;
  bits<1> has_vaddr     = 1;

  bits<1> has_data   = 1;
  bits<1> has_glc    = 1;
  bits<1> glcValue   = 0;
  bits<1> has_dlc    = 1;
  bits<1> dlcValue   = 0;
  bits<1> has_sccb   = 1;
  bits<1> sccbValue  = 0;
  bits<1> has_sve    = 0; // Scratch VGPR Enable
  bits<1> lds        = 0;
  bits<1> sve        = 0;
  bits<1> has_offset = 1;

  // Default predicate follows the segment: global, then scratch, otherwise
  // the generic flat address space.
  let SubtargetPredicate =
      !if(is_flat_global, HasFlatGlobalInsts,
          !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace));

  // TODO: M0 if it could possibly access LDS (before gfx9? only)?
  let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]);

  // Internally, FLAT instructions are executed as both an LDS and a
  // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
  // and are not considered done until both have been decremented.
  // LGKM_CNT is therefore only set when neither segment flag is set.
  let VM_CNT   = 1;
  let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch));

  let FlatGlobal = is_flat_global;

  let FlatScratch = is_flat_scratch;
}

// 64-bit real encoding built from a FLAT_Pseudo.
//
//   op     - 7-bit opcode, placed in Inst{24-18}.
//   ps     - the pseudo whose operand lists, flags and knobs are copied.
//   opName - mnemonic to print; defaults to the pseudo's Mnemonic.
class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic>
    : InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
      Enc64 {

  let isPseudo      = 0;
  let isCodeGenOnly = 0;

  let FLAT = 1;

  // copy relevant pseudo op flags
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let OtherPredicates      = ps.OtherPredicates;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let VM_CNT               = ps.VM_CNT;
  let LGKM_CNT             = ps.LGKM_CNT;
  let VALU                 = ps.VALU;

  // encoding fields
  bits<8>  vaddr;
  bits<10> vdata;
  bits<7>  saddr;
  bits<10> vdst;

  bits<5> cpol;

  // Only valid on gfx9
  bits<1> lds = ps.lds; // LDS DMA for global and scratch

  // Segment, 00=flat, 01=scratch, 10=global, 11=reserved
  bits<2> seg = !if(ps.is_flat_global, 0b10,
                    !if(ps.is_flat_scratch, 0b01, 0));

  // Signed offset. Highest bit ignored for flat and treated as 12-bit
  // unsigned for flat accesses.
  bits<13> offset;
  // GFX90A+ only: instruction uses AccVGPR for data.
  // Taken from bit 9 of vdst when there is a destination, else from bit 9
  // of vdata when there is a data operand, else 0.
  bits<1> acc = !if(ps.has_vdst, vdst{9}, !if(ps.has_data, vdata{9}, 0));

  // We don't use tfe right now, and it was removed in gfx9.
  bits<1> tfe = 0;

  // Only valid on GFX9+
  let Inst{12-0}  = offset;
  let Inst{13}    = !if(ps.has_sve, ps.sve, lds); // sve when present, else lds
  let Inst{15-14} = seg;

  let Inst{16}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
  let Inst{17}    = cpol{CPolBit.SLC};
  let Inst{24-18} = op;
  let Inst{31-26} = 0x37; // Encoding.
  let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{47-40} = !if(ps.has_data, vdata{7-0}, ?);
  // saddr field: the register when enabled, 0x7f when present but disabled,
  // and 0 when the pseudo has no saddr at all.
  let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);

  // 54-48 is reserved.
  let Inst{55}    = acc; // nv on GFX9+, TFE before. AccVGPR for data on GFX90A.
  let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, ?);
}

// 96-bit real encoding built from a FLAT_Pseudo.
//
//   op     - 8-bit opcode, placed in Inst{21-14}.
//   ps     - the pseudo whose operand lists, flags and knobs are copied.
//   opName - mnemonic to print; defaults to the pseudo's Mnemonic.
class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic>
    : InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>,
      Enc96 {

  let FLAT = 1;

  // copy relevant pseudo op flags
  let SubtargetPredicate   = ps.SubtargetPredicate;
  let AsmMatchConverter    = ps.AsmMatchConverter;
  let OtherPredicates      = ps.OtherPredicates;
  let TSFlags              = ps.TSFlags;
  let UseNamedOperandTable = ps.UseNamedOperandTable;
  let SchedRW              = ps.SchedRW;
  let mayLoad              = ps.mayLoad;
  let mayStore             = ps.mayStore;
  let IsAtomicRet          = ps.IsAtomicRet;
  let IsAtomicNoRet        = ps.IsAtomicNoRet;
  let VM_CNT               = ps.VM_CNT;
  let LGKM_CNT             = ps.LGKM_CNT;
  let VALU                 = ps.VALU;

  // encoding fields
  bits<7>  saddr;
  bits<8>  vdst;
  bits<6>  cpol;
  bits<8>  vdata; // vsrc
  bits<8>  vaddr;
  bits<24> offset;

  // Same three-way saddr rule as FLAT_Real: register / 0x7f / 0.
  let Inst{6-0}   = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0);
  let Inst{21-14} = op;
  let Inst{31-26} = 0x3b;
  let Inst{39-32} = !if(ps.has_vdst, vdst, ?);
  let Inst{49}    = ps.sve;
  let Inst{54-53} = cpol{2-1}; // th{2-1}
  // Returning atomics force this bit to 1 instead of taking it from cpol.
  let Inst{52}    = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0}
  let Inst{51-50} = cpol{4-3}; // scope
  let Inst{62-55} = !if(ps.has_data, vdata{7-0}, ?);
  let Inst{71-64} = !if(ps.has_vaddr, vaddr, ?);
  let Inst{95-72} = !if(ps.has_offset, offset, ?);
}

// Mix-in that tags a record with whether it is the saddr form (IsSaddr) and
// with a shared name (SaddrOp) common to the saddr / non-saddr variants.
class GlobalSaddrTable <bit is_saddr, string Name = ""> {
  bit    IsSaddr = is_saddr;
  string SaddrOp = Name;
}

// TODO: Is exec allowed for saddr? The disabled value 0x7f is the
// same encoding value as exec_hi, so it isn't possible to use that if
// saddr is 32-bit (which isn't handled here yet).
// Load pseudo for the flat and global segments.
//
//   opName        - mnemonic.
//   regClass      - register class of the loaded value.
//   HasTiedOutput - when set, adds a $vdst_in input tied to $vdst and takes
//                   CPol instead of CPol_0 for $cpol.
//   HasSaddr      - the instruction has an saddr field at all.
//   EnableSaddr   - the saddr field carries a real SGPR pair; $vaddr then
//                   shrinks from VReg_64 to VGPR_32.
//   vdata_op      - destination operand type, derived from regClass.
//
// Assembly prints ", $saddr" when saddr is enabled, ", off" when it is only
// present, and nothing when HasSaddr is clear.
class FLAT_Load_Pseudo <string opName, RegisterClass regClass,
                        bit HasTiedOutput = 0,
                        bit HasSaddr = 0, bit EnableSaddr = 0,
                        RegisterOperand vdata_op = getLdStRegisterOperand<regClass>.ret>
    : FLAT_Pseudo<
        opName,
        (outs vdata_op:$vdst),
        !con(
          !if(EnableSaddr,
              (ins SReg_64:$saddr, VGPR_32:$vaddr),
              (ins VReg_64:$vaddr)),
          (ins flat_offset:$offset),
          // FIXME: Operands with default values do not work with following non-optional operands.
          !if(HasTiedOutput,
              (ins CPol:$cpol, vdata_op:$vdst_in),
              (ins CPol_0:$cpol))),
        " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
  let has_data      = 0;
  let mayLoad       = 1;
  let has_saddr     = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr   = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");

  let Constraints     = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Store pseudo for the flat and global segments. Parameters mirror
// FLAT_Load_Pseudo; there is no destination (has_vdst = 0) and, in the saddr
// form, $saddr follows $vdata in the operand list.
class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass,
                         bit HasSaddr = 0, bit EnableSaddr = 0>
    : FLAT_Pseudo<
        opName,
        (outs),
        !con(
          !if(EnableSaddr,
              (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr),
              (ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)),
          (ins flat_offset:$offset, CPol_0:$cpol)),
        " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> {
  let mayLoad       = 0;
  let mayStore      = 1;
  let has_vdst      = 0;
  let has_saddr     = HasSaddr;
  let enabled_saddr = EnableSaddr;
  let PseudoInstr   = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", "");
}

// Global load: emits NAME (saddr present but disabled) and NAME_SADDR
// (saddr enabled), both tagged with GlobalSaddrTable under the same opName.
multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    def ""     : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>,
                 GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>,
                 GlobalSaddrTable<1, opName>;
  }
}

// Global "addtid" load: takes no $vaddr operand (has_vaddr = 0). The only
// address inputs are the optional $saddr and the offset.
class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass,
                                      bit HasTiedOutput = 0, bit EnableSaddr = 0>
    : FLAT_Pseudo<
        opName,
        (outs regClass:$vdst),
        !con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)),
             (ins flat_offset:$offset, CPol_0:$cpol),
             !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))),
        " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let is_flat_global = 1;
  let has_data       = 0;
  let mayLoad        = 1;
  let has_vaddr      = 0;
  let has_saddr      = 1;
  let enabled_saddr  = EnableSaddr;
  let PseudoInstr    = opName#!if(EnableSaddr, "_SADDR", "");

  let Constraints     = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// NAME / NAME_SADDR pair for the addtid load.
multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass,
                                          bit HasTiedOutput = 0> {
  def ""     : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput>,
               GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, 1>,
               GlobalSaddrTable<1, opName>;
}

// Global store: NAME (saddr disabled) and NAME_SADDR (saddr enabled).
multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    def ""     : FLAT_Store_Pseudo<opName, regClass, 1>,
                 GlobalSaddrTable<0, opName>;
    def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>,
                 GlobalSaddrTable<1, opName>;
  }
}

// Global load with the lds bit set. It has neither a register destination
// nor a data operand, is flagged as both mayLoad and mayStore, sets
// LGKM_CNT and VALU, and lists M0 in Uses alongside EXEC.
class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0>
    : FLAT_Pseudo<
        opName,
        (outs),
        !con(
          !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)),
          (ins flat_offset:$offset, CPol_0:$cpol)),
        " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> {
  let LGKM_CNT       = 1;
  let is_flat_global = 1;
  let lds            = 1;
  let has_data       = 0;
  let has_vdst       = 0;
  let mayLoad        = 1;
  let mayStore       = 1;
  let has_saddr      = 1;
  let enabled_saddr  = EnableSaddr;
  let VALU           = 1;
  let PseudoInstr    = opName#!if(EnableSaddr, "_SADDR", "");
  let Uses           = [M0, EXEC];
  let SchedRW        = [WriteVMEM, WriteLDS];
}

// NAME / NAME_SADDR pair for the global LDS load.
multiclass FLAT_Global_Load_LDS_Pseudo<string opName> {
  def ""     : FLAT_Global_Load_LDS_Pseudo<opName>,
               GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>,
               GlobalSaddrTable<1, opName>;
}

// Global "addtid" store: like the addtid load it has no $vaddr operand.
// Note that $cpol uses CPol here, not CPol_0.
class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass,
                                       bit EnableSaddr = 0>
    : FLAT_Pseudo<
        opName,
        (outs),
        !con(!if(EnableSaddr,
                 (ins vdataClass:$vdata, SReg_64:$saddr),
                 (ins vdataClass:$vdata)),
             (ins flat_offset:$offset, CPol:$cpol)),
        " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let is_flat_global = 1;
  let mayLoad        = 0;
  let mayStore       = 1;
  let has_vdst       = 0;
  let has_vaddr      = 0;
  let has_saddr      = 1;
  let enabled_saddr  = EnableSaddr;
  let PseudoInstr    = opName#!if(EnableSaddr, "_SADDR", "");
}

// NAME / NAME_SADDR pair for the addtid store.
multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass> {
  def ""     : FLAT_Global_Store_AddTid_Pseudo<opName, regClass>,
               GlobalSaddrTable<0, opName>;
  def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, 1>,
               GlobalSaddrTable<1, opName>;
}

// Global pseudo whose only operand is $cpol: no address, data, destination
// or offset. It is marked hasSideEffects with mayLoad/mayStore cleared, and
// every optional encoding knob inherited from FLAT_Pseudo is switched off.
class FLAT_Global_Invalidate_Writeback<string opName, SDPatternOperator node = null_frag>
    : FLAT_Pseudo<opName, (outs), (ins CPol:$cpol), "$cpol", [(node)]> {

  let AsmMatchConverter = "";

  let hasSideEffects = 1;
  let mayLoad        = 0;
  let mayStore       = 0;
  let is_flat_global = 1;

  let has_offset    = 0;
  let has_saddr     = 0;
  let enabled_saddr = 0;
  let saddr_value   = 0;
  let has_vdst      = 0;
  let has_data      = 0;
  let has_vaddr     = 0;
  let has_glc       = 0;
  let has_dlc       = 0;
  let glcValue      = 0;
  let dlcValue      = 0;
  let has_sccb      = 0;
  let sccbValue     = 0;
  let has_sve       = 0;
  let lds           = 0;
  let sve           = 0;
}

// Mix-in tagging a scratch record with its base opcode name (SVOp) and its
// addressing mode string (Mode); this file uses "SV", "SS", "SVS" and "ST".
class FlatScratchInst <string sv_op, string mode> {
  string SVOp = sv_op;
  string Mode = mode;
}

// Scratch load pseudo.
//
//   EnableSaddr - take an SGPR address operand.
//   EnableSVE   - take both $vaddr and $saddr (the "_SVS" form).
//   EnableVaddr - take a VGPR address operand; by default this is set when
//                 EnableSVE is set or EnableSaddr is clear.
//
// The PseudoInstr suffix is "_SVS", "_SADDR", "" or "_ST", checked in that
// order. Note that the sve field is driven by EnableVaddr while has_sve is
// driven by EnableSVE.
class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass,
                                bit HasTiedOutput = 0,
                                bit EnableSaddr = 0,
                                bit EnableSVE = 0,
                                bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))>
    : FLAT_Pseudo<
        opName,
        (outs getLdStRegisterOperand<regClass>.ret:$vdst),
        !con(
          !if(EnableSVE,
              (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
              !if(EnableSaddr,
                  (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset),
                  !if(EnableVaddr,
                      (ins VGPR_32:$vaddr, flat_offset:$offset),
                      (ins flat_offset:$offset)))),
          !if(HasTiedOutput,
              (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in),
              (ins CPol_0:$cpol))),
        " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let has_data      = 0;
  let mayLoad       = 1;
  let has_saddr     = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr     = EnableVaddr;
  let has_sve       = EnableSVE;
  let sve           = EnableVaddr;
  let PseudoInstr   = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));

  let Constraints     = !if(HasTiedOutput, "$vdst = $vdst_in", "");
  let DisableEncoding = !if(HasTiedOutput, "$vdst_in", "");
}

// Scratch store pseudo; same addressing-mode parameters as the scratch
// load. $vdata always comes first in the operand list, while the assembly
// string prints the vaddr slot (or "off") before $vdata.
class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0,
                                 bit EnableSVE = 0,
                                 bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr)),
                                 RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret>
    : FLAT_Pseudo<
        opName,
        (outs),
        !if(EnableSVE,
            (ins vdata_op:$vdata, VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
            !if(EnableSaddr,
                (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol),
                !if(EnableVaddr,
                    (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol),
                    (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol)))),
        " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {
  let mayLoad       = 0;
  let mayStore      = 1;
  let has_vdst      = 0;
  let has_saddr     = 1;
  let enabled_saddr = EnableSaddr;
  let has_vaddr     = EnableVaddr;
  let has_sve       = EnableSVE;
  let sve           = EnableVaddr;
  let PseudoInstr   = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
}

// Scratch load in all four addressing modes. The _SVS and _ST records get
// their own subtarget predicates; the other two keep the surrounding one.
multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> {
  let is_flat_scratch = 1 in {
    def ""     : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput>,
                 FlatScratchInst<opName, "SV">;
    def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>,
                 FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSVSMode in
    def _SVS   : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1, 1>,
                 FlatScratchInst<opName, "SVS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST    : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0, 0>,
                 FlatScratchInst<opName, "ST">;
  }
}

// Scratch store in all four addressing modes, gated like the scratch load.
multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> {
  let is_flat_scratch = 1 in {
    def ""     : FLAT_Scratch_Store_Pseudo<opName, regClass>,
                 FlatScratchInst<opName, "SV">;
    def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>,
                 FlatScratchInst<opName, "SS">;

    let SubtargetPredicate = HasFlatScratchSVSMode in
    def _SVS   : FLAT_Scratch_Store_Pseudo<opName, regClass, 1, 1>,
                 FlatScratchInst<opName, "SVS">;

    let SubtargetPredicate = HasFlatScratchSTMode in
    def _ST    : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0, 0>,
                 FlatScratchInst<opName, "ST">;
  }
}

// Scratch load with the lds bit set. Same flag set as the global LDS load
// (no vdst/data, mayLoad and mayStore, LGKM_CNT, VALU, M0 in Uses), with the
// scratch addressing-mode parameters. $cpol uses CPol here.
class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0,
                                    bit EnableSVE = 0,
                                    bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))>
    : FLAT_Pseudo<
        opName,
        (outs),
        !if(EnableSVE,
            (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
            !if(EnableSaddr,
                (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol),
                !if(EnableVaddr,
                    (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol),
                    (ins flat_offset:$offset, CPol:$cpol)))),
        " "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> {

  let LGKM_CNT        = 1;
  let is_flat_scratch = 1;
  let lds             = 1;
  let has_data        = 0;
  let has_vdst        = 0;
  let mayLoad         = 1;
  let mayStore        = 1;
  let has_saddr       = 1;
  let enabled_saddr   = EnableSaddr;
  let has_vaddr       = EnableVaddr;
  let has_sve         = EnableSVE;
  let sve             = EnableVaddr;
  let VALU            = 1;
  let PseudoInstr     = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST")));
  let Uses            = [M0, EXEC];
  let SchedRW         = [WriteVMEM, WriteLDS];
}

// Scratch LDS load in all four addressing modes. Unlike the plain scratch
// load/store multiclasses, no per-mode subtarget predicate is applied here.
multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> {
  def ""     : FLAT_Scratch_Load_LDS_Pseudo<opName>,
               FlatScratchInst<opName, "SV">;
  def _SADDR : FLAT_Scratch_Load_LDS_Pseudo<opName, 1>,
               FlatScratchInst<opName, "SS">;
  def _SVS   : FLAT_Scratch_Load_LDS_Pseudo<opName, 1, 1>,
               FlatScratchInst<opName, "SVS">;
  def _ST    : FLAT_Scratch_Load_LDS_Pseudo<opName, 0, 0, 0>,
               FlatScratchInst<opName, "ST">;
}

// Atomic pseudo that does not return the old value: no vdst, and the glc
// encoding bit is fixed (has_glc = 0) at glcValue = 0.
class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins,
                              string asm, list<dag> pattern = []>
    : FLAT_Pseudo<opName, outs, ins, asm, pattern> {
  let mayLoad       = 1;
  let mayStore      = 1;
  let has_glc       = 0;
  let glcValue      = 0;
  let has_vdst      = 0;
  let has_sccb      = 1;
  let sccbValue     = 0;
  let IsAtomicNoRet = 1;
}

// Returning atomic pseudo. Starts from the no-return settings, then restores
// vdst, fixes glcValue at 1, and swaps the IsAtomicNoRet / IsAtomicRet flags.
class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins,
                            string asm, list<dag> pattern = []>
    : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> {
  let hasPostISelHook = 1;
  let has_vdst        = 1;
  let glcValue        = 1;
  let sccbValue       = 0;
  let IsAtomicNoRet   = 0;
  let IsAtomicRet     = 1;
  let PseudoInstr     = NAME # "_RTN";
}

// Flat-segment atomic, no-return form.
//
//   vdst_rc / vt      - register class and value type of the result.
//   data_vt / data_rc - value type and register class of the data operand;
//                       they default to the result's and differ for the
//                       cmpswap-style users in this file.
//   data_op           - data operand type derived from data_rc.
multiclass FLAT_Atomic_Pseudo_NO_RTN<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  def "" : FLAT_AtomicNoRet_Pseudo <opName,
             (outs),
             (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
             " $vaddr, $vdata$offset$cpol">,
           GlobalSaddrTable<0, opName>,
           AtomicNoRet <opName, 0> {
    let PseudoInstr     = NAME;
    let FPAtomic        = data_vt.isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }
}

// Flat-segment atomic, returning form (NAME_RTN). Same parameters as the
// no-return multiclass; $cpol uses CPol_GLC1.
multiclass FLAT_Atomic_Pseudo_RTN<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  def _RTN : FLAT_AtomicRet_Pseudo <opName,
               (outs getLdStRegisterOperand<vdst_rc>.ret:$vdst),
               (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
               " $vdst, $vaddr, $vdata$offset$cpol">,
             GlobalSaddrTable<0, opName#"_rtn">,
             AtomicNoRet <opName, 1> {
    let FPAtomic        = data_vt.isFP;
    let AddedComplexity = -1; // Prefer global atomics if available
  }
}

// Flat-segment atomic: instantiates both the no-return and returning forms.
multiclass FLAT_Atomic_Pseudo<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {
  defm "" : FLAT_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc, data_op>;
  defm "" : FLAT_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc, data_op>;
}

// Global-segment atomic, no-return forms: NAME (prints "off" in the saddr
// slot) and NAME_SADDR (saddr enabled, 32-bit $vaddr).
multiclass FLAT_Global_Atomic_Pseudo_NO_RTN<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> {

  def "" : FLAT_AtomicNoRet_Pseudo <opName,
             (outs),
             (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol),
             " $vaddr, $vdata, off$offset$cpol">,
           GlobalSaddrTable<0, opName>,
           AtomicNoRet <opName, 0> {
    let has_saddr   = 1;
    let PseudoInstr = NAME;
    let FPAtomic    = data_vt.isFP;
  }

  def _SADDR : FLAT_AtomicNoRet_Pseudo <opName,
                 (outs),
                 (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol),
                 " $vaddr, $vdata, $saddr$offset$cpol">,
               GlobalSaddrTable<1, opName>,
               AtomicNoRet <opName#"_saddr", 0> {
    let has_saddr     = 1;
    let enabled_saddr = 1;
    let PseudoInstr   = NAME#"_SADDR";
    let FPAtomic      = data_vt.isFP;
  }
}

// Global-segment atomic, returning forms: NAME_RTN and NAME_SADDR_RTN.
// vdst_op is the destination operand type derived from vdst_rc.
multiclass FLAT_Global_Atomic_Pseudo_RTN<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc,
    RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret,
    RegisterOperand vdst_op = getLdStRegisterOperand<vdst_rc>.ret> {

  def _RTN : FLAT_AtomicRet_Pseudo <opName,
               (outs vdst_op:$vdst),
               (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol),
               " $vdst, $vaddr, $vdata, off$offset$cpol">,
             GlobalSaddrTable<0, opName#"_rtn">,
             AtomicNoRet <opName, 1> {
    let has_saddr = 1;
    let FPAtomic  = data_vt.isFP;
  }

  def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName,
                     (outs vdst_op:$vdst),
                     (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol),
                     " $vdst, $vaddr, $vdata, $saddr$offset$cpol">,
                   GlobalSaddrTable<1, opName#"_rtn">,
                   AtomicNoRet <opName#"_saddr", 1> {
    let has_saddr     = 1;
    let enabled_saddr = 1;
    let PseudoInstr   = NAME#"_SADDR_RTN";
    let FPAtomic      = data_vt.isFP;
  }
}

// Global-segment atomic: all four records (with and without saddr, with and
// without return), marked is_flat_global and gated on HasFlatGlobalInsts.
multiclass FLAT_Global_Atomic_Pseudo<
    string opName,
    RegisterClass vdst_rc,
    ValueType vt,
    ValueType data_vt = vt,
    RegisterClass data_rc = vdst_rc> {
  let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in {
    defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
    defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc>;
  }
}

//===----------------------------------------------------------------------===//
// Flat Instructions
//===----------------------------------------------------------------------===//

// Flat-segment loads.
def FLAT_LOAD_UBYTE   : FLAT_Load_Pseudo <"flat_load_ubyte",   VGPR_32>;
def FLAT_LOAD_SBYTE   : FLAT_Load_Pseudo <"flat_load_sbyte",   VGPR_32>;
def FLAT_LOAD_USHORT  : FLAT_Load_Pseudo <"flat_load_ushort",  VGPR_32>;
def FLAT_LOAD_SSHORT  : FLAT_Load_Pseudo <"flat_load_sshort",  VGPR_32>;
def FLAT_LOAD_DWORD   : FLAT_Load_Pseudo <"flat_load_dword",   VGPR_32>;
def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;

// Flat-segment stores.
def FLAT_STORE_BYTE    : FLAT_Store_Pseudo <"flat_store_byte",    VGPR_32>;
def FLAT_STORE_SHORT   : FLAT_Store_Pseudo <"flat_store_short",   VGPR_32>;
def FLAT_STORE_DWORD   : FLAT_Store_Pseudo <"flat_store_dword",   VGPR_32>;
def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;

// D16 loads and stores. The loads pass HasTiedOutput = 1, so each one
// carries a $vdst_in operand tied to $vdst.
let SubtargetPredicate = HasD16LoadStore in {
let TiedSourceNotRead = 1 in {
def FLAT_LOAD_UBYTE_D16    : FLAT_Load_Pseudo <"flat_load_ubyte_d16",    VGPR_32, 1>;
def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16    : FLAT_Load_Pseudo <"flat_load_sbyte_d16",    VGPR_32, 1>;
def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16    : FLAT_Load_Pseudo <"flat_load_short_d16",    VGPR_32, 1>;
def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>;
}

def FLAT_STORE_BYTE_D16_HI  : FLAT_Store_Pseudo <"flat_store_byte_d16_hi",  VGPR_32>;
def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>;
}

// Flat-segment integer atomics. The cmpswap forms take a data operand twice
// as wide as the result (v2i32 / v2i64).
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",    VGPR_32, i32, v2i32, VReg_64>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2", VReg_64, i64, v2i64, VReg_128>;

defm FLAT_ATOMIC_SWAP    : FLAT_Atomic_Pseudo <"flat_atomic_swap",    VGPR_32, i32>;
defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2", VReg_64, i64>;

defm FLAT_ATOMIC_ADD  : FLAT_Atomic_Pseudo <"flat_atomic_add",  VGPR_32, i32>;
defm FLAT_ATOMIC_SUB  : FLAT_Atomic_Pseudo <"flat_atomic_sub",  VGPR_32, i32>;
defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin", VGPR_32, i32>;
defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin", VGPR_32, i32>;
defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax", VGPR_32, i32>;
defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax", VGPR_32, i32>;
defm FLAT_ATOMIC_AND  : FLAT_Atomic_Pseudo <"flat_atomic_and",  VGPR_32, i32>;
defm FLAT_ATOMIC_OR   : FLAT_Atomic_Pseudo <"flat_atomic_or",   VGPR_32, i32>;
defm FLAT_ATOMIC_XOR  : FLAT_Atomic_Pseudo <"flat_atomic_xor",  VGPR_32, i32>;
defm FLAT_ATOMIC_INC  : FLAT_Atomic_Pseudo <"flat_atomic_inc",  VGPR_32, i32>;
defm FLAT_ATOMIC_DEC  : FLAT_Atomic_Pseudo <"flat_atomic_dec",  VGPR_32, i32>;

defm FLAT_ATOMIC_ADD_X2  : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",  VReg_64, i64>;
defm FLAT_ATOMIC_SUB_X2  : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",  VReg_64, i64>;
defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2", VReg_64, i64>;
defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2", VReg_64, i64>;
defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2", VReg_64, i64>;
defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2", VReg_64, i64>;
defm FLAT_ATOMIC_AND_X2  : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",  VReg_64, i64>;
defm FLAT_ATOMIC_OR_X2   : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",   VReg_64, i64>;
defm FLAT_ATOMIC_XOR_X2  : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",  VReg_64, i64>;
defm FLAT_ATOMIC_INC_X2  : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",  VReg_64, i64>;
defm FLAT_ATOMIC_DEC_X2  : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",  VReg_64, i64>;

// GFX7-, GFX10-only flat instructions.
let SubtargetPredicate = isGFX7GFX10 in {

defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",     VReg_64, f64>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",     VReg_64, f64>;

} // End SubtargetPredicate = isGFX7GFX10

// f64 add/min/max atomics, in both flat and global flavours.
let SubtargetPredicate = isGFX90APlus in {
  defm FLAT_ATOMIC_ADD_F64   : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>;
  defm FLAT_ATOMIC_MIN_F64   : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>;
  defm FLAT_ATOMIC_MAX_F64   : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>;
  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>;
  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>;
  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>;
} // End SubtargetPredicate = isGFX90APlus

// Packed 16-bit add atomics. The bf16 forms are declared with the integer
// type v2i16, so FPAtomic is forced to 1 by an enclosing `let` rather than
// left to data_vt.isFP.
let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in {
  defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>;
  let FPAtomic = 1 in
  defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2i16>;
} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts

let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst, FPAtomic = 1 in
  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2i16>;

// GFX7-, GFX10-, GFX11-only flat instructions.
let SubtargetPredicate = isGFX7GFX10GFX11 in {

defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;
defm FLAT_ATOMIC_FMIN     : FLAT_Atomic_Pseudo <"flat_atomic_fmin",     VGPR_32, f32>;
defm FLAT_ATOMIC_FMAX     : FLAT_Atomic_Pseudo <"flat_atomic_fmax",     VGPR_32, f32>;

} // End SubtargetPredicate = isGFX7GFX10GFX11

// GFX940-, GFX11-only flat instructions.
//===----------------------------------------------------------------------===//
// FLAT atomics that are only available behind a dedicated predicate.
//===----------------------------------------------------------------------===//

let SubtargetPredicate = HasFlatAtomicFaddF32Inst in {
  defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>;
} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst

let SubtargetPredicate = isGFX12Plus in {
  defm FLAT_ATOMIC_CSUB_U32     : FLAT_Atomic_Pseudo <"flat_atomic_csub_u32", VGPR_32, i32>;
  defm FLAT_ATOMIC_COND_SUB_U32 : FLAT_Atomic_Pseudo <"flat_atomic_cond_sub_u32", VGPR_32, i32>;
} // End SubtargetPredicate = isGFX12Plus

//===----------------------------------------------------------------------===//
// GLOBAL loads and stores.
//===----------------------------------------------------------------------===//

// Plain loads; the register class argument grows with the loaded width.
defm GLOBAL_LOAD_UBYTE   : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>;
defm GLOBAL_LOAD_SBYTE   : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>;
defm GLOBAL_LOAD_USHORT  : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>;
defm GLOBAL_LOAD_SSHORT  : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>;
defm GLOBAL_LOAD_DWORD   : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>;

// D16 loads: instantiated with the extra trailing template argument set to 1
// and with TiedSourceNotRead enabled on every generated record.
let TiedSourceNotRead = 1 in {
  defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>;
  defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>;
  defm GLOBAL_LOAD_SHORT_D16    : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>;
  defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>;
} // End TiedSourceNotRead = 1

// The ADDTID load is additionally gated on HasGFX10_BEncoding.
let OtherPredicates = [HasGFX10_BEncoding] in
defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>;

// Plain stores.
defm GLOBAL_STORE_BYTE    : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>;
defm GLOBAL_STORE_SHORT   : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>;
defm GLOBAL_STORE_DWORD   : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>;
defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>;
defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>;
defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>;

// The ADDTID store is additionally gated on HasGFX10_BEncoding.
let OtherPredicates = [HasGFX10_BEncoding] in
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>;

// D16 "hi" stores.
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>;

//===----------------------------------------------------------------------===//
// GLOBAL atomics, LDS loads and GFX12 cache-control instructions.
// Everything in this scope is defined with is_flat_global = 1.
//===----------------------------------------------------------------------===//

let is_flat_global = 1 in {

// Compare-and-swap takes two extra arguments: the packed data type and the
// register class used for the data operand.
defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap", VGPR_32, i32, v2i32, VReg_64>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2", VReg_64, i64, v2i64, VReg_128>;

defm GLOBAL_ATOMIC_SWAP    : FLAT_Global_Atomic_Pseudo <"global_atomic_swap", VGPR_32, i32>;
defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2", VReg_64, i64>;

// 32-bit read-modify-write atomics.
defm GLOBAL_ATOMIC_ADD  : FLAT_Global_Atomic_Pseudo <"global_atomic_add", VGPR_32, i32>;
defm GLOBAL_ATOMIC_SUB  : FLAT_Global_Atomic_Pseudo <"global_atomic_sub", VGPR_32, i32>;
defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin", VGPR_32, i32>;
defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin", VGPR_32, i32>;
defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax", VGPR_32, i32>;
defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax", VGPR_32, i32>;
defm GLOBAL_ATOMIC_AND  : FLAT_Global_Atomic_Pseudo <"global_atomic_and", VGPR_32, i32>;
defm GLOBAL_ATOMIC_OR   : FLAT_Global_Atomic_Pseudo <"global_atomic_or", VGPR_32, i32>;
defm GLOBAL_ATOMIC_XOR  : FLAT_Global_Atomic_Pseudo <"global_atomic_xor", VGPR_32, i32>;
defm GLOBAL_ATOMIC_INC  : FLAT_Global_Atomic_Pseudo <"global_atomic_inc", VGPR_32, i32>;
defm GLOBAL_ATOMIC_DEC  : FLAT_Global_Atomic_Pseudo <"global_atomic_dec", VGPR_32, i32>;

// 64-bit (_X2) read-modify-write atomics.
defm GLOBAL_ATOMIC_ADD_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_SUB_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_AND_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_OR_X2   : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_XOR_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_INC_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", VReg_64, i64>;
defm GLOBAL_ATOMIC_DEC_X2  : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", VReg_64, i64>;

let SubtargetPredicate = HasGFX10_BEncoding in {
  defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo <"global_atomic_csub", VGPR_32, i32>;
} // End SubtargetPredicate = HasGFX10_BEncoding

// Global loads that use the LDS pseudo multiclass; no register class is passed.
defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">;
defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">;
defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">;
defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">;
defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">;

let SubtargetPredicate = isGFX12Plus in {
  defm GLOBAL_ATOMIC_COND_SUB_U32    : FLAT_Global_Atomic_Pseudo <"global_atomic_cond_sub_u32", VGPR_32, i32>;
  defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>;

  // Invalidate / write-back instructions are single records (def, not defm).
  def GLOBAL_INV   : FLAT_Global_Invalidate_Writeback<"global_inv">;
  def GLOBAL_WB    : FLAT_Global_Invalidate_Writeback<"global_wb">;
  def GLOBAL_WBINV : FLAT_Global_Invalidate_Writeback<"global_wbinv">;
} // End SubtargetPredicate = isGFX12Plus

} // End is_flat_global = 1

//===----------------------------------------------------------------------===//
// SCRATCH loads and stores.
//===----------------------------------------------------------------------===//

let SubtargetPredicate = HasFlatScratchInsts in {

// Plain loads.
defm SCRATCH_LOAD_UBYTE   : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>;
defm SCRATCH_LOAD_SBYTE   : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>;
defm SCRATCH_LOAD_USHORT  : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>;
defm SCRATCH_LOAD_SSHORT  : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>;
defm SCRATCH_LOAD_DWORD   : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>;
defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>;
defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>;
defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>;

// D16 loads, mirroring the GLOBAL D16 loads (trailing argument 1,
// TiedSourceNotRead enabled).
let TiedSourceNotRead = 1 in {
  defm SCRATCH_LOAD_UBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>;
  defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>;
  defm SCRATCH_LOAD_SBYTE_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>;
  defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>;
  defm SCRATCH_LOAD_SHORT_D16    : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>;
  defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>;
} // End TiedSourceNotRead = 1

// Plain stores.
defm SCRATCH_STORE_BYTE    : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>;
defm SCRATCH_STORE_SHORT   : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>;
defm SCRATCH_STORE_DWORD   : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>;
defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>;

// D16 "hi" stores.
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>;

// Scratch loads that use the LDS pseudo multiclass.
defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">;
defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">;
defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">;
defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">;
defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">;

} // End SubtargetPredicate = HasFlatScratchInsts

//===----------------------------------------------------------------------===//
// GFX12 GLOBAL_LOAD_TR: separate records per wave size; the wave64 variants
// are given a register class half as wide as their wave32 counterparts.
//===----------------------------------------------------------------------===//

let SubtargetPredicate = isGFX12Plus in {
  let WaveSizePredicate = isWave32 in {
    defm GLOBAL_LOAD_TR_B128_w32 : FLAT_Global_Load_Pseudo <"global_load_tr_b128_w32", VReg_128>;
    defm GLOBAL_LOAD_TR_B64_w32  : FLAT_Global_Load_Pseudo <"global_load_tr_b64_w32", VReg_64>;
  } // End WaveSizePredicate = isWave32
  let WaveSizePredicate = isWave64 in {
    defm GLOBAL_LOAD_TR_B128_w64 : FLAT_Global_Load_Pseudo <"global_load_tr_b128_w64", VReg_64>;
    defm GLOBAL_LOAD_TR_B64_w64  : FLAT_Global_Load_Pseudo <"global_load_tr_b64_w64", VGPR_32>;
  } // End WaveSizePredicate = isWave64
} // End SubtargetPredicate = isGFX12Plus

//===----------------------------------------------------------------------===//
// Floating-point GLOBAL atomics.
//===----------------------------------------------------------------------===//

let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in {
  defm GLOBAL_ATOMIC_FCMPSWAP    : FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>;
  defm GLOBAL_ATOMIC_FMIN        : FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FMAX        : FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>;
  defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>;
  defm GLOBAL_ATOMIC_FMIN_X2     : FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>;
  defm GLOBAL_ATOMIC_FMAX_X2     : FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>;
} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1

// The no-return and returning forms of these two atomics are instantiated
// separately (through the _NO_RTN and _RTN multiclasses) because each form is
// guarded by its own predicate.
let is_flat_global = 1 in {
  let OtherPredicates = [HasAtomicFaddNoRtnInsts] in
  defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN <"global_atomic_add_f32", VGPR_32, f32>;

  let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in
  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN <"global_atomic_pk_add_f16", VGPR_32, v2f16>;

  let OtherPredicates = [HasAtomicFaddRtnInsts] in
  defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_add_f32", VGPR_32, f32>;

  let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in
  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN <"global_atomic_pk_add_f16", VGPR_32, v2f16>;
} // End is_flat_global = 1

//===----------------------------------------------------------------------===//
// Flat Patterns
//===----------------------------------------------------------------------===//

// Flat load pattern: FlatOffset splits the address into a 64-bit vaddr and an
// i32 immediate offset, both of which are forwarded to the instruction.
1045class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1046 (vt (node (FlatOffset i64:$vaddr, i32:$offset))), 1047 (inst $vaddr, $offset) 1048>; 1049 1050class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1051 (node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in), 1052 (inst $vaddr, $offset, 0, $in) 1053>; 1054 1055class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1056 (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in), 1057 (inst $vaddr, $offset, 0, $in) 1058>; 1059 1060class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1061 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)), 1062 (inst $saddr, $voffset, $offset, 0, $in) 1063>; 1064 1065class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1066 (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))), 1067 (inst $vaddr, $offset) 1068>; 1069 1070class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1071 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))), 1072 (inst $saddr, $voffset, $offset, 0) 1073>; 1074 1075class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1076 ValueType vt> : GCNPat < 1077 (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset)), 1078 (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 1079>; 1080 1081class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1082 ValueType vt, ValueType data_vt = vt> : GCNPat < 1083 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), data_vt:$data)), 1084 (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset) 1085>; 1086 1087class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1088 
ValueType vt> : GCNPat < 1089 (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$data), 1090 (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 1091>; 1092 1093class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1094 (node vt:$data, (FlatOffset i64:$vaddr, i32:$offset)), 1095 (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 1096>; 1097 1098class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1099 (node vt:$data, (GlobalOffset i64:$vaddr, i32:$offset)), 1100 (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 1101>; 1102 1103class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, 1104 ValueType vt, ValueType data_vt = vt> : GCNPat < 1105 // atomic store follows atomic binop convention so the address comes 1106 // first. 1107 (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data), 1108 (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset) 1109>; 1110 1111multiclass FlatAtomicNoRtnPatBase <string inst, string node, ValueType vt, 1112 ValueType data_vt = vt> { 1113 1114 defvar noRtnNode = !cast<PatFrags>(node); 1115 1116 let AddedComplexity = 1 in 1117 def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)), 1118 (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>; 1119} 1120 1121multiclass FlatAtomicNoRtnPatWithAddrSpace<string inst, string node, string addrSpaceSuffix, 1122 ValueType vt> : 1123 FlatAtomicNoRtnPatBase<inst, node # "_noret_" # addrSpaceSuffix, vt, vt>; 1124 1125multiclass FlatAtomicNoRtnPat <string inst, string node, ValueType vt, 1126 ValueType data_vt = vt, bit isIntr = 0> : 1127 FlatAtomicNoRtnPatBase<inst, node # "_noret" # !if(isIntr, "", "_"#vt.Size), vt, data_vt>; 1128 1129 1130multiclass FlatAtomicRtnPatBase <string inst, string node, ValueType vt, 1131 ValueType data_vt = vt> { 1132 1133 defvar rtnNode = 
!cast<SDPatternOperator>(node); 1134 1135 def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)), 1136 (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>; 1137} 1138 1139multiclass FlatAtomicRtnPatWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1140 ValueType vt> : 1141 FlatAtomicRtnPatBase<inst, intr # "_" # addrSpaceSuffix, vt, vt>; 1142 1143multiclass FlatAtomicRtnPat <string inst, string node, ValueType vt, 1144 ValueType data_vt = vt, bit isIntr = 0> : 1145 FlatAtomicRtnPatBase<inst, node # !if(isIntr, "", "_"#vt.Size), vt, data_vt>; 1146 1147 1148multiclass FlatAtomicPat <string inst, string node, ValueType vt, 1149 ValueType data_vt = vt, bit isIntr = 0> : 1150 FlatAtomicRtnPat<inst, node, vt, data_vt, isIntr>, 1151 FlatAtomicNoRtnPat<inst, node, vt, data_vt, isIntr>; 1152 1153multiclass FlatAtomicIntrNoRtnPat <string inst, string node, ValueType vt, 1154 ValueType data_vt = vt> { 1155 defm : FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>; 1156} 1157 1158multiclass FlatAtomicIntrRtnPat <string inst, string node, ValueType vt, 1159 ValueType data_vt = vt> { 1160 defm : FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>; 1161} 1162 1163multiclass FlatAtomicIntrPat <string inst, string node, ValueType vt, 1164 ValueType data_vt = vt> : 1165 FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>, 1166 FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>; 1167 1168class FlatSignedAtomicPatBase <FLAT_Pseudo inst, SDPatternOperator node, 1169 ValueType vt, ValueType data_vt = vt> : GCNPat < 1170 (vt (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data)), 1171 (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset) 1172>; 1173 1174multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt, 1175 ValueType data_vt = vt, int complexity = 0, 1176 bit isIntr = 0> { 1177 defvar rtnNode = !cast<SDPatternOperator>(node # 
!if(isIntr, "", "_" # vt.Size)); 1178 defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size)); 1179 1180 let AddedComplexity = complexity in 1181 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>; 1182 1183 let AddedComplexity = !add(complexity, 1) in 1184 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>; 1185} 1186 1187multiclass FlatSignedAtomicIntrPat <string inst, string node, ValueType vt, 1188 ValueType data_vt = vt> { 1189 defm : FlatSignedAtomicPat<inst, node, vt, data_vt, /* complexity */ 0, /* isIntr */ 1>; 1190} 1191 1192multiclass FlatSignedAtomicPatWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1193 ValueType vt, ValueType data_vt = vt> { 1194 defvar noRtnNode = !cast<PatFrags>(intr # "_noret_" # addrSpaceSuffix); 1195 defvar rtnNode = !cast<PatFrags>(intr # "_" # addrSpaceSuffix); 1196 1197 let AddedComplexity = 1 in 1198 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>; 1199 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>; 1200} 1201 1202class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1203 (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))), 1204 (inst $vaddr, $offset) 1205>; 1206 1207class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1208 (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset), vt:$in), 1209 (inst $vaddr, $offset, 0, $in) 1210>; 1211 1212class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1213 (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset)), 1214 (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset) 1215>; 1216 1217class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1218 (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))), 1219 (inst $saddr, 
$offset) 1220>; 1221 1222class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1223 (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset), vt:$in)), 1224 (inst $saddr, $offset, 0, $in) 1225>; 1226 1227class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1228 ValueType vt> : GCNPat < 1229 (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset)), 1230 (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 1231>; 1232 1233class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1234 (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset))), 1235 (inst $vaddr, $saddr, $offset, 0) 1236>; 1237 1238class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1239 ValueType vt> : GCNPat < 1240 (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset)), 1241 (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset) 1242>; 1243 1244class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1245 (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset), vt:$in)), 1246 (inst $vaddr, $saddr, $offset, 0, $in) 1247>; 1248 1249let OtherPredicates = [HasFlatAddressSpace] in { 1250 1251def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>; 1252def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>; 1253def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>; 1254def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>; 1255def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>; 1256def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>; 1257def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>; 1258def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>; 1259def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>; 1260def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>; 1261def : FlatLoadPat 
<FLAT_LOAD_USHORT, extloadi16_flat, i32>; 1262def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>; 1263def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>; 1264def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>; 1265def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>; 1266 1267def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>; 1268def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>; 1269 1270def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>; 1271def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>; 1272 1273foreach vt = Reg32Types.types in { 1274def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>; 1275def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>; 1276} 1277 1278foreach vt = VReg_64.RegTypes in { 1279def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>; 1280def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>; 1281} 1282 1283def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>; 1284 1285foreach vt = VReg_128.RegTypes in { 1286def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>; 1287def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>; 1288} 1289 1290def : FlatStorePat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>; 1291def : FlatStorePat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>; 1292def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>; 1293def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>; 1294def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>; 1295def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>; 1296 1297foreach as = [ "flat", "global" ] in { 1298defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>; 1299defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>; 1300defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>; 1301defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>; 1302defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>; 
1303defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>; 1304defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>; 1305defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>; 1306defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>; 1307defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>; 1308defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>; 1309defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>; 1310defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>; 1311 1312defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>; 1313defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>; 1314defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>; 1315defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>; 1316defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>; 1317defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>; 1318defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>; 1319defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>; 1320defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>; 1321defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>; 1322defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>; 1323defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>; 1324defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>; 1325} // end foreach as 1326 1327let SubtargetPredicate = isGFX12Plus in { 1328 defm : FlatAtomicRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32 >; 1329 1330 let OtherPredicates = [HasAtomicCSubNoRtnInsts] in 1331 defm : FlatAtomicNoRtnPatWithAddrSpace<"FLAT_ATOMIC_COND_SUB_U32", 
"int_amdgcn_atomic_cond_sub_u32", "flat_addrspace", i32>; 1332} 1333 1334def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>; 1335def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>; 1336 1337let OtherPredicates = [HasD16LoadStore] in { 1338def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>; 1339def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>; 1340} 1341 1342let OtherPredicates = [D16PreservesUnusedBits] in { 1343def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>; 1344def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>; 1345def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>; 1346def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>; 1347def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>; 1348def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>; 1349 1350def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>; 1351def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>; 1352def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>; 1353def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>; 1354def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>; 1355def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>; 1356} 1357 1358} // End OtherPredicates = [HasFlatAddressSpace] 1359 1360 1361multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1362 def : FlatLoadSignedPat <inst, node, vt> { 1363 let AddedComplexity = 10; 1364 } 1365 1366 def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1367 let AddedComplexity = 11; 1368 } 1369} 1370 1371multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1372 def : FlatSignedLoadPat_D16 <inst, node, vt> { 1373 let 
AddedComplexity = 10; 1374 } 1375 1376 def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1377 let AddedComplexity = 11; 1378 } 1379} 1380 1381multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node, 1382 ValueType vt> { 1383 def : FlatStoreSignedPat <inst, node, vt> { 1384 let AddedComplexity = 10; 1385 } 1386 1387 def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1388 let AddedComplexity = 11; 1389 } 1390} 1391 1392multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt, 1393 ValueType data_vt = vt> { 1394 let AddedComplexity = 11 in 1395 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), !cast<SDPatternOperator>(node), vt, data_vt>; 1396 1397 let AddedComplexity = 13 in 1398 def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node), vt, data_vt>; 1399} 1400 1401multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt, 1402 ValueType data_vt = vt, bit isPatFrags = 0> { 1403 defvar rtnNode = !if(isPatFrags, !cast<PatFrags>(node), !cast<SDPatternOperator>(node)); 1404 1405 let AddedComplexity = 10 in 1406 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>; 1407 1408 let AddedComplexity = 12 in 1409 def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, vt, data_vt>; 1410} 1411 1412multiclass GlobalFLATAtomicPatsNoRtn<string inst, string node, ValueType vt, 1413 ValueType data_vt = vt, bit isIntr = 0> : 1414 GlobalFLATAtomicPatsNoRtnBase<inst, node # "_noret" # !if(isIntr, "", "_" # vt.Size), vt, data_vt>; 1415 1416multiclass GlobalFLATAtomicPatsRtn<string inst, string node, ValueType vt, 1417 ValueType data_vt = vt, bit isIntr = 0> : 1418 GlobalFLATAtomicPatsRtnBase<inst, node # !if(isIntr, "", "_" # vt.Size), vt, data_vt>; 1419 1420multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt, 1421 ValueType data_vt = vt, bit 
isIntr = 0> : 1422 GlobalFLATAtomicPatsNoRtn<inst, node, vt, data_vt, isIntr>, 1423 GlobalFLATAtomicPatsRtn<inst, node, vt, data_vt, isIntr>; 1424 1425multiclass GlobalFLATAtomicPatsNoRtnWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1426 ValueType vt, ValueType data_vt = vt> : 1427 GlobalFLATAtomicPatsNoRtnBase<inst, intr # "_noret_" # addrSpaceSuffix, vt, data_vt>; 1428 1429multiclass GlobalFLATAtomicPatsRtnWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1430 ValueType vt, ValueType data_vt = vt> : 1431 GlobalFLATAtomicPatsRtnBase<inst, intr # "_" # addrSpaceSuffix, vt, data_vt, /*isPatFrags*/ 1>; 1432 1433multiclass GlobalFLATAtomicPatsWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1434 ValueType vt, ValueType data_vt = vt> : 1435 GlobalFLATAtomicPatsNoRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>, 1436 GlobalFLATAtomicPatsRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>; 1437 1438multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt, 1439 ValueType data_vt = vt> { 1440 defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>; 1441} 1442 1443multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1444 def : ScratchLoadSignedPat <inst, node, vt> { 1445 let AddedComplexity = 25; 1446 } 1447 1448 def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1449 let AddedComplexity = 26; 1450 } 1451 1452 def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1453 let SubtargetPredicate = HasFlatScratchSVSMode; 1454 let AddedComplexity = 27; 1455 } 1456} 1457 1458multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node, 1459 ValueType vt> { 1460 def : ScratchStoreSignedPat <inst, node, vt> { 1461 let AddedComplexity = 25; 1462 } 1463 1464 def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1465 let AddedComplexity = 26; 
1466 } 1467 1468 def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1469 let SubtargetPredicate = HasFlatScratchSVSMode; 1470 let AddedComplexity = 27; 1471 } 1472} 1473 1474multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1475 def : ScratchLoadSignedPat_D16 <inst, node, vt> { 1476 let AddedComplexity = 25; 1477 } 1478 1479 def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1480 let AddedComplexity = 26; 1481 } 1482 1483 def : ScratchLoadSVaddrPat_D16 <!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1484 let SubtargetPredicate = HasFlatScratchSVSMode; 1485 let AddedComplexity = 27; 1486 } 1487} 1488 1489let OtherPredicates = [HasFlatGlobalInsts] in { 1490 1491defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>; 1492defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i16>; 1493defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i32>; 1494defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i16>; 1495defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>; 1496defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>; 1497defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>; 1498defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>; 1499defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>; 1500defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>; 1501defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>; 1502defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>; 1503defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>; 1504defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>; 1505 1506foreach vt = Reg32Types.types in { 1507defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>; 1508defm : 
GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>; 1509} 1510 1511foreach vt = VReg_64.RegTypes in { 1512defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, load_global, vt>; 1513defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>; 1514} 1515 1516defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX3, load_global, v3i32>; 1517 1518foreach vt = VReg_128.RegTypes in { 1519defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX4, load_global, vt>; 1520defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>; 1521} 1522 1523// There is no distinction for atomic load lowering during selection; 1524// the memory legalizer will set the cache bits and insert the 1525// appropriate waits. 1526defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>; 1527defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>; 1528 1529defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>; 1530defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>; 1531defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>; 1532defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>; 1533defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>; 1534 1535let OtherPredicates = [HasD16LoadStore] in { 1536defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>; 1537defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>; 1538} 1539 1540let OtherPredicates = [D16PreservesUnusedBits] in { 1541defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>; 1542defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>; 1543defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>; 1544defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>; 1545defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, 
load_d16_hi_global, v2i16>; 1546defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>; 1547 1548defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>; 1549defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>; 1550defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>; 1551defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>; 1552defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>; 1553defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>; 1554} 1555 1556defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i32>; 1557defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>; 1558defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i32>; 1559defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i16>; 1560defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, atomic_store_32_global, i32>; 1561defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>; 1562 1563defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>; 1564defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>; 1565defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_load_uinc_wrap_global", i32>; 1566defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_load_udec_wrap_global", i32>; 1567defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", "atomic_load_and_global", i32>; 1568defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", "atomic_load_max_global", i32>; 1569defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", "atomic_load_umax_global", i32>; 1570defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", "atomic_load_min_global", i32>; 1571defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", "atomic_load_umin_global", i32>; 1572defm : GlobalFLATAtomicPats 
<"GLOBAL_ATOMIC_OR", "atomic_load_or_global", i32>; 1573defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", "atomic_swap_global", i32>; 1574defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_global", i32, v2i32>; 1575defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>; 1576defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>; 1577 1578let OtherPredicates = [HasAtomicCSubNoRtnInsts] in 1579defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>; 1580 1581defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>; 1582defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>; 1583defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_load_uinc_wrap_global", i64>; 1584defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_load_udec_wrap_global", i64>; 1585defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", "atomic_load_and_global", i64>; 1586defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", "atomic_load_max_global", i64>; 1587defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", "atomic_load_umax_global", i64>; 1588defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", "atomic_load_min_global", i64>; 1589defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", "atomic_load_umin_global", i64>; 1590defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", "atomic_load_or_global", i64>; 1591defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", "atomic_swap_global", i64>; 1592defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>; 1593defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>; 1594 1595let SubtargetPredicate = isGFX12Plus in { 1596 defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "global_addrspace", 
i32>; 1597 1598 let OtherPredicates = [HasAtomicCSubNoRtnInsts] in 1599 defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_COND_SUB_U32", "int_amdgcn_atomic_cond_sub_u32", "global_addrspace", i32>; 1600} 1601 1602let OtherPredicates = [isGFX12Plus] in { 1603 defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ORDERED_ADD_B64", "int_amdgcn_global_atomic_ordered_add_b64", i64, i64, /* isIntr */ 1>; 1604 1605 let WaveSizePredicate = isWave32 in { 1606 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w32, int_amdgcn_global_load_tr, v2i32>; 1607 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32, int_amdgcn_global_load_tr, v8i16>; 1608 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32, int_amdgcn_global_load_tr, v8f16>; 1609 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w32, int_amdgcn_global_load_tr, v8bf16>; 1610 } 1611 let WaveSizePredicate = isWave64 in { 1612 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B64_w64, int_amdgcn_global_load_tr, i32>; 1613 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64, int_amdgcn_global_load_tr, v4i16>; 1614 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64, int_amdgcn_global_load_tr, v4f16>; 1615 defm : GlobalFLATLoadPats <GLOBAL_LOAD_TR_B128_w64, int_amdgcn_global_load_tr, v4bf16>; 1616 } 1617} 1618 1619let OtherPredicates = [isGFX10Plus] in { 1620defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>; 1621defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>; 1622defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>; 1623defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>; 1624} 1625 1626let OtherPredicates = [isGFX10GFX11] in { 1627defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>; 1628defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>; 1629 1630defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>; 
1631defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>; 1632} 1633 1634let OtherPredicates = [isGFX10Only] in { 1635defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", "atomic_load_fmin_global", f64>; 1636defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", "atomic_load_fmax_global", f64>; 1637defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN_X2", "int_amdgcn_global_atomic_fmin", f64>; 1638defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>; 1639defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN_X2", "atomic_load_fmin_flat", f64>; 1640defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX_X2", "atomic_load_fmax_flat", f64>; 1641defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN_X2", "int_amdgcn_flat_atomic_fmin", f64>; 1642defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX_X2", "int_amdgcn_flat_atomic_fmax", f64>; 1643} 1644 1645let OtherPredicates = [isGFX12Only] in { 1646 defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin_num", f32>; 1647 defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax_num", f32>; 1648 defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin_num", f32>; 1649 defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax_num", f32>; 1650} 1651 1652let OtherPredicates = [HasAtomicFaddNoRtnInsts] in { 1653defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>; 1654defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>; 1655defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>; 1656} 1657 1658let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in { 1659defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", 
"global_addrspace", v2f16>; 1660defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>; 1661} 1662 1663let OtherPredicates = [HasAtomicFaddRtnInsts] in { 1664defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>; 1665defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>; 1666defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>; 1667} 1668 1669let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in { 1670defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>; 1671defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>; 1672} 1673 1674let OtherPredicates = [isGFX90APlus] in { 1675defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>; 1676defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>; 1677defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>; 1678defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>; 1679defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>; 1680defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>; 1681defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>; 1682defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>; 1683defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>; 1684defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>; 1685defm 
: FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f64>; 1686defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>; 1687defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>; 1688} 1689 1690let OtherPredicates = [HasFlatAtomicFaddF32Inst] in { 1691defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>; 1692defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f32>; 1693} 1694 1695let OtherPredicates = [HasAtomicFlatPkAdd16Insts] in { 1696defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", v2f16>; 1697defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>; 1698} 1699 1700let OtherPredicates = [HasAtomicGlobalPkAddBF16Inst] in 1701defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>; 1702 1703} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 1704 1705let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in { 1706 1707defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i32>; 1708defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i32>; 1709defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i32>; 1710defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>; 1711defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>; 1712defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>; 1713defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>; 1714defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>; 1715defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>; 1716defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, 
load_private, i16>; 1717 1718foreach vt = Reg32Types.types in { 1719defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORD, load_private, vt>; 1720defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>; 1721} 1722 1723foreach vt = VReg_64.RegTypes in { 1724defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX2, load_private, vt>; 1725defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>; 1726} 1727 1728defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>; 1729 1730foreach vt = VReg_128.RegTypes in { 1731defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX4, load_private, vt>; 1732defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>; 1733} 1734 1735defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i32>; 1736defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>; 1737defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, truncstorei16_private, i32>; 1738defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, i16>; 1739defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private, v3i32>; 1740 1741let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in { 1742defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>; 1743defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI, truncstorei8_hi16_private, i32>; 1744} 1745 1746let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in { 1747defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>; 1748defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>; 1749defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2i16>; 1750defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2f16>; 1751defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2i16>; 1752defm : 
ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2f16>; 1753 1754defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>; 1755defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>; 1756defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2i16>; 1757defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2f16>; 1758defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2i16>; 1759defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f16>; 1760} 1761 1762} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch] 1763 1764//===----------------------------------------------------------------------===// 1765// Target 1766//===----------------------------------------------------------------------===// 1767 1768//===----------------------------------------------------------------------===// 1769// CI 1770//===----------------------------------------------------------------------===// 1771 1772class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> : 1773 FLAT_Real <op, ps>, 1774 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> { 1775 let AssemblerPredicate = isGFX7Only; 1776 let DecoderNamespace="GFX7"; 1777} 1778 1779def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>; 1780def FLAT_LOAD_SBYTE_ci : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>; 1781def FLAT_LOAD_USHORT_ci : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>; 1782def FLAT_LOAD_SSHORT_ci : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>; 1783def FLAT_LOAD_DWORD_ci : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>; 1784def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>; 1785def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>; 1786def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>; 1787 1788def FLAT_STORE_BYTE_ci : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>; 1789def FLAT_STORE_SHORT_ci : FLAT_Real_ci <0x1a, 
FLAT_STORE_SHORT>; 1790def FLAT_STORE_DWORD_ci : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>; 1791def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>; 1792def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>; 1793def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>; 1794 1795multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> { 1796 def _ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 1797 def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 1798} 1799 1800defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>; 1801defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>; 1802defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>; 1803defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>; 1804defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>; 1805defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>; 1806defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>; 1807defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>; 1808defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>; 1809defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>; 1810defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>; 1811defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>; 1812defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>; 1813defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>; 1814defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>; 1815defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>; 1816defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>; 1817defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>; 1818defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>; 
defm FLAT_ATOMIC_SMAX_X2     : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2     : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2      : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2       : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2      : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2      : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2      : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;

// CI Only flat instructions
defm FLAT_ATOMIC_FCMPSWAP    : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
defm FLAT_ATOMIC_FMIN        : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
defm FLAT_ATOMIC_FMAX        : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
defm FLAT_ATOMIC_FMIN_X2     : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
defm FLAT_ATOMIC_FMAX_X2     : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;


//===----------------------------------------------------------------------===//
// VI
//===----------------------------------------------------------------------===//

// Real (encodable) form of a FLAT pseudo for the VI encoding family.
//   op       - 7-bit opcode forwarded to FLAT_Real.
//   ps       - the pseudo instruction this real is derived from.
//   has_sccb - when set, Inst{25} is taken from the SCC bit of the cpol
//              operand; otherwise Inst{25} is the pseudo's fixed sccbValue and
//              the "$sccb" operand is removed from the assembly string.
class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
  let AssemblerPredicate = isGFX8GFX9;
  let DecoderNamespace = "GFX8";

  let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
  let AsmString = ps.Mnemonic #
                  !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands);
}

// Emits the two VI reals for the pseudo named NAME: the plain form (_vi) and
// the form derived from the NAME#"_SADDR" pseudo (_SADDR_vi). has_sccb
// defaults to the value recorded on the NAME pseudo.
multiclass FLAT_Real_AllAddr_vi<bits<7> op,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
  def _vi       : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>;
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
}

// Real form of a FLAT pseudo for the GFX940 encoding family. Inst{13} carries
// the pseudo's sve value; Inst{25} is the cpol SCC bit when the pseudo has
// sccb, else the pseudo's fixed sccbValue.
class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
  FLAT_Real <op, ps>,
  SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
  let AssemblerPredicate = isGFX940Plus;
  let DecoderNamespace = "GFX9";
  let Inst{13} = ps.sve;
  let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
}

// Emits, for the pseudo named NAME:
//   _vi        - VI real restricted to isGFX8GFX9NotGFX940.
//   _SADDR_vi  - VI real of NAME#"_SADDR", decoded in the "GFX9" namespace.
//   _VE_gfx940, _SVS_gfx940, _ST_gfx940 - GFX940 reals of NAME, NAME#"_SVS"
//                and NAME#"_ST", all predicated on isGFX940Plus.
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
  def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
    let AssemblerPredicate = isGFX8GFX9NotGFX940;
    let OtherPredicates = [isGFX8GFX9NotGFX940];
  }
  def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
    let DecoderNamespace = "GFX9";
  }
  let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in {
    def _VE_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
    def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
    def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
  }
}

// Emits the reals for an LDS-load pseudo named NAME.
//   op              - opcode used by the GFX940 reals.
//   pre_gfx940_op   - opcode used by the VI reals.
//   pre_gfx940_name - mnemonic used by the VI reals; defaults to the pseudo's
//                     PseudoInstr with "_lds" removed. The VI assembly string
//                     is this name, the pseudo's operands, then " lds".
//   has_sccb        - forwarded to the VI reals only.
multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
  string pre_gfx940_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr),
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {

  let OtherPredicates = [isGFX8GFX9NotGFX940] in {
    def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
      let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
    }
    def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
      let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
    }
  }

  let SubtargetPredicate = isGFX940Plus in {
    def _gfx940       : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
    def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
  }
}

// Everything FLAT_Real_AllAddr_LDS emits, plus GFX940 reals for the
// NAME#"_SVS" and NAME#"_ST" pseudos.
multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
  defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>;
  let SubtargetPredicate = isGFX940Plus in {
    def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
    def _ST_gfx940  : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
  }
}

// FLAT loads.
def FLAT_LOAD_UBYTE_vi         : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
def FLAT_LOAD_SBYTE_vi         : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
def FLAT_LOAD_USHORT_vi        : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
def FLAT_LOAD_SSHORT_vi        : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
def FLAT_LOAD_DWORD_vi         : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
def FLAT_LOAD_DWORDX2_vi       : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
def FLAT_LOAD_DWORDX4_vi       : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
def FLAT_LOAD_DWORDX3_vi       : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;

// FLAT stores.
def FLAT_STORE_BYTE_vi         : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
def FLAT_STORE_BYTE_D16_HI_vi  : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>;
def FLAT_STORE_SHORT_vi        : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>;
def FLAT_STORE_DWORD_vi        : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
def FLAT_STORE_DWORDX2_vi      : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
def FLAT_STORE_DWORDX4_vi      : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
def FLAT_STORE_DWORDX3_vi      : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;

// FLAT D16 loads.
def FLAT_LOAD_UBYTE_D16_vi     : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>;
def FLAT_LOAD_UBYTE_D16_HI_vi  : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>;
def FLAT_LOAD_SBYTE_D16_vi     : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>;
def FLAT_LOAD_SBYTE_D16_HI_vi  : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>;
def FLAT_LOAD_SHORT_D16_vi     : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>;
def FLAT_LOAD_SHORT_D16_HI_vi  : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>;

// Emits the no-return (_vi) and returning (_RTN_vi) VI reals for the atomic
// pseudo ps; the returning form is looked up as ps.PseudoInstr # "_RTN".
multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
  def _vi     : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>;
  def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>;
}

// Extends FLAT_Real_AllAddr_vi (_vi, _SADDR_vi) with the returning variants
// _RTN_vi and _SADDR_RTN_vi of the pseudo named NAME.
multiclass FLAT_Global_Real_Atomics_vi<bits<7> op,
  bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> :
  FLAT_Real_AllAddr_vi<op, has_sccb> {
  def _RTN_vi       : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>;
  def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>;
}


// FLAT atomics.
defm FLAT_ATOMIC_SWAP       : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
defm FLAT_ATOMIC_CMPSWAP    : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
defm FLAT_ATOMIC_ADD        : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
defm FLAT_ATOMIC_SUB        : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
defm FLAT_ATOMIC_SMIN       : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
defm FLAT_ATOMIC_UMIN       : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
defm FLAT_ATOMIC_SMAX       : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
defm FLAT_ATOMIC_UMAX       : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
defm FLAT_ATOMIC_AND        : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
defm FLAT_ATOMIC_OR         : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
defm FLAT_ATOMIC_XOR        : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
defm FLAT_ATOMIC_INC        : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
defm FLAT_ATOMIC_DEC        : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
defm FLAT_ATOMIC_SWAP_X2    : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
defm FLAT_ATOMIC_ADD_X2     : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
defm FLAT_ATOMIC_SUB_X2     : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
defm FLAT_ATOMIC_SMIN_X2    : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
defm FLAT_ATOMIC_UMIN_X2    : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
defm FLAT_ATOMIC_SMAX_X2    : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
defm FLAT_ATOMIC_UMAX_X2    : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
defm FLAT_ATOMIC_AND_X2     : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
defm FLAT_ATOMIC_OR_X2      : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
defm FLAT_ATOMIC_XOR_X2     : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
defm FLAT_ATOMIC_INC_X2     : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
defm FLAT_ATOMIC_DEC_X2     : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;

// GLOBAL loads.
defm GLOBAL_LOAD_UBYTE   : FLAT_Real_AllAddr_vi <0x10>;
defm GLOBAL_LOAD_SBYTE   : FLAT_Real_AllAddr_vi <0x11>;
defm GLOBAL_LOAD_USHORT  : FLAT_Real_AllAddr_vi <0x12>;
defm GLOBAL_LOAD_SSHORT  : FLAT_Real_AllAddr_vi <0x13>;
defm GLOBAL_LOAD_DWORD   : FLAT_Real_AllAddr_vi <0x14>;
defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>;
defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>;
defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>;

// GLOBAL D16 loads.
defm GLOBAL_LOAD_UBYTE_D16    : FLAT_Real_AllAddr_vi <0x20>;
defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>;
defm GLOBAL_LOAD_SBYTE_D16    : FLAT_Real_AllAddr_vi <0x22>;
defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>;
defm GLOBAL_LOAD_SHORT_D16    : FLAT_Real_AllAddr_vi <0x24>;
defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>;

// GLOBAL stores.
defm GLOBAL_STORE_BYTE         : FLAT_Real_AllAddr_vi <0x18>;
defm GLOBAL_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_vi <0x19>;
defm GLOBAL_STORE_SHORT        : FLAT_Real_AllAddr_vi <0x1a>;
defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>;
defm GLOBAL_STORE_DWORD        : FLAT_Real_AllAddr_vi <0x1c>;
defm GLOBAL_STORE_DWORDX2      : FLAT_Real_AllAddr_vi <0x1d>;
defm GLOBAL_STORE_DWORDX3      : FLAT_Real_AllAddr_vi <0x1e>;
defm GLOBAL_STORE_DWORDX4      : FLAT_Real_AllAddr_vi <0x1f>;

// GLOBAL loads to LDS: first opcode is the gfx940 one, second the pre-gfx940
// one (see FLAT_Real_AllAddr_LDS).
defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_LDS <0x026, 0x10>;
defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_LDS <0x027, 0x11>;
defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>;
defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>;
defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_LDS <0x02a, 0x14>;

// GLOBAL atomics.
defm GLOBAL_ATOMIC_SWAP       : FLAT_Global_Real_Atomics_vi <0x40>;
defm GLOBAL_ATOMIC_CMPSWAP    : FLAT_Global_Real_Atomics_vi <0x41>;
defm GLOBAL_ATOMIC_ADD        : FLAT_Global_Real_Atomics_vi <0x42>;
defm GLOBAL_ATOMIC_SUB        : FLAT_Global_Real_Atomics_vi <0x43>;
defm GLOBAL_ATOMIC_SMIN       : FLAT_Global_Real_Atomics_vi <0x44>;
defm GLOBAL_ATOMIC_UMIN       : FLAT_Global_Real_Atomics_vi <0x45>;
defm GLOBAL_ATOMIC_SMAX       : FLAT_Global_Real_Atomics_vi <0x46>;
defm GLOBAL_ATOMIC_UMAX       : FLAT_Global_Real_Atomics_vi <0x47>;
defm GLOBAL_ATOMIC_AND        : FLAT_Global_Real_Atomics_vi <0x48>;
defm GLOBAL_ATOMIC_OR         : FLAT_Global_Real_Atomics_vi <0x49>;
defm GLOBAL_ATOMIC_XOR        : FLAT_Global_Real_Atomics_vi <0x4a>;
defm GLOBAL_ATOMIC_INC        : FLAT_Global_Real_Atomics_vi <0x4b>;
defm GLOBAL_ATOMIC_DEC        : FLAT_Global_Real_Atomics_vi <0x4c>;
defm GLOBAL_ATOMIC_SWAP_X2    : FLAT_Global_Real_Atomics_vi <0x60>;
defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>;
defm GLOBAL_ATOMIC_ADD_X2     : FLAT_Global_Real_Atomics_vi <0x62>;
defm GLOBAL_ATOMIC_SUB_X2     : FLAT_Global_Real_Atomics_vi <0x63>;
defm GLOBAL_ATOMIC_SMIN_X2    : FLAT_Global_Real_Atomics_vi <0x64>;
defm GLOBAL_ATOMIC_UMIN_X2    : FLAT_Global_Real_Atomics_vi <0x65>;
defm GLOBAL_ATOMIC_SMAX_X2    : FLAT_Global_Real_Atomics_vi <0x66>;
defm GLOBAL_ATOMIC_UMAX_X2    : FLAT_Global_Real_Atomics_vi <0x67>;
defm GLOBAL_ATOMIC_AND_X2     : FLAT_Global_Real_Atomics_vi <0x68>;
defm GLOBAL_ATOMIC_OR_X2      : FLAT_Global_Real_Atomics_vi <0x69>;
defm GLOBAL_ATOMIC_XOR_X2     : FLAT_Global_Real_Atomics_vi <0x6a>;
defm GLOBAL_ATOMIC_INC_X2     : FLAT_Global_Real_Atomics_vi <0x6b>;
defm GLOBAL_ATOMIC_DEC_X2     : FLAT_Global_Real_Atomics_vi <0x6c>;

// SCRATCH loads to LDS: gfx940 opcode first, pre-gfx940 opcode second.
defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_SVE_LDS <0x026, 0x10>;
defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_SVE_LDS <0x027, 0x11>;
defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_LDS <0x028, 0x12>;
defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_LDS <0x029, 0x13>;
defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_SVE_LDS <0x02a, 0x14>;

// SCRATCH loads and stores.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_AllAddr_SVE_vi <0x10>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_AllAddr_SVE_vi <0x11>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_AllAddr_SVE_vi <0x12>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_AllAddr_SVE_vi <0x13>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_AllAddr_SVE_vi <0x14>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_AllAddr_SVE_vi <0x15>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_AllAddr_SVE_vi <0x16>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_AllAddr_SVE_vi <0x17>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_AllAddr_SVE_vi <0x18>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x19>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_AllAddr_SVE_vi <0x20>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x21>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_AllAddr_SVE_vi <0x22>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x23>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_AllAddr_SVE_vi <0x24>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_AllAddr_SVE_vi <0x25>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_AllAddr_SVE_vi <0x1a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x1b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_AllAddr_SVE_vi <0x1c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_AllAddr_SVE_vi <0x1d>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_AllAddr_SVE_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_AllAddr_SVE_vi <0x1f>;

let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
  // These instructions are encoded differently on gfx90* and gfx940.
  // The trailing 0 is has_sccb.
  defm GLOBAL_ATOMIC_ADD_F32    : FLAT_Global_Real_Atomics_vi <0x04d, 0>;
  defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
}

let SubtargetPredicate = isGFX90AOnly in {
  // has_sccb is passed as 0 for all of these.
  defm FLAT_ATOMIC_ADD_F64   : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>;
  defm FLAT_ATOMIC_MIN_F64   : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>;
  defm FLAT_ATOMIC_MAX_F64   : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>;
  defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>;
  defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>;
  defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
} // End SubtargetPredicate = isGFX90AOnly

// Emits the GFX940 reals for the pseudo named NAME and for NAME#"_SADDR".
multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> {
  def _gfx940       : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
  def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}

// Emits the no-return and returning GFX940 reals for the atomic pseudo ps;
// the returning form is looked up as ps.PseudoInstr # "_RTN".
multiclass FLAT_Real_Atomics_gfx940 <bits<7> op, FLAT_Pseudo ps> {
  def _gfx940     : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
  def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}

// Extends FLAT_Real_AllAddr_gfx940 with the returning variants _RTN_gfx940
// and _SADDR_RTN_gfx940 of the pseudo named NAME.
multiclass FLAT_Global_Real_Atomics_gfx940<bits<7> op> :
  FLAT_Real_AllAddr_gfx940<op> {
  def _RTN_gfx940       : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
  def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}

let SubtargetPredicate = isGFX940Plus in {
  // These instructions are encoded differently on gfx90* and gfx940.
  defm GLOBAL_ATOMIC_ADD_F32     : FLAT_Global_Real_Atomics_gfx940 <0x04d>;
  defm GLOBAL_ATOMIC_PK_ADD_F16  : FLAT_Global_Real_Atomics_gfx940 <0x04e>;

  defm FLAT_ATOMIC_ADD_F64       : FLAT_Real_Atomics_gfx940<0x4f, FLAT_ATOMIC_ADD_F64>;
  defm FLAT_ATOMIC_MIN_F64       : FLAT_Real_Atomics_gfx940<0x50, FLAT_ATOMIC_MIN_F64>;
  defm FLAT_ATOMIC_MAX_F64       : FLAT_Real_Atomics_gfx940<0x51, FLAT_ATOMIC_MAX_F64>;
  defm GLOBAL_ATOMIC_ADD_F64     : FLAT_Global_Real_Atomics_gfx940<0x4f>;
  defm GLOBAL_ATOMIC_MIN_F64     : FLAT_Global_Real_Atomics_gfx940<0x50>;
  defm GLOBAL_ATOMIC_MAX_F64     : FLAT_Global_Real_Atomics_gfx940<0x51>;
  // The remaining entries are built with the _vi multiclasses (FLAT_Real_vi)
  // while still carrying this block's isGFX940Plus subtarget predicate.
  defm FLAT_ATOMIC_ADD_F32       : FLAT_Real_Atomics_vi<0x4d, FLAT_ATOMIC_ADD_F32>;
  defm FLAT_ATOMIC_PK_ADD_F16    : FLAT_Real_Atomics_vi<0x4e, FLAT_ATOMIC_PK_ADD_F16>;
  defm FLAT_ATOMIC_PK_ADD_BF16   : FLAT_Real_Atomics_vi<0x52, FLAT_ATOMIC_PK_ADD_BF16>;
  defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
} // End SubtargetPredicate = isGFX940Plus

//===----------------------------------------------------------------------===//
// GFX10.
2111//===----------------------------------------------------------------------===// 2112 2113class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> : 2114 FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> { 2115 let AssemblerPredicate = isGFX10Only; 2116 let DecoderNamespace = "GFX10"; 2117 2118 let Inst{11-0} = offset{11-0}; 2119 let Inst{12} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue); 2120 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d); 2121 let Inst{55} = 0; 2122} 2123 2124 2125multiclass FLAT_Real_Base_gfx10<bits<7> op> { 2126 def _gfx10 : 2127 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>; 2128} 2129 2130multiclass FLAT_Real_RTN_gfx10<bits<7> op> { 2131 def _RTN_gfx10 : 2132 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 2133} 2134 2135multiclass FLAT_Real_SADDR_gfx10<bits<7> op> { 2136 def _SADDR_gfx10 : 2137 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 2138} 2139 2140multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> { 2141 def _SADDR_RTN_gfx10 : 2142 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 2143} 2144 2145multiclass FLAT_Real_ST_gfx10<bits<7> op> { 2146 def _ST_gfx10 : 2147 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")> { 2148 let Inst{54-48} = EXEC_HI.Index; 2149 let OtherPredicates = [HasFlatScratchSTMode]; 2150 } 2151} 2152 2153multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> : 2154 FLAT_Real_Base_gfx10<op>, 2155 FLAT_Real_SADDR_gfx10<op>; 2156 2157multiclass FLAT_Real_Atomics_gfx10<bits<7> op> : 2158 FLAT_Real_Base_gfx10<op>, 2159 FLAT_Real_RTN_gfx10<op>; 2160 2161multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> : 2162 FLAT_Real_AllAddr_gfx10<op>, 2163 FLAT_Real_RTN_gfx10<op>, 2164 FLAT_Real_SADDR_RTN_gfx10<op>; 2165 2166multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> : 2167 FLAT_Real_RTN_gfx10<op>, 2168 FLAT_Real_SADDR_RTN_gfx10<op>; 2169 2170multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> : 2171 FLAT_Real_Base_gfx10<op>, 2172 
FLAT_Real_SADDR_gfx10<op>, 2173 FLAT_Real_ST_gfx10<op>; 2174 2175multiclass FLAT_Real_AllAddr_LDS_gfx10<bits<7> op, 2176 string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> { 2177 let AsmString = opname # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in 2178 defm "" : FLAT_Real_Base_gfx10<op>; 2179 2180 let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in 2181 defm "" : FLAT_Real_SADDR_gfx10<op>; 2182} 2183 2184multiclass FLAT_Real_ScratchAllAddr_LDS_gfx10<bits<7> op, 2185 string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> { 2186 defm "" : FLAT_Real_AllAddr_LDS_gfx10<op>; 2187 2188 let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_ST").AsmOperands # " lds" in 2189 defm "" : FLAT_Real_ST_gfx10<op>; 2190} 2191 2192// ENC_FLAT. 2193defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>; 2194defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>; 2195defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx10<0x00a>; 2196defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx10<0x00b>; 2197defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx10<0x00c>; 2198defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>; 2199defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>; 2200defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>; 2201defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx10<0x018>; 2202defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx10<0x019>; 2203defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx10<0x01a>; 2204defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>; 2205defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx10<0x01c>; 2206defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx10<0x01d>; 2207defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx10<0x01e>; 2208defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx10<0x01f>; 2209defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx10<0x020>; 2210defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>; 2211defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx10<0x022>; 2212defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>; 2213defm 
FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx10<0x024>; 2214defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>; 2215defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx10<0x030>; 2216defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx10<0x031>; 2217defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx10<0x032>; 2218defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx10<0x033>; 2219defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx10<0x035>; 2220defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx10<0x036>; 2221defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx10<0x037>; 2222defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx10<0x038>; 2223defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx10<0x039>; 2224defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx10<0x03a>; 2225defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_gfx10<0x03b>; 2226defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx10<0x03c>; 2227defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx10<0x03d>; 2228defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>; 2229defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx10<0x03f>; 2230defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx10<0x040>; 2231defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx10<0x050>; 2232defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x051>; 2233defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx10<0x052>; 2234defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx10<0x053>; 2235defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx10<0x055>; 2236defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx10<0x056>; 2237defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx10<0x057>; 2238defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx10<0x058>; 2239defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx10<0x059>; 2240defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx10<0x05a>; 2241defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>; 2242defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>; 2243defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>; 2244defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>; 2245defm FLAT_ATOMIC_FMIN_X2 : 
FLAT_Real_Atomics_gfx10<0x05f>; 2246defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060>; 2247 2248 2249// ENC_FLAT_GLBL. 2250defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>; 2251defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>; 2252defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>; 2253defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>; 2254defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>; 2255defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>; 2256defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>; 2257defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>; 2258defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>; 2259defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>; 2260defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>; 2261defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>; 2262defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>; 2263defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>; 2264defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>; 2265defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>; 2266defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>; 2267defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>; 2268defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>; 2269defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>; 2270defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>; 2271defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>; 2272defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>; 2273defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>; 2274defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>; 2275defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>; 2276defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_gfx10<0x034>; 2277defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>; 2278defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>; 
// GFX10 global atomics, remaining opcodes.
defm GLOBAL_ATOMIC_SMAX        : FLAT_Real_GlblAtomics_gfx10<0x037>;
defm GLOBAL_ATOMIC_UMAX        : FLAT_Real_GlblAtomics_gfx10<0x038>;
defm GLOBAL_ATOMIC_AND         : FLAT_Real_GlblAtomics_gfx10<0x039>;
defm GLOBAL_ATOMIC_OR          : FLAT_Real_GlblAtomics_gfx10<0x03a>;
defm GLOBAL_ATOMIC_XOR         : FLAT_Real_GlblAtomics_gfx10<0x03b>;
defm GLOBAL_ATOMIC_INC         : FLAT_Real_GlblAtomics_gfx10<0x03c>;
defm GLOBAL_ATOMIC_DEC         : FLAT_Real_GlblAtomics_gfx10<0x03d>;
defm GLOBAL_ATOMIC_FCMPSWAP    : FLAT_Real_GlblAtomics_gfx10<0x03e>;
defm GLOBAL_ATOMIC_FMIN        : FLAT_Real_GlblAtomics_gfx10<0x03f>;
defm GLOBAL_ATOMIC_FMAX        : FLAT_Real_GlblAtomics_gfx10<0x040>;
defm GLOBAL_ATOMIC_SWAP_X2     : FLAT_Real_GlblAtomics_gfx10<0x050>;
defm GLOBAL_ATOMIC_CMPSWAP_X2  : FLAT_Real_GlblAtomics_gfx10<0x051>;
defm GLOBAL_ATOMIC_ADD_X2      : FLAT_Real_GlblAtomics_gfx10<0x052>;
defm GLOBAL_ATOMIC_SUB_X2      : FLAT_Real_GlblAtomics_gfx10<0x053>;
defm GLOBAL_ATOMIC_SMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x055>;
defm GLOBAL_ATOMIC_UMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x056>;
defm GLOBAL_ATOMIC_SMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x057>;
defm GLOBAL_ATOMIC_UMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x058>;
defm GLOBAL_ATOMIC_AND_X2      : FLAT_Real_GlblAtomics_gfx10<0x059>;
defm GLOBAL_ATOMIC_OR_X2       : FLAT_Real_GlblAtomics_gfx10<0x05a>;
defm GLOBAL_ATOMIC_XOR_X2      : FLAT_Real_GlblAtomics_gfx10<0x05b>;
defm GLOBAL_ATOMIC_INC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05c>;
defm GLOBAL_ATOMIC_DEC_X2      : FLAT_Real_GlblAtomics_gfx10<0x05d>;
defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>;
defm GLOBAL_ATOMIC_FMIN_X2     : FLAT_Real_GlblAtomics_gfx10<0x05f>;
defm GLOBAL_ATOMIC_FMAX_X2     : FLAT_Real_GlblAtomics_gfx10<0x060>;
defm GLOBAL_LOAD_DWORD_ADDTID  : FLAT_Real_AllAddr_gfx10<0x016>;
defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>;

defm GLOBAL_LOAD_LDS_UBYTE  : FLAT_Real_AllAddr_LDS_gfx10<0x008>;
defm GLOBAL_LOAD_LDS_SBYTE  : FLAT_Real_AllAddr_LDS_gfx10<0x009>;
defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS_gfx10<0x00a>;
defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS_gfx10<0x00b>;
defm GLOBAL_LOAD_LDS_DWORD  : FLAT_Real_AllAddr_LDS_gfx10<0x00c>;

// ENC_FLAT_SCRATCH.
defm SCRATCH_LOAD_UBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x008>;
defm SCRATCH_LOAD_SBYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x009>;
defm SCRATCH_LOAD_USHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00a>;
defm SCRATCH_LOAD_SSHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x00b>;
defm SCRATCH_LOAD_DWORD         : FLAT_Real_ScratchAllAddr_gfx10<0x00c>;
defm SCRATCH_LOAD_DWORDX2       : FLAT_Real_ScratchAllAddr_gfx10<0x00d>;
defm SCRATCH_LOAD_DWORDX4       : FLAT_Real_ScratchAllAddr_gfx10<0x00e>;
defm SCRATCH_LOAD_DWORDX3       : FLAT_Real_ScratchAllAddr_gfx10<0x00f>;
defm SCRATCH_STORE_BYTE         : FLAT_Real_ScratchAllAddr_gfx10<0x018>;
defm SCRATCH_STORE_BYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x019>;
defm SCRATCH_STORE_SHORT        : FLAT_Real_ScratchAllAddr_gfx10<0x01a>;
defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>;
defm SCRATCH_STORE_DWORD        : FLAT_Real_ScratchAllAddr_gfx10<0x01c>;
defm SCRATCH_STORE_DWORDX2      : FLAT_Real_ScratchAllAddr_gfx10<0x01d>;
defm SCRATCH_STORE_DWORDX4      : FLAT_Real_ScratchAllAddr_gfx10<0x01e>;
defm SCRATCH_STORE_DWORDX3      : FLAT_Real_ScratchAllAddr_gfx10<0x01f>;
defm SCRATCH_LOAD_UBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x020>;
defm SCRATCH_LOAD_UBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x021>;
defm SCRATCH_LOAD_SBYTE_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x022>;
defm SCRATCH_LOAD_SBYTE_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x023>;
defm SCRATCH_LOAD_SHORT_D16     : FLAT_Real_ScratchAllAddr_gfx10<0x024>;
defm SCRATCH_LOAD_SHORT_D16_HI  : FLAT_Real_ScratchAllAddr_gfx10<0x025>;

defm SCRATCH_LOAD_LDS_UBYTE  : FLAT_Real_ScratchAllAddr_LDS_gfx10<0x008>;
defm SCRATCH_LOAD_LDS_SBYTE  : FLAT_Real_ScratchAllAddr_LDS_gfx10<0x009>;
defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10<0x00a>;
defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10<0x00b>;
defm SCRATCH_LOAD_LDS_DWORD  : FLAT_Real_ScratchAllAddr_LDS_gfx10<0x00c>;

//===----------------------------------------------------------------------===//
// GFX11
//===----------------------------------------------------------------------===//

// Real instruction that ties pseudo 'ps' to the GFX11 encoding family.
// 'opName' is the mnemonic handed to FLAT_Real and defaults to ps.Mnemonic.
class FLAT_Real_gfx11<bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic>
    : FLAT_Real<op, ps, opName>,
      SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX11> {
  let AssemblerPredicate = isGFX11Only;
  let DecoderNamespace = "GFX11";

  // DLC and GLC come from the cpol operand unless the pseudo lacks the bit,
  // in which case the pseudo's fixed value is encoded.
  let Inst{13}    = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue);
  let Inst{14}    = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue);
  let Inst{15}    = cpol{CPolBit.SLC};
  let Inst{17-16} = seg;
  let Inst{55}    = ps.sve;
}

// When 'renamed' is set, defines a GFX11-only MnemonicAlias from the
// mnemonic of pseudo 'ps' to 'opName'.
multiclass FLAT_Aliases_gfx11<string ps, string opName, int renamed> {
  if renamed then
    def _renamed_gfx11
        : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>,
          Requires<[isGFX11Only]>;
}

// One-record building blocks. 'ps' is the pseudo's base name as a string and
// each multiclass appends its own suffix before the lookup. The variants
// that override Inst{54-48} encode SGPR_NULL_gfx11plus.Index there.
multiclass FLAT_Real_Base_gfx11<bits<7> op, string ps, string opName,
                                int renamed = false>
    : FLAT_Aliases_gfx11<ps, opName, renamed> {
  def _gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps), opName> {
    let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
  }
}

multiclass FLAT_Real_RTN_gfx11<bits<7> op, string ps, string opName> {
  def _RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps # "_RTN"), opName> {
    let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
  }
}

multiclass FLAT_Real_SADDR_gfx11<bits<7> op, string ps, string opName> {
  def _SADDR_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps # "_SADDR"), opName>;
}

multiclass FLAT_Real_SADDR_RTN_gfx11<bits<7> op, string ps, string opName> {
  def _SADDR_RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps # "_SADDR_RTN"), opName>;
}

multiclass FLAT_Real_ST_gfx11<bits<7> op, string ps, string opName> {
  def _ST_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps # "_ST"), opName> {
    let Inst{54-48} = SGPR_NULL_gfx11plus.Index;
    let OtherPredicates = [HasFlatScratchSTMode];
  }
}

multiclass FLAT_Real_SVS_gfx11<bits<7> op, string ps, string opName> {
  def _SVS_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps # "_SVS"), opName> {
    let OtherPredicates = [HasFlatScratchSVSMode];
  }
}

// Combinations of the building blocks above.
multiclass FLAT_Real_AllAddr_gfx11<bits<7> op, string ps, string opName,
                                   int renamed = false>
    : FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
      FLAT_Real_SADDR_gfx11<op, ps, opName>;

multiclass FLAT_Real_Atomics_gfx11<bits<7> op, string ps, string opName,
                                   int renamed = false>
    : FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
      FLAT_Real_RTN_gfx11<op, ps, opName>;

multiclass FLAT_Real_GlblAtomics_gfx11<bits<7> op, string ps, string opName,
                                       int renamed = false>
    : FLAT_Real_AllAddr_gfx11<op, ps, opName, renamed>,
      FLAT_Real_RTN_gfx11<op, ps, opName>,
      FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;

// Returning-only variant: the alias is taken from the "_RTN" pseudo.
multiclass FLAT_Real_GlblAtomics_RTN_gfx11<bits<7> op, string ps, string opName,
                                           int renamed = false>
    : FLAT_Aliases_gfx11<ps # "_RTN", opName, renamed>,
      FLAT_Real_RTN_gfx11<op, ps, opName>,
      FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>;

multiclass FLAT_Real_ScratchAllAddr_gfx11<bits<7> op, string ps, string opName,
                                          int renamed = false>
    : FLAT_Real_Base_gfx11<op, ps, opName, renamed>,
      FLAT_Real_SADDR_gfx11<op, ps, opName>,
      FLAT_Real_ST_gfx11<op, ps, opName>,
      FLAT_Real_SVS_gfx11<op, ps, opName>;

// ENC_FLAT.
// GFX11 reals for the FLAT-segment pseudos. Arguments are: opcode, pseudo
// name, GFX11 mnemonic, and an optional 'true' that also requests a
// MnemonicAlias from the pseudo's mnemonic to the new one.
defm FLAT_LOAD_U8            : FLAT_Real_Base_gfx11<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>;
defm FLAT_LOAD_I8            : FLAT_Real_Base_gfx11<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>;
defm FLAT_LOAD_U16           : FLAT_Real_Base_gfx11<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>;
defm FLAT_LOAD_I16           : FLAT_Real_Base_gfx11<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>;
defm FLAT_LOAD_B32           : FLAT_Real_Base_gfx11<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>;
defm FLAT_LOAD_B64           : FLAT_Real_Base_gfx11<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>;
defm FLAT_LOAD_B96           : FLAT_Real_Base_gfx11<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>;
defm FLAT_LOAD_B128          : FLAT_Real_Base_gfx11<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>;
defm FLAT_STORE_B8           : FLAT_Real_Base_gfx11<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>;
defm FLAT_STORE_B16          : FLAT_Real_Base_gfx11<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>;
defm FLAT_STORE_B32          : FLAT_Real_Base_gfx11<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>;
defm FLAT_STORE_B64          : FLAT_Real_Base_gfx11<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>;
defm FLAT_STORE_B96          : FLAT_Real_Base_gfx11<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>;
defm FLAT_STORE_B128         : FLAT_Real_Base_gfx11<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>;
defm FLAT_LOAD_D16_U8        : FLAT_Real_Base_gfx11<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">;
defm FLAT_LOAD_D16_I8        : FLAT_Real_Base_gfx11<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">;
defm FLAT_LOAD_D16_B16       : FLAT_Real_Base_gfx11<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">;
defm FLAT_LOAD_D16_HI_U8     : FLAT_Real_Base_gfx11<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">;
defm FLAT_LOAD_D16_HI_I8     : FLAT_Real_Base_gfx11<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">;
defm FLAT_LOAD_D16_HI_B16    : FLAT_Real_Base_gfx11<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">;
defm FLAT_STORE_D16_HI_B8    : FLAT_Real_Base_gfx11<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">;
defm FLAT_STORE_D16_HI_B16   : FLAT_Real_Base_gfx11<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">;
defm FLAT_ATOMIC_SWAP_B32    : FLAT_Real_Atomics_gfx11<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>;
defm FLAT_ATOMIC_CMPSWAP_B32 : FLAT_Real_Atomics_gfx11<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>;
defm FLAT_ATOMIC_ADD_U32     : FLAT_Real_Atomics_gfx11<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>;
defm FLAT_ATOMIC_SUB_U32     : FLAT_Real_Atomics_gfx11<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>;
defm FLAT_ATOMIC_MIN_I32     : FLAT_Real_Atomics_gfx11<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>;
defm FLAT_ATOMIC_MIN_U32     : FLAT_Real_Atomics_gfx11<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>;
defm FLAT_ATOMIC_MAX_I32     : FLAT_Real_Atomics_gfx11<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>;
defm FLAT_ATOMIC_MAX_U32     : FLAT_Real_Atomics_gfx11<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>;
defm FLAT_ATOMIC_AND_B32     : FLAT_Real_Atomics_gfx11<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>;
defm FLAT_ATOMIC_OR_B32      : FLAT_Real_Atomics_gfx11<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>;
defm FLAT_ATOMIC_XOR_B32     : FLAT_Real_Atomics_gfx11<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>;
defm FLAT_ATOMIC_INC_U32     : FLAT_Real_Atomics_gfx11<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>;
defm FLAT_ATOMIC_DEC_U32     : FLAT_Real_Atomics_gfx11<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>;
defm FLAT_ATOMIC_SWAP_B64    : FLAT_Real_Atomics_gfx11<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>;
defm FLAT_ATOMIC_CMPSWAP_B64 : FLAT_Real_Atomics_gfx11<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>;
defm FLAT_ATOMIC_ADD_U64     : FLAT_Real_Atomics_gfx11<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>;
defm FLAT_ATOMIC_SUB_U64     : FLAT_Real_Atomics_gfx11<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>;
defm FLAT_ATOMIC_MIN_I64     : FLAT_Real_Atomics_gfx11<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>;
defm FLAT_ATOMIC_MIN_U64     : FLAT_Real_Atomics_gfx11<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>;
defm FLAT_ATOMIC_MAX_I64     : FLAT_Real_Atomics_gfx11<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>;
defm FLAT_ATOMIC_MAX_U64     : FLAT_Real_Atomics_gfx11<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>;
defm FLAT_ATOMIC_AND_B64     : FLAT_Real_Atomics_gfx11<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>;
defm FLAT_ATOMIC_OR_B64      : FLAT_Real_Atomics_gfx11<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>;
defm FLAT_ATOMIC_XOR_B64     : FLAT_Real_Atomics_gfx11<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>;
defm FLAT_ATOMIC_INC_U64     : FLAT_Real_Atomics_gfx11<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>;
defm FLAT_ATOMIC_DEC_U64     : FLAT_Real_Atomics_gfx11<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>;
defm FLAT_ATOMIC_CMPSWAP_F32 : FLAT_Real_Atomics_gfx11<0x050, "FLAT_ATOMIC_FCMPSWAP", "flat_atomic_cmpswap_f32">;
defm FLAT_ATOMIC_MIN_F32     : FLAT_Real_Atomics_gfx11<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_f32">;
defm FLAT_ATOMIC_MAX_F32     : FLAT_Real_Atomics_gfx11<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_f32">;
defm FLAT_ATOMIC_ADD_F32     : FLAT_Real_Atomics_gfx11<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">;

// ENC_FLAT_GLBL.
// GFX11 reals for the GLOBAL-segment pseudos. Arguments are: opcode, pseudo
// name, GFX11 mnemonic, and an optional 'true' that also requests a
// MnemonicAlias from the pseudo's mnemonic to the new one.
defm GLOBAL_LOAD_U8            : FLAT_Real_AllAddr_gfx11<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>;
defm GLOBAL_LOAD_I8            : FLAT_Real_AllAddr_gfx11<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>;
defm GLOBAL_LOAD_U16           : FLAT_Real_AllAddr_gfx11<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>;
defm GLOBAL_LOAD_I16           : FLAT_Real_AllAddr_gfx11<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>;
defm GLOBAL_LOAD_B32           : FLAT_Real_AllAddr_gfx11<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>;
defm GLOBAL_LOAD_B64           : FLAT_Real_AllAddr_gfx11<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>;
defm GLOBAL_LOAD_B96           : FLAT_Real_AllAddr_gfx11<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>;
defm GLOBAL_LOAD_B128          : FLAT_Real_AllAddr_gfx11<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>;
defm GLOBAL_STORE_B8           : FLAT_Real_AllAddr_gfx11<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>;
defm GLOBAL_STORE_B16          : FLAT_Real_AllAddr_gfx11<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>;
defm GLOBAL_STORE_B32          : FLAT_Real_AllAddr_gfx11<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>;
defm GLOBAL_STORE_B64          : FLAT_Real_AllAddr_gfx11<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>;
defm GLOBAL_STORE_B96          : FLAT_Real_AllAddr_gfx11<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>;
defm GLOBAL_STORE_B128         : FLAT_Real_AllAddr_gfx11<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>;
defm GLOBAL_LOAD_D16_U8        : FLAT_Real_AllAddr_gfx11<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">;
defm GLOBAL_LOAD_D16_I8        : FLAT_Real_AllAddr_gfx11<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">;
defm GLOBAL_LOAD_D16_B16       : FLAT_Real_AllAddr_gfx11<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">;
defm GLOBAL_LOAD_D16_HI_U8     : FLAT_Real_AllAddr_gfx11<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">;
defm GLOBAL_LOAD_D16_HI_I8     : FLAT_Real_AllAddr_gfx11<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">;
defm GLOBAL_LOAD_D16_HI_B16    : FLAT_Real_AllAddr_gfx11<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">;
defm GLOBAL_STORE_D16_HI_B8    : FLAT_Real_AllAddr_gfx11<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">;
defm GLOBAL_STORE_D16_HI_B16   : FLAT_Real_AllAddr_gfx11<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">;
defm GLOBAL_LOAD_ADDTID_B32    : FLAT_Real_AllAddr_gfx11<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">;
defm GLOBAL_STORE_ADDTID_B32   : FLAT_Real_AllAddr_gfx11<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">;
defm GLOBAL_ATOMIC_SWAP_B32    : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>;
defm GLOBAL_ATOMIC_CMPSWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>;
defm GLOBAL_ATOMIC_ADD_U32     : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>;
defm GLOBAL_ATOMIC_SUB_U32     : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>;
defm GLOBAL_ATOMIC_CSUB_U32    : FLAT_Real_GlblAtomics_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>;
defm GLOBAL_ATOMIC_MIN_I32     : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>;
defm GLOBAL_ATOMIC_MIN_U32     : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>;
defm GLOBAL_ATOMIC_MAX_I32     : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>;
defm GLOBAL_ATOMIC_MAX_U32     : FLAT_Real_GlblAtomics_gfx11<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>;
defm GLOBAL_ATOMIC_AND_B32     : FLAT_Real_GlblAtomics_gfx11<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>;
defm GLOBAL_ATOMIC_OR_B32      : FLAT_Real_GlblAtomics_gfx11<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>;
defm GLOBAL_ATOMIC_XOR_B32     : FLAT_Real_GlblAtomics_gfx11<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>;
defm GLOBAL_ATOMIC_INC_U32     : FLAT_Real_GlblAtomics_gfx11<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>;
defm GLOBAL_ATOMIC_DEC_U32     : FLAT_Real_GlblAtomics_gfx11<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>;
defm GLOBAL_ATOMIC_SWAP_B64    : FLAT_Real_GlblAtomics_gfx11<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>;
defm GLOBAL_ATOMIC_CMPSWAP_B64 : FLAT_Real_GlblAtomics_gfx11<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>;
defm GLOBAL_ATOMIC_ADD_U64     : FLAT_Real_GlblAtomics_gfx11<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>;
defm GLOBAL_ATOMIC_SUB_U64     : FLAT_Real_GlblAtomics_gfx11<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>;
defm GLOBAL_ATOMIC_MIN_I64     : FLAT_Real_GlblAtomics_gfx11<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>;
defm GLOBAL_ATOMIC_MIN_U64     : FLAT_Real_GlblAtomics_gfx11<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>;
defm GLOBAL_ATOMIC_MAX_I64     : FLAT_Real_GlblAtomics_gfx11<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>;
defm GLOBAL_ATOMIC_MAX_U64     : FLAT_Real_GlblAtomics_gfx11<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>;
defm GLOBAL_ATOMIC_AND_B64     : FLAT_Real_GlblAtomics_gfx11<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>;
defm GLOBAL_ATOMIC_OR_B64      : FLAT_Real_GlblAtomics_gfx11<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>;
defm GLOBAL_ATOMIC_XOR_B64     : FLAT_Real_GlblAtomics_gfx11<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>;
defm GLOBAL_ATOMIC_INC_U64     : FLAT_Real_GlblAtomics_gfx11<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>;
defm GLOBAL_ATOMIC_DEC_U64     : FLAT_Real_GlblAtomics_gfx11<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>;
defm GLOBAL_ATOMIC_CMPSWAP_F32 : FLAT_Real_GlblAtomics_gfx11<0x050, "GLOBAL_ATOMIC_FCMPSWAP", "global_atomic_cmpswap_f32">;
defm GLOBAL_ATOMIC_MIN_F32     : FLAT_Real_GlblAtomics_gfx11<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_f32">;
defm GLOBAL_ATOMIC_MAX_F32     : FLAT_Real_GlblAtomics_gfx11<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_f32">;
defm GLOBAL_ATOMIC_ADD_F32     : FLAT_Real_GlblAtomics_gfx11<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">;

// ENC_FLAT_SCRATCH.
defm SCRATCH_LOAD_U8          : FLAT_Real_ScratchAllAddr_gfx11<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>;
defm SCRATCH_LOAD_I8          : FLAT_Real_ScratchAllAddr_gfx11<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>;
defm SCRATCH_LOAD_U16         : FLAT_Real_ScratchAllAddr_gfx11<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>;
defm SCRATCH_LOAD_I16         : FLAT_Real_ScratchAllAddr_gfx11<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>;
defm SCRATCH_LOAD_B32         : FLAT_Real_ScratchAllAddr_gfx11<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>;
defm SCRATCH_LOAD_B64         : FLAT_Real_ScratchAllAddr_gfx11<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>;
defm SCRATCH_LOAD_B96         : FLAT_Real_ScratchAllAddr_gfx11<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>;
defm SCRATCH_LOAD_B128        : FLAT_Real_ScratchAllAddr_gfx11<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>;
defm SCRATCH_STORE_B8         : FLAT_Real_ScratchAllAddr_gfx11<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>;
defm SCRATCH_STORE_B16        : FLAT_Real_ScratchAllAddr_gfx11<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>;
defm SCRATCH_STORE_B32        : FLAT_Real_ScratchAllAddr_gfx11<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>;
defm SCRATCH_STORE_B64        : FLAT_Real_ScratchAllAddr_gfx11<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>;
defm SCRATCH_STORE_B96        : FLAT_Real_ScratchAllAddr_gfx11<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>;
defm SCRATCH_STORE_B128       : FLAT_Real_ScratchAllAddr_gfx11<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>;
defm SCRATCH_LOAD_D16_U8      : FLAT_Real_ScratchAllAddr_gfx11<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">;
defm SCRATCH_LOAD_D16_I8      : FLAT_Real_ScratchAllAddr_gfx11<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">;
defm SCRATCH_LOAD_D16_B16     : FLAT_Real_ScratchAllAddr_gfx11<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">;
defm SCRATCH_LOAD_D16_HI_U8   : FLAT_Real_ScratchAllAddr_gfx11<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">;
defm SCRATCH_LOAD_D16_HI_I8   : FLAT_Real_ScratchAllAddr_gfx11<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">;
defm SCRATCH_LOAD_D16_HI_B16  : FLAT_Real_ScratchAllAddr_gfx11<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">;
defm SCRATCH_STORE_D16_HI_B8  : FLAT_Real_ScratchAllAddr_gfx11<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">;
defm SCRATCH_STORE_D16_HI_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">;

//===----------------------------------------------------------------------===//
// GFX12
//===----------------------------------------------------------------------===//

// Real instruction that ties pseudo 'ps' to the GFX12 encoding family.
// 'opName' is the mnemonic handed to VFLAT_Real and defaults to ps.Mnemonic.
class VFLAT_Real_gfx12<bits<8> op, FLAT_Pseudo ps,
                       string opName = ps.Mnemonic>
    : VFLAT_Real<op, ps, opName>,
      SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX12> {
  let AssemblerPredicate = isGFX12Plus;
  let DecoderNamespace = "GFX12";

  // Two-bit selector taken from the pseudo's flags: 0b01 when
  // is_flat_scratch, 0b10 when is_flat_global, 0b00 otherwise.
  let Inst{25-24} = !if(ps.is_flat_scratch, 0b01,
                        !if(ps.is_flat_global, 0b10, 0b00));
}

// Optional GFX12+ mnemonic aliases that resolve to 'opName':
//  - from the mnemonic of pseudo 'ps', when 'renamed' is set;
//  - from 'alias', when that string is non-empty.
multiclass VFLAT_Aliases_gfx12<string ps, string opName, int renamed, string alias> {
  if renamed then
    def _renamed_gfx12
        : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>,
          Requires<[isGFX12Plus]>;
  if !not(!empty(alias)) then
    def _alias_gfx12
        : MnemonicAlias<alias, opName>,
          Requires<[isGFX12Plus]>;
}

// One-record building blocks. 'ps' is the pseudo's base name as a string and
// each multiclass appends its own suffix before the lookup. The variants
// that override Inst{6-0} encode SGPR_NULL_gfx11plus.HWEncoding there.
multiclass VFLAT_Real_Base_gfx12<bits<8> op, string ps = NAME,
                                 string opName = !tolower(NAME),
                                 int renamed = false, string alias = "">
    : VFLAT_Aliases_gfx12<ps, opName, renamed, alias> {
  def _gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps), opName> {
    let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
  }
}

multiclass VFLAT_Real_RTN_gfx12<bits<8> op, string ps, string opName> {
  def _RTN_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps # "_RTN"), opName> {
    let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
  }
}

multiclass VFLAT_Real_SADDR_gfx12<bits<8> op, string ps, string opName> {
  def _SADDR_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps # "_SADDR"), opName>;
}

multiclass VFLAT_Real_SADDR_RTN_gfx12<bits<8> op, string ps, string opName> {
  def _SADDR_RTN_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps # "_SADDR_RTN"), opName>;
}

multiclass VFLAT_Real_ST_gfx12<bits<8> op, string ps, string opName> {
  def _ST_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps # "_ST"), opName> {
    let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding);
    let OtherPredicates = [HasFlatScratchSTMode];
  }
}

multiclass VFLAT_Real_SVS_gfx12<bits<8> op, string ps, string opName> {
  def _SVS_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps # "_SVS"), opName> {
    let OtherPredicates = [HasFlatScratchSVSMode];
  }
}

// Combinations of the building blocks above.
multiclass VFLAT_Real_Atomics_gfx12<bits<8> op, string ps = NAME,
                                    string opName = !tolower(NAME),
                                    int renamed = false, string alias = "">
    : VFLAT_Real_Base_gfx12<op, ps, opName, renamed, alias>,
      VFLAT_Real_RTN_gfx12<op, ps, opName>;

multiclass VGLOBAL_Real_AllAddr_gfx12<bits<8> op, string ps = NAME,
                                      string opName = !tolower(NAME),
                                      int renamed = false, string alias = "">
    : VFLAT_Real_Base_gfx12<op, ps, opName, renamed, alias>,
      VFLAT_Real_SADDR_gfx12<op, ps, opName>;

multiclass VGLOBAL_Real_Atomics_gfx12<bits<8> op, string ps = NAME,
                                      string opName = !tolower(NAME),
                                      int renamed = false, string alias = "">
    : VGLOBAL_Real_AllAddr_gfx12<op, ps, opName, renamed, alias>,
      VFLAT_Real_RTN_gfx12<op, ps, opName>,
      VFLAT_Real_SADDR_RTN_gfx12<op, ps, opName>;

multiclass VSCRATCH_Real_AllAddr_gfx12<bits<8> op, string ps = NAME,
                                       string opName = !tolower(NAME),
                                       int renamed = false>
    : VFLAT_Real_Base_gfx12<op, ps, opName, renamed>,
      VFLAT_Real_SADDR_gfx12<op, ps, opName>,
      VFLAT_Real_ST_gfx12<op, ps, opName>,
      VFLAT_Real_SVS_gfx12<op, ps, opName>;

// ENC_VFLAT.
defm FLAT_LOAD_U8              : VFLAT_Real_Base_gfx12<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>;
defm FLAT_LOAD_I8              : VFLAT_Real_Base_gfx12<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>;
defm FLAT_LOAD_U16             : VFLAT_Real_Base_gfx12<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>;
defm FLAT_LOAD_I16             : VFLAT_Real_Base_gfx12<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>;
defm FLAT_LOAD_B32             : VFLAT_Real_Base_gfx12<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>;
defm FLAT_LOAD_B64             : VFLAT_Real_Base_gfx12<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>;
defm FLAT_LOAD_B96             : VFLAT_Real_Base_gfx12<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>;
defm FLAT_LOAD_B128            : VFLAT_Real_Base_gfx12<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>;
defm FLAT_STORE_B8             : VFLAT_Real_Base_gfx12<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>;
defm FLAT_STORE_B16            : VFLAT_Real_Base_gfx12<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>;
defm FLAT_STORE_B32            : VFLAT_Real_Base_gfx12<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>;
defm FLAT_STORE_B64            : VFLAT_Real_Base_gfx12<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>;
defm FLAT_STORE_B96            : VFLAT_Real_Base_gfx12<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>;
defm FLAT_STORE_B128           : VFLAT_Real_Base_gfx12<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>;
defm FLAT_LOAD_D16_U8          : VFLAT_Real_Base_gfx12<0x01e, "FLAT_LOAD_UBYTE_D16">;
defm FLAT_LOAD_D16_I8          : VFLAT_Real_Base_gfx12<0x01f, "FLAT_LOAD_SBYTE_D16">;
defm FLAT_LOAD_D16_B16         : VFLAT_Real_Base_gfx12<0x020, "FLAT_LOAD_SHORT_D16">;
defm FLAT_LOAD_D16_HI_U8       : VFLAT_Real_Base_gfx12<0x021, "FLAT_LOAD_UBYTE_D16_HI">;
defm FLAT_LOAD_D16_HI_I8       : VFLAT_Real_Base_gfx12<0x022, "FLAT_LOAD_SBYTE_D16_HI">;
defm FLAT_LOAD_D16_HI_B16      : VFLAT_Real_Base_gfx12<0x023, "FLAT_LOAD_SHORT_D16_HI">;
defm FLAT_STORE_D16_HI_B8      : VFLAT_Real_Base_gfx12<0x024, "FLAT_STORE_BYTE_D16_HI">;
defm FLAT_STORE_D16_HI_B16     : VFLAT_Real_Base_gfx12<0x025, "FLAT_STORE_SHORT_D16_HI">;
defm FLAT_ATOMIC_SWAP_B32      : VFLAT_Real_Atomics_gfx12<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>;
defm FLAT_ATOMIC_CMPSWAP_B32   : VFLAT_Real_Atomics_gfx12<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>;
defm FLAT_ATOMIC_ADD_U32       : VFLAT_Real_Atomics_gfx12<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>;
defm FLAT_ATOMIC_SUB_U32       : VFLAT_Real_Atomics_gfx12<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>;
defm FLAT_ATOMIC_SUB_CLAMP_U32 : VFLAT_Real_Atomics_gfx12<0x037, "FLAT_ATOMIC_CSUB_U32", "flat_atomic_sub_clamp_u32", true>;
defm FLAT_ATOMIC_MIN_I32       : VFLAT_Real_Atomics_gfx12<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>;
defm FLAT_ATOMIC_MIN_U32       : VFLAT_Real_Atomics_gfx12<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>;
defm FLAT_ATOMIC_MAX_I32       : VFLAT_Real_Atomics_gfx12<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>;
defm FLAT_ATOMIC_MAX_U32       : VFLAT_Real_Atomics_gfx12<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>;
defm FLAT_ATOMIC_AND_B32       : VFLAT_Real_Atomics_gfx12<0x03c, "FLAT_ATOMIC_AND",
"flat_atomic_and_b32", true>; 2672defm FLAT_ATOMIC_OR_B32 : VFLAT_Real_Atomics_gfx12<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>; 2673defm FLAT_ATOMIC_XOR_B32 : VFLAT_Real_Atomics_gfx12<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>; 2674defm FLAT_ATOMIC_INC_U32 : VFLAT_Real_Atomics_gfx12<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>; 2675defm FLAT_ATOMIC_DEC_U32 : VFLAT_Real_Atomics_gfx12<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>; 2676defm FLAT_ATOMIC_SWAP_B64 : VFLAT_Real_Atomics_gfx12<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>; 2677defm FLAT_ATOMIC_CMPSWAP_B64 : VFLAT_Real_Atomics_gfx12<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>; 2678defm FLAT_ATOMIC_ADD_U64 : VFLAT_Real_Atomics_gfx12<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>; 2679defm FLAT_ATOMIC_SUB_U64 : VFLAT_Real_Atomics_gfx12<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>; 2680defm FLAT_ATOMIC_MIN_I64 : VFLAT_Real_Atomics_gfx12<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>; 2681defm FLAT_ATOMIC_MIN_U64 : VFLAT_Real_Atomics_gfx12<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>; 2682defm FLAT_ATOMIC_MAX_I64 : VFLAT_Real_Atomics_gfx12<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>; 2683defm FLAT_ATOMIC_MAX_U64 : VFLAT_Real_Atomics_gfx12<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>; 2684defm FLAT_ATOMIC_AND_B64 : VFLAT_Real_Atomics_gfx12<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>; 2685defm FLAT_ATOMIC_OR_B64 : VFLAT_Real_Atomics_gfx12<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>; 2686defm FLAT_ATOMIC_XOR_B64 : VFLAT_Real_Atomics_gfx12<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>; 2687defm FLAT_ATOMIC_INC_U64 : VFLAT_Real_Atomics_gfx12<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>; 2688defm FLAT_ATOMIC_DEC_U64 : VFLAT_Real_Atomics_gfx12<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>; 2689defm 
FLAT_ATOMIC_COND_SUB_U32 : VFLAT_Real_Atomics_gfx12<0x050, "FLAT_ATOMIC_COND_SUB_U32", "flat_atomic_cond_sub_u32">; 2690defm FLAT_ATOMIC_MIN_NUM_F32 : VFLAT_Real_Atomics_gfx12<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_num_f32", true, "flat_atomic_min_f32">; 2691defm FLAT_ATOMIC_MAX_NUM_F32 : VFLAT_Real_Atomics_gfx12<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_num_f32", true, "flat_atomic_max_f32">; 2692defm FLAT_ATOMIC_ADD_F32 : VFLAT_Real_Atomics_gfx12<0x056>; 2693defm FLAT_ATOMIC_PK_ADD_F16 : VFLAT_Real_Atomics_gfx12<0x059>; 2694defm FLAT_ATOMIC_PK_ADD_BF16 : VFLAT_Real_Atomics_gfx12<0x05a>; 2695 2696// ENC_VGLOBAL. 2697defm GLOBAL_LOAD_U8 : VGLOBAL_Real_AllAddr_gfx12<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>; 2698defm GLOBAL_LOAD_I8 : VGLOBAL_Real_AllAddr_gfx12<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>; 2699defm GLOBAL_LOAD_U16 : VGLOBAL_Real_AllAddr_gfx12<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>; 2700defm GLOBAL_LOAD_I16 : VGLOBAL_Real_AllAddr_gfx12<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>; 2701defm GLOBAL_LOAD_B32 : VGLOBAL_Real_AllAddr_gfx12<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>; 2702defm GLOBAL_LOAD_B64 : VGLOBAL_Real_AllAddr_gfx12<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>; 2703defm GLOBAL_LOAD_B96 : VGLOBAL_Real_AllAddr_gfx12<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>; 2704defm GLOBAL_LOAD_B128 : VGLOBAL_Real_AllAddr_gfx12<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>; 2705defm GLOBAL_STORE_B8 : VGLOBAL_Real_AllAddr_gfx12<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>; 2706defm GLOBAL_STORE_B16 : VGLOBAL_Real_AllAddr_gfx12<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>; 2707defm GLOBAL_STORE_B32 : VGLOBAL_Real_AllAddr_gfx12<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>; 2708defm GLOBAL_STORE_B64 : VGLOBAL_Real_AllAddr_gfx12<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>; 2709defm GLOBAL_STORE_B96 : 
VGLOBAL_Real_AllAddr_gfx12<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>; 2710defm GLOBAL_STORE_B128 : VGLOBAL_Real_AllAddr_gfx12<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>; 2711defm GLOBAL_LOAD_D16_U8 : VGLOBAL_Real_AllAddr_gfx12<0x01e, "GLOBAL_LOAD_UBYTE_D16">; 2712defm GLOBAL_LOAD_D16_I8 : VGLOBAL_Real_AllAddr_gfx12<0x01f, "GLOBAL_LOAD_SBYTE_D16">; 2713defm GLOBAL_LOAD_D16_B16 : VGLOBAL_Real_AllAddr_gfx12<0x020, "GLOBAL_LOAD_SHORT_D16">; 2714defm GLOBAL_LOAD_D16_HI_U8 : VGLOBAL_Real_AllAddr_gfx12<0x021, "GLOBAL_LOAD_UBYTE_D16_HI">; 2715defm GLOBAL_LOAD_D16_HI_I8 : VGLOBAL_Real_AllAddr_gfx12<0x022, "GLOBAL_LOAD_SBYTE_D16_HI">; 2716defm GLOBAL_LOAD_D16_HI_B16 : VGLOBAL_Real_AllAddr_gfx12<0x023, "GLOBAL_LOAD_SHORT_D16_HI">; 2717defm GLOBAL_STORE_D16_HI_B8 : VGLOBAL_Real_AllAddr_gfx12<0x024, "GLOBAL_STORE_BYTE_D16_HI">; 2718defm GLOBAL_STORE_D16_HI_B16 : VGLOBAL_Real_AllAddr_gfx12<0x025, "GLOBAL_STORE_SHORT_D16_HI">; 2719defm GLOBAL_LOAD_ADDTID_B32 : VGLOBAL_Real_AllAddr_gfx12<0x028, "GLOBAL_LOAD_DWORD_ADDTID">; 2720defm GLOBAL_STORE_ADDTID_B32 : VGLOBAL_Real_AllAddr_gfx12<0x029, "GLOBAL_STORE_DWORD_ADDTID">; 2721 2722defm GLOBAL_ATOMIC_SWAP_B32 : VGLOBAL_Real_Atomics_gfx12<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>; 2723defm GLOBAL_ATOMIC_CMPSWAP_B32 : VGLOBAL_Real_Atomics_gfx12<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>; 2724defm GLOBAL_ATOMIC_ADD_U32 : VGLOBAL_Real_Atomics_gfx12<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>; 2725defm GLOBAL_ATOMIC_SUB_U32 : VGLOBAL_Real_Atomics_gfx12<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>; 2726defm GLOBAL_ATOMIC_SUB_CLAMP_U32 : VGLOBAL_Real_Atomics_gfx12<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_sub_clamp_u32", true, "global_atomic_csub_u32">; 2727defm GLOBAL_ATOMIC_MIN_I32 : VGLOBAL_Real_Atomics_gfx12<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>; 2728defm GLOBAL_ATOMIC_MIN_U32 : VGLOBAL_Real_Atomics_gfx12<0x039, 
"GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>; 2729defm GLOBAL_ATOMIC_MAX_I32 : VGLOBAL_Real_Atomics_gfx12<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>; 2730defm GLOBAL_ATOMIC_MAX_U32 : VGLOBAL_Real_Atomics_gfx12<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>; 2731defm GLOBAL_ATOMIC_AND_B32 : VGLOBAL_Real_Atomics_gfx12<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>; 2732defm GLOBAL_ATOMIC_OR_B32 : VGLOBAL_Real_Atomics_gfx12<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>; 2733defm GLOBAL_ATOMIC_XOR_B32 : VGLOBAL_Real_Atomics_gfx12<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>; 2734defm GLOBAL_ATOMIC_INC_U32 : VGLOBAL_Real_Atomics_gfx12<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>; 2735defm GLOBAL_ATOMIC_DEC_U32 : VGLOBAL_Real_Atomics_gfx12<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>; 2736defm GLOBAL_ATOMIC_SWAP_B64 : VGLOBAL_Real_Atomics_gfx12<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>; 2737defm GLOBAL_ATOMIC_CMPSWAP_B64 : VGLOBAL_Real_Atomics_gfx12<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>; 2738defm GLOBAL_ATOMIC_ADD_U64 : VGLOBAL_Real_Atomics_gfx12<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>; 2739defm GLOBAL_ATOMIC_SUB_U64 : VGLOBAL_Real_Atomics_gfx12<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>; 2740defm GLOBAL_ATOMIC_MIN_I64 : VGLOBAL_Real_Atomics_gfx12<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>; 2741defm GLOBAL_ATOMIC_MIN_U64 : VGLOBAL_Real_Atomics_gfx12<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>; 2742defm GLOBAL_ATOMIC_MAX_I64 : VGLOBAL_Real_Atomics_gfx12<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>; 2743defm GLOBAL_ATOMIC_MAX_U64 : VGLOBAL_Real_Atomics_gfx12<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>; 2744defm GLOBAL_ATOMIC_AND_B64 : VGLOBAL_Real_Atomics_gfx12<0x049, "GLOBAL_ATOMIC_AND_X2", 
"global_atomic_and_b64", true>; 2745defm GLOBAL_ATOMIC_OR_B64 : VGLOBAL_Real_Atomics_gfx12<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>; 2746defm GLOBAL_ATOMIC_XOR_B64 : VGLOBAL_Real_Atomics_gfx12<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>; 2747defm GLOBAL_ATOMIC_INC_U64 : VGLOBAL_Real_Atomics_gfx12<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>; 2748defm GLOBAL_ATOMIC_DEC_U64 : VGLOBAL_Real_Atomics_gfx12<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>; 2749defm GLOBAL_ATOMIC_COND_SUB_U32 : VGLOBAL_Real_Atomics_gfx12<0x050, "GLOBAL_ATOMIC_COND_SUB_U32", "global_atomic_cond_sub_u32">; 2750defm GLOBAL_ATOMIC_MIN_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_num_f32", true, "global_atomic_min_f32">; 2751defm GLOBAL_ATOMIC_MAX_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_num_f32", true, "global_atomic_max_f32">; 2752defm GLOBAL_ATOMIC_ADD_F32 : VGLOBAL_Real_Atomics_gfx12<0x056>; 2753 2754let WaveSizePredicate = isWave32, DecoderNamespace = "GFX12" in { 2755 defm GLOBAL_LOAD_TR_B128_w32 : VGLOBAL_Real_AllAddr_gfx12<0x057, "GLOBAL_LOAD_TR_B128_w32", "global_load_tr_b128">; 2756 defm GLOBAL_LOAD_TR_B64_w32 : VGLOBAL_Real_AllAddr_gfx12<0x058, "GLOBAL_LOAD_TR_B64_w32", "global_load_tr_b64">; 2757} 2758 2759let WaveSizePredicate = isWave64, DecoderNamespace = "GFX12W64" in { 2760 defm GLOBAL_LOAD_TR_B128_w64 : VGLOBAL_Real_AllAddr_gfx12<0x057, "GLOBAL_LOAD_TR_B128_w64", "global_load_tr_b128">; 2761 defm GLOBAL_LOAD_TR_B64_w64 : VGLOBAL_Real_AllAddr_gfx12<0x058, "GLOBAL_LOAD_TR_B64_w64", "global_load_tr_b64">; 2762} 2763 2764defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : VGLOBAL_Real_Atomics_gfx12<0x073>; 2765defm GLOBAL_ATOMIC_PK_ADD_F16 : VGLOBAL_Real_Atomics_gfx12<0x059>; 2766defm GLOBAL_ATOMIC_PK_ADD_BF16 : VGLOBAL_Real_Atomics_gfx12<0x05a>; 2767 2768defm GLOBAL_INV : VFLAT_Real_Base_gfx12<0x02b>; 2769defm GLOBAL_WB : 
VFLAT_Real_Base_gfx12<0x02c>; 2770defm GLOBAL_WBINV : VFLAT_Real_Base_gfx12<0x04f>; 2771 2772// ENC_VSCRATCH. 2773defm SCRATCH_LOAD_U8 : VSCRATCH_Real_AllAddr_gfx12<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>; 2774defm SCRATCH_LOAD_I8 : VSCRATCH_Real_AllAddr_gfx12<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>; 2775defm SCRATCH_LOAD_U16 : VSCRATCH_Real_AllAddr_gfx12<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>; 2776defm SCRATCH_LOAD_I16 : VSCRATCH_Real_AllAddr_gfx12<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>; 2777defm SCRATCH_LOAD_B32 : VSCRATCH_Real_AllAddr_gfx12<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>; 2778defm SCRATCH_LOAD_B64 : VSCRATCH_Real_AllAddr_gfx12<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>; 2779defm SCRATCH_LOAD_B96 : VSCRATCH_Real_AllAddr_gfx12<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>; 2780defm SCRATCH_LOAD_B128 : VSCRATCH_Real_AllAddr_gfx12<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>; 2781defm SCRATCH_STORE_B8 : VSCRATCH_Real_AllAddr_gfx12<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>; 2782defm SCRATCH_STORE_B16 : VSCRATCH_Real_AllAddr_gfx12<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>; 2783defm SCRATCH_STORE_B32 : VSCRATCH_Real_AllAddr_gfx12<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>; 2784defm SCRATCH_STORE_B64 : VSCRATCH_Real_AllAddr_gfx12<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>; 2785defm SCRATCH_STORE_B96 : VSCRATCH_Real_AllAddr_gfx12<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>; 2786defm SCRATCH_STORE_B128 : VSCRATCH_Real_AllAddr_gfx12<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>; 2787defm SCRATCH_LOAD_D16_U8 : VSCRATCH_Real_AllAddr_gfx12<0x1e, "SCRATCH_LOAD_UBYTE_D16">; 2788defm SCRATCH_LOAD_D16_I8 : VSCRATCH_Real_AllAddr_gfx12<0x1f, "SCRATCH_LOAD_SBYTE_D16">; 2789defm SCRATCH_LOAD_D16_B16 : VSCRATCH_Real_AllAddr_gfx12<0x20, "SCRATCH_LOAD_SHORT_D16">; 2790defm 
SCRATCH_LOAD_D16_HI_U8 : VSCRATCH_Real_AllAddr_gfx12<0x21, "SCRATCH_LOAD_UBYTE_D16_HI">; 2791defm SCRATCH_LOAD_D16_HI_I8 : VSCRATCH_Real_AllAddr_gfx12<0x22, "SCRATCH_LOAD_SBYTE_D16_HI">; 2792defm SCRATCH_LOAD_D16_HI_B16 : VSCRATCH_Real_AllAddr_gfx12<0x23, "SCRATCH_LOAD_SHORT_D16_HI">; 2793defm SCRATCH_STORE_D16_HI_B8 : VSCRATCH_Real_AllAddr_gfx12<0x24, "SCRATCH_STORE_BYTE_D16_HI">; 2794defm SCRATCH_STORE_D16_HI_B16 : VSCRATCH_Real_AllAddr_gfx12<0x25, "SCRATCH_STORE_SHORT_D16_HI">; 2795