1//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8 9def FlatOffset : ComplexPattern<iPTR, 2, "SelectFlatOffset", [], [SDNPWantRoot], -10>; 10def GlobalOffset : ComplexPattern<iPTR, 2, "SelectGlobalOffset", [], [SDNPWantRoot], -10>; 11def ScratchOffset : ComplexPattern<iPTR, 2, "SelectScratchOffset", [], [SDNPWantRoot], -10>; 12 13def GlobalSAddr : ComplexPattern<iPTR, 3, "SelectGlobalSAddr", [], [SDNPWantRoot], -10>; 14def ScratchSAddr : ComplexPattern<iPTR, 2, "SelectScratchSAddr", [], [SDNPWantRoot], -10>; 15def ScratchSVAddr : ComplexPattern<iPTR, 3, "SelectScratchSVAddr", [], [SDNPWantRoot], -10>; 16 17//===----------------------------------------------------------------------===// 18// FLAT classes 19//===----------------------------------------------------------------------===// 20 21class FLAT_Pseudo<string opName, dag outs, dag ins, 22 string asmOps, list<dag> pattern=[]> : 23 InstSI<outs, ins, "", pattern>, 24 SIMCInstr<opName, SIEncodingFamily.NONE> { 25 26 let isPseudo = 1; 27 let isCodeGenOnly = 1; 28 29 let FLAT = 1; 30 31 let UseNamedOperandTable = 1; 32 let hasSideEffects = 0; 33 let SchedRW = [WriteVMEM]; 34 35 string Mnemonic = opName; 36 string AsmOperands = asmOps; 37 38 bits<1> is_flat_global = 0; 39 bits<1> is_flat_scratch = 0; 40 41 bits<1> has_vdst = 1; 42 43 // We need to distinguish having saddr and enabling saddr because 44 // saddr is only valid for scratch and global instructions. Pre-gfx9 45 // these bits were reserved, so we also don't necessarily want to 46 // set these bits to the disabled value for the original flat 47 // segment instructions. 48 bits<1> has_saddr = 0; 49 bits<1> enabled_saddr = 0; 50 bits<7> saddr_value = 0; 51 bits<1> has_vaddr = 1; 52 53 bits<1> has_data = 1; 54 bits<1> has_glc = 1; 55 bits<1> glcValue = 0; 56 bits<1> has_dlc = 1; 57 bits<1> dlcValue = 0; 58 bits<1> has_sccb = 1; 59 bits<1> sccbValue = 0; 60 bits<1> has_sve = 0; // Scratch VGPR Enable 61 bits<1> lds = 0; 62 bits<1> sve = 0; 63 64 let SubtargetPredicate = !if(is_flat_global, HasFlatGlobalInsts, 65 !if(is_flat_scratch, HasFlatScratchInsts, HasFlatAddressSpace)); 66 67 // TODO: M0 if it could possibly access LDS (before gfx9? only)? 68 let Uses = !if(is_flat_global, [EXEC], [EXEC, FLAT_SCR]); 69 70 // Internally, FLAT instruction are executed as both an LDS and a 71 // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT 72 // and are not considered done until both have been decremented. 73 let VM_CNT = 1; 74 let LGKM_CNT = !not(!or(is_flat_global, is_flat_scratch)); 75 76 let FlatGlobal = is_flat_global; 77 78 let FlatScratch = is_flat_scratch; 79} 80 81class FLAT_Real <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : 82 InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>, 83 Enc64 { 84 85 let isPseudo = 0; 86 let isCodeGenOnly = 0; 87 88 let FLAT = 1; 89 90 // copy relevant pseudo op flags 91 let SubtargetPredicate = ps.SubtargetPredicate; 92 let AsmMatchConverter = ps.AsmMatchConverter; 93 let OtherPredicates = ps.OtherPredicates; 94 let TSFlags = ps.TSFlags; 95 let UseNamedOperandTable = ps.UseNamedOperandTable; 96 let SchedRW = ps.SchedRW; 97 let mayLoad = ps.mayLoad; 98 let mayStore = ps.mayStore; 99 let IsAtomicRet = ps.IsAtomicRet; 100 let IsAtomicNoRet = ps.IsAtomicNoRet; 101 let VM_CNT = ps.VM_CNT; 102 let LGKM_CNT = ps.LGKM_CNT; 103 let VALU = ps.VALU; 104 105 // encoding fields 106 bits<8> vaddr; 107 bits<10> vdata; 108 bits<7> saddr; 109 bits<10> vdst; 110 111 bits<5> cpol; 112 113 // Only valid on gfx9 114 bits<1> lds = ps.lds; // LDS DMA for global and scratch 115 116 // Segment, 00=flat, 01=scratch, 10=global, 11=reserved 117 bits<2> seg = !if(ps.is_flat_global, 0b10, 118 !if(ps.is_flat_scratch, 0b01, 0)); 119 120 // Signed offset. Highest bit ignored for flat and treated as 12-bit 121 // unsigned for flat accesses. 122 bits<13> offset; 123 // GFX90A+ only: instruction uses AccVGPR for data 124 bits<1> acc = !if(ps.has_vdst, vdst{9}, !if(ps.has_data, vdata{9}, 0)); 125 126 // We don't use tfe right now, and it was removed in gfx9. 127 bits<1> tfe = 0; 128 129 // Only valid on GFX9+ 130 let Inst{12-0} = offset; 131 let Inst{13} = !if(ps.has_sve, ps.sve, lds); 132 let Inst{15-14} = seg; 133 134 let Inst{16} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue); 135 let Inst{17} = cpol{CPolBit.SLC}; 136 let Inst{24-18} = op; 137 let Inst{31-26} = 0x37; // Encoding. 138 let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?); 139 let Inst{47-40} = !if(ps.has_data, vdata{7-0}, ?); 140 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0); 141 142 // 54-48 is reserved. 143 let Inst{55} = acc; // nv on GFX9+, TFE before. AccVGPR for data on GFX90A. 144 let Inst{63-56} = !if(ps.has_vdst, vdst{7-0}, ?); 145} 146 147class VFLAT_Real <bits<8> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : 148 InstSI <ps.OutOperandList, ps.InOperandList, opName # ps.AsmOperands, []>, 149 Enc96 { 150 151 let FLAT = 1; 152 153 // copy relevant pseudo op flags 154 let SubtargetPredicate = ps.SubtargetPredicate; 155 let AsmMatchConverter = ps.AsmMatchConverter; 156 let OtherPredicates = ps.OtherPredicates; 157 let TSFlags = ps.TSFlags; 158 let UseNamedOperandTable = ps.UseNamedOperandTable; 159 let SchedRW = ps.SchedRW; 160 let mayLoad = ps.mayLoad; 161 let mayStore = ps.mayStore; 162 let IsAtomicRet = ps.IsAtomicRet; 163 let IsAtomicNoRet = ps.IsAtomicNoRet; 164 let VM_CNT = ps.VM_CNT; 165 let LGKM_CNT = ps.LGKM_CNT; 166 let VALU = ps.VALU; 167 168 bits<7> saddr; 169 bits<8> vdst; 170 bits<6> cpol; 171 bits<8> vdata; // vsrc 172 bits<8> vaddr; 173 bits<24> offset; 174 175 let Inst{6-0} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7f), 0); 176 let Inst{21-14} = op; 177 let Inst{31-26} = 0x3b; 178 let Inst{39-32} = !if(ps.has_vdst, vdst, ?); 179 let Inst{49} = ps.sve; 180 let Inst{54-53} = cpol{2-1}; // th{2-1} 181 let Inst{52} = !if(ps.IsAtomicRet, 1, cpol{0}); // th{0} 182 let Inst{51-50} = cpol{4-3}; // scope 183 let Inst{62-55} = !if(ps.has_data, vdata{7-0}, ?); 184 let Inst{71-64} = !if(ps.has_vaddr, vaddr, ?); 185 let Inst{95-72} = offset; 186} 187 188class GlobalSaddrTable <bit is_saddr, string Name = ""> { 189 bit IsSaddr = is_saddr; 190 string SaddrOp = Name; 191} 192 193// TODO: Is exec allowed for saddr? The disabled value 0x7f is the 194// same encoding value as exec_hi, so it isn't possible to use that if 195// saddr is 32-bit (which isn't handled here yet). 196class FLAT_Load_Pseudo <string opName, RegisterClass regClass, 197 bit HasTiedOutput = 0, 198 bit HasSaddr = 0, bit EnableSaddr = 0, 199 RegisterOperand vdata_op = getLdStRegisterOperand<regClass>.ret> : FLAT_Pseudo< 200 opName, 201 (outs vdata_op:$vdst), 202 !con( 203 !con( 204 !if(EnableSaddr, 205 (ins SReg_64:$saddr, VGPR_32:$vaddr), 206 (ins VReg_64:$vaddr)), 207 (ins flat_offset:$offset)), 208 // FIXME: Operands with default values do not work with following non-optional operands. 209 !if(HasTiedOutput, (ins CPol:$cpol, vdata_op:$vdst_in), 210 (ins CPol_0:$cpol))), 211 " $vdst, $vaddr"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { 212 let has_data = 0; 213 let mayLoad = 1; 214 let has_saddr = HasSaddr; 215 let enabled_saddr = EnableSaddr; 216 let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", ""); 217 let maybeAtomic = 1; 218 219 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 220 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 221} 222 223class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass, 224 bit HasSaddr = 0, bit EnableSaddr = 0> : FLAT_Pseudo< 225 opName, 226 (outs), 227 !con( 228 !if(EnableSaddr, 229 (ins VGPR_32:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata, SReg_64:$saddr), 230 (ins VReg_64:$vaddr, getLdStRegisterOperand<vdataClass>.ret:$vdata)), 231 (ins flat_offset:$offset, CPol_0:$cpol)), 232 " $vaddr, $vdata"#!if(HasSaddr, !if(EnableSaddr, ", $saddr", ", off"), "")#"$offset$cpol"> { 233 let mayLoad = 0; 234 let mayStore = 1; 235 let has_vdst = 0; 236 let has_saddr = HasSaddr; 237 let enabled_saddr = EnableSaddr; 238 let PseudoInstr = opName#!if(!and(HasSaddr, EnableSaddr), "_SADDR", ""); 239 let maybeAtomic = 1; 240} 241 242multiclass FLAT_Global_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedInput = 0> { 243 let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { 244 def "" : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1>, 245 GlobalSaddrTable<0, opName>; 246 def _SADDR : FLAT_Load_Pseudo<opName, regClass, HasTiedInput, 1, 1>, 247 GlobalSaddrTable<1, opName>; 248 } 249} 250 251class FLAT_Global_Load_AddTid_Pseudo <string opName, RegisterClass regClass, 252 bit HasTiedOutput = 0, bit EnableSaddr = 0> : FLAT_Pseudo< 253 opName, 254 (outs regClass:$vdst), 255 !con(!if(EnableSaddr, (ins SReg_64:$saddr), (ins)), 256 (ins flat_offset:$offset, CPol_0:$cpol), 257 !if(HasTiedOutput, (ins regClass:$vdst_in), (ins))), 258 " $vdst, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 259 let is_flat_global = 1; 260 let has_data = 0; 261 let mayLoad = 1; 262 let has_vaddr = 0; 263 let has_saddr = 1; 264 let enabled_saddr = EnableSaddr; 265 let maybeAtomic = 1; 266 let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); 267 268 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 269 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 270} 271 272multiclass FLAT_Global_Load_AddTid_Pseudo<string opName, RegisterClass regClass, 273 bit HasTiedOutput = 0> { 274 def "" : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput>, 275 GlobalSaddrTable<0, opName>; 276 def _SADDR : FLAT_Global_Load_AddTid_Pseudo<opName, regClass, HasTiedOutput, 1>, 277 GlobalSaddrTable<1, opName>; 278} 279 280multiclass FLAT_Global_Store_Pseudo<string opName, RegisterClass regClass> { 281 let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { 282 def "" : FLAT_Store_Pseudo<opName, regClass, 1>, 283 GlobalSaddrTable<0, opName>; 284 def _SADDR : FLAT_Store_Pseudo<opName, regClass, 1, 1>, 285 GlobalSaddrTable<1, opName>; 286 } 287} 288 289class FLAT_Global_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0> : FLAT_Pseudo< 290 opName, 291 (outs ), 292 !con( 293 !if(EnableSaddr, (ins SReg_64:$saddr, VGPR_32:$vaddr), (ins VReg_64:$vaddr)), 294 (ins flat_offset:$offset, CPol_0:$cpol)), 295 " $vaddr"#!if(EnableSaddr, ", $saddr", ", off")#"$offset$cpol"> { 296 let LGKM_CNT = 1; 297 let is_flat_global = 1; 298 let lds = 1; 299 let has_data = 0; 300 let has_vdst = 0; 301 let mayLoad = 1; 302 let mayStore = 1; 303 let has_saddr = 1; 304 let enabled_saddr = EnableSaddr; 305 let VALU = 1; 306 let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); 307 let Uses = [M0, EXEC]; 308 let SchedRW = [WriteVMEM, WriteLDS]; 309} 310 311multiclass FLAT_Global_Load_LDS_Pseudo<string opName> { 312 def "" : FLAT_Global_Load_LDS_Pseudo<opName>, 313 GlobalSaddrTable<0, opName>; 314 def _SADDR : FLAT_Global_Load_LDS_Pseudo<opName, 1>, 315 GlobalSaddrTable<1, opName>; 316} 317 318class FLAT_Global_Store_AddTid_Pseudo <string opName, RegisterClass vdataClass, 319 bit EnableSaddr = 0> : FLAT_Pseudo< 320 opName, 321 (outs), 322 !con(!if(EnableSaddr, (ins vdataClass:$vdata, SReg_64:$saddr), (ins vdataClass:$vdata)), 323 (ins flat_offset:$offset, CPol:$cpol)), 324 " $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 325 let is_flat_global = 1; 326 let mayLoad = 0; 327 let mayStore = 1; 328 let has_vdst = 0; 329 let has_vaddr = 0; 330 let has_saddr = 1; 331 let enabled_saddr = EnableSaddr; 332 let maybeAtomic = 1; 333 let PseudoInstr = opName#!if(EnableSaddr, "_SADDR", ""); 334} 335 336multiclass FLAT_Global_Store_AddTid_Pseudo<string opName, RegisterClass regClass> { 337 def "" : FLAT_Global_Store_AddTid_Pseudo<opName, regClass>, 338 GlobalSaddrTable<0, opName>; 339 def _SADDR : FLAT_Global_Store_AddTid_Pseudo<opName, regClass, 1>, 340 GlobalSaddrTable<1, opName>; 341} 342 343class FlatScratchInst <string sv_op, string mode> { 344 string SVOp = sv_op; 345 string Mode = mode; 346} 347 348class FLAT_Scratch_Load_Pseudo <string opName, RegisterClass regClass, 349 bit HasTiedOutput = 0, 350 bit EnableSaddr = 0, 351 bit EnableSVE = 0, 352 bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> 353 : FLAT_Pseudo< 354 opName, 355 (outs getLdStRegisterOperand<regClass>.ret:$vdst), 356 !con( 357 !if(EnableSVE, 358 (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset), 359 !if(EnableSaddr, 360 (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset), 361 !if(EnableVaddr, 362 (ins VGPR_32:$vaddr, flat_offset:$offset), 363 (ins flat_offset:$offset)))), 364 !if(HasTiedOutput, (ins CPol:$cpol, getLdStRegisterOperand<regClass>.ret:$vdst_in), 365 (ins CPol_0:$cpol))), 366 " $vdst, "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 367 let has_data = 0; 368 let mayLoad = 1; 369 let has_saddr = 1; 370 let enabled_saddr = EnableSaddr; 371 let has_vaddr = EnableVaddr; 372 let has_sve = EnableSVE; 373 let sve = EnableVaddr; 374 let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"))); 375 let maybeAtomic = 1; 376 377 let Constraints = !if(HasTiedOutput, "$vdst = $vdst_in", ""); 378 let DisableEncoding = !if(HasTiedOutput, "$vdst_in", ""); 379} 380 381class FLAT_Scratch_Store_Pseudo <string opName, RegisterClass vdataClass, bit EnableSaddr = 0, 382 bit EnableSVE = 0, 383 bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr)), 384 RegisterOperand vdata_op = getLdStRegisterOperand<vdataClass>.ret> : FLAT_Pseudo< 385 opName, 386 (outs), 387 !if(EnableSVE, 388 (ins vdata_op:$vdata, VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), 389 !if(EnableSaddr, 390 (ins vdata_op:$vdata, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol_0:$cpol), 391 !if(EnableVaddr, 392 (ins vdata_op:$vdata, VGPR_32:$vaddr, flat_offset:$offset, CPol_0:$cpol), 393 (ins vdata_op:$vdata, flat_offset:$offset, CPol_0:$cpol)))), 394 " "#!if(EnableVaddr, "$vaddr", "off")#", $vdata, "#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 395 let mayLoad = 0; 396 let mayStore = 1; 397 let has_vdst = 0; 398 let has_saddr = 1; 399 let enabled_saddr = EnableSaddr; 400 let has_vaddr = EnableVaddr; 401 let has_sve = EnableSVE; 402 let sve = EnableVaddr; 403 let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"))); 404 let maybeAtomic = 1; 405} 406 407multiclass FLAT_Scratch_Load_Pseudo<string opName, RegisterClass regClass, bit HasTiedOutput = 0> { 408 let is_flat_scratch = 1 in { 409 def "" : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput>, 410 FlatScratchInst<opName, "SV">; 411 def _SADDR : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1>, 412 FlatScratchInst<opName, "SS">; 413 414 let SubtargetPredicate = HasFlatScratchSVSMode in 415 def _SVS : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 1, 1>, 416 FlatScratchInst<opName, "SVS">; 417 418 let SubtargetPredicate = HasFlatScratchSTMode in 419 def _ST : FLAT_Scratch_Load_Pseudo<opName, regClass, HasTiedOutput, 0, 0, 0>, 420 FlatScratchInst<opName, "ST">; 421 } 422} 423 424multiclass FLAT_Scratch_Store_Pseudo<string opName, RegisterClass regClass> { 425 let is_flat_scratch = 1 in { 426 def "" : FLAT_Scratch_Store_Pseudo<opName, regClass>, 427 FlatScratchInst<opName, "SV">; 428 def _SADDR : FLAT_Scratch_Store_Pseudo<opName, regClass, 1>, 429 FlatScratchInst<opName, "SS">; 430 431 let SubtargetPredicate = HasFlatScratchSVSMode in 432 def _SVS : FLAT_Scratch_Store_Pseudo<opName, regClass, 1, 1>, 433 FlatScratchInst<opName, "SVS">; 434 435 let SubtargetPredicate = HasFlatScratchSTMode in 436 def _ST : FLAT_Scratch_Store_Pseudo<opName, regClass, 0, 0, 0>, 437 FlatScratchInst<opName, "ST">; 438 } 439} 440 441class FLAT_Scratch_Load_LDS_Pseudo <string opName, bit EnableSaddr = 0, 442 bit EnableSVE = 0, 443 bit EnableVaddr = !or(EnableSVE, !not(EnableSaddr))> : FLAT_Pseudo< 444 opName, 445 (outs ), 446 !if(EnableSVE, 447 (ins VGPR_32:$vaddr, SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol), 448 !if(EnableSaddr, 449 (ins SReg_32_XEXEC_HI:$saddr, flat_offset:$offset, CPol:$cpol), 450 !if(EnableVaddr, 451 (ins VGPR_32:$vaddr, flat_offset:$offset, CPol:$cpol), 452 (ins flat_offset:$offset, CPol:$cpol)))), 453 " "#!if(EnableVaddr, "$vaddr, ", "off, ")#!if(EnableSaddr, "$saddr", "off")#"$offset$cpol"> { 454 455 let LGKM_CNT = 1; 456 let is_flat_scratch = 1; 457 let lds = 1; 458 let has_data = 0; 459 let has_vdst = 0; 460 let mayLoad = 1; 461 let mayStore = 1; 462 let has_saddr = 1; 463 let enabled_saddr = EnableSaddr; 464 let has_vaddr = EnableVaddr; 465 let has_sve = EnableSVE; 466 let sve = EnableVaddr; 467 let VALU = 1; 468 let PseudoInstr = opName#!if(EnableSVE, "_SVS", !if(EnableSaddr, "_SADDR", !if(EnableVaddr, "", "_ST"))); 469 let Uses = [M0, EXEC]; 470 let SchedRW = [WriteVMEM, WriteLDS]; 471} 472 473multiclass FLAT_Scratch_Load_LDS_Pseudo<string opName> { 474 def "" : FLAT_Scratch_Load_LDS_Pseudo<opName>, 475 FlatScratchInst<opName, "SV">; 476 def _SADDR : FLAT_Scratch_Load_LDS_Pseudo<opName, 1>, 477 FlatScratchInst<opName, "SS">; 478 def _SVS : FLAT_Scratch_Load_LDS_Pseudo<opName, 1, 1>, 479 FlatScratchInst<opName, "SVS">; 480 def _ST : FLAT_Scratch_Load_LDS_Pseudo<opName, 0, 0, 0>, 481 FlatScratchInst<opName, "ST">; 482} 483 484class FLAT_AtomicNoRet_Pseudo<string opName, dag outs, dag ins, 485 string asm, list<dag> pattern = []> : 486 FLAT_Pseudo<opName, outs, ins, asm, pattern> { 487 let mayLoad = 1; 488 let mayStore = 1; 489 let has_glc = 0; 490 let glcValue = 0; 491 let has_vdst = 0; 492 let has_sccb = 1; 493 let sccbValue = 0; 494 let maybeAtomic = 1; 495 let IsAtomicNoRet = 1; 496} 497 498class FLAT_AtomicRet_Pseudo<string opName, dag outs, dag ins, 499 string asm, list<dag> pattern = []> 500 : FLAT_AtomicNoRet_Pseudo<opName, outs, ins, asm, pattern> { 501 let hasPostISelHook = 1; 502 let has_vdst = 1; 503 let glcValue = 1; 504 let sccbValue = 0; 505 let IsAtomicNoRet = 0; 506 let IsAtomicRet = 1; 507 let PseudoInstr = NAME # "_RTN"; 508} 509 510multiclass FLAT_Atomic_Pseudo_NO_RTN< 511 string opName, 512 RegisterClass vdst_rc, 513 ValueType vt, 514 ValueType data_vt = vt, 515 RegisterClass data_rc = vdst_rc, 516 bit isFP = isFloatType<data_vt>.ret, 517 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 518 def "" : FLAT_AtomicNoRet_Pseudo <opName, 519 (outs), 520 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol), 521 " $vaddr, $vdata$offset$cpol">, 522 GlobalSaddrTable<0, opName>, 523 AtomicNoRet <opName, 0> { 524 let PseudoInstr = NAME; 525 let FPAtomic = isFP; 526 let AddedComplexity = -1; // Prefer global atomics if available 527 } 528} 529 530multiclass FLAT_Atomic_Pseudo_RTN< 531 string opName, 532 RegisterClass vdst_rc, 533 ValueType vt, 534 ValueType data_vt = vt, 535 RegisterClass data_rc = vdst_rc, 536 bit isFP = isFloatType<data_vt>.ret, 537 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 538 def _RTN : FLAT_AtomicRet_Pseudo <opName, 539 (outs getLdStRegisterOperand<vdst_rc>.ret:$vdst), 540 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), 541 " $vdst, $vaddr, $vdata$offset$cpol">, 542 GlobalSaddrTable<0, opName#"_rtn">, 543 AtomicNoRet <opName, 1> { 544 let FPAtomic = isFP; 545 let AddedComplexity = -1; // Prefer global atomics if available 546 } 547} 548 549multiclass FLAT_Atomic_Pseudo< 550 string opName, 551 RegisterClass vdst_rc, 552 ValueType vt, 553 ValueType data_vt = vt, 554 RegisterClass data_rc = vdst_rc, 555 bit isFP = isFloatType<data_vt>.ret, 556 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 557 defm "" : FLAT_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc, isFP, data_op>; 558 defm "" : FLAT_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc, isFP, data_op>; 559} 560 561multiclass FLAT_Global_Atomic_Pseudo_NO_RTN< 562 string opName, 563 RegisterClass vdst_rc, 564 ValueType vt, 565 ValueType data_vt = vt, 566 RegisterClass data_rc = vdst_rc, 567 bit isFP = isFloatType<data_vt>.ret, 568 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret> { 569 570 def "" : FLAT_AtomicNoRet_Pseudo <opName, 571 (outs), 572 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_0:$cpol), 573 " $vaddr, $vdata, off$offset$cpol">, 574 GlobalSaddrTable<0, opName>, 575 AtomicNoRet <opName, 0> { 576 let has_saddr = 1; 577 let PseudoInstr = NAME; 578 let FPAtomic = isFP; 579 } 580 581 def _SADDR : FLAT_AtomicNoRet_Pseudo <opName, 582 (outs), 583 (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_0:$cpol), 584 " $vaddr, $vdata, $saddr$offset$cpol">, 585 GlobalSaddrTable<1, opName>, 586 AtomicNoRet <opName#"_saddr", 0> { 587 let has_saddr = 1; 588 let enabled_saddr = 1; 589 let PseudoInstr = NAME#"_SADDR"; 590 let FPAtomic = isFP; 591 } 592} 593 594multiclass FLAT_Global_Atomic_Pseudo_RTN< 595 string opName, 596 RegisterClass vdst_rc, 597 ValueType vt, 598 ValueType data_vt = vt, 599 RegisterClass data_rc = vdst_rc, 600 bit isFP = isFloatType<data_vt>.ret, 601 RegisterOperand data_op = getLdStRegisterOperand<data_rc>.ret, 602 RegisterOperand vdst_op = getLdStRegisterOperand<vdst_rc>.ret> { 603 604 def _RTN : FLAT_AtomicRet_Pseudo <opName, 605 (outs vdst_op:$vdst), 606 (ins VReg_64:$vaddr, data_op:$vdata, flat_offset:$offset, CPol_GLC1:$cpol), 607 " $vdst, $vaddr, $vdata, off$offset$cpol">, 608 GlobalSaddrTable<0, opName#"_rtn">, 609 AtomicNoRet <opName, 1> { 610 let has_saddr = 1; 611 let FPAtomic = isFP; 612 } 613 614 def _SADDR_RTN : FLAT_AtomicRet_Pseudo <opName, 615 (outs vdst_op:$vdst), 616 (ins VGPR_32:$vaddr, data_op:$vdata, SReg_64:$saddr, flat_offset:$offset, CPol_GLC1:$cpol), 617 " $vdst, $vaddr, $vdata, $saddr$offset$cpol">, 618 GlobalSaddrTable<1, opName#"_rtn">, 619 AtomicNoRet <opName#"_saddr", 1> { 620 let has_saddr = 1; 621 let enabled_saddr = 1; 622 let PseudoInstr = NAME#"_SADDR_RTN"; 623 let FPAtomic = isFP; 624 } 625} 626 627multiclass FLAT_Global_Atomic_Pseudo< 628 string opName, 629 RegisterClass vdst_rc, 630 ValueType vt, 631 ValueType data_vt = vt, 632 RegisterClass data_rc = vdst_rc> { 633 let is_flat_global = 1, SubtargetPredicate = HasFlatGlobalInsts in { 634 defm "" : FLAT_Global_Atomic_Pseudo_NO_RTN<opName, vdst_rc, vt, data_vt, data_rc>; 635 defm "" : FLAT_Global_Atomic_Pseudo_RTN<opName, vdst_rc, vt, data_vt, data_rc>; 636 } 637} 638 639//===----------------------------------------------------------------------===// 640// Flat Instructions 641//===----------------------------------------------------------------------===// 642 643def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>; 644def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>; 645def FLAT_LOAD_USHORT : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>; 646def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>; 647def FLAT_LOAD_DWORD : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>; 648def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>; 649def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>; 650def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>; 651 652def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>; 653def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>; 654def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>; 655def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>; 656def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>; 657def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>; 658 659let SubtargetPredicate = HasD16LoadStore in { 660let TiedSourceNotRead = 1 in { 661def FLAT_LOAD_UBYTE_D16 : FLAT_Load_Pseudo <"flat_load_ubyte_d16", VGPR_32, 1>; 662def FLAT_LOAD_UBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_ubyte_d16_hi", VGPR_32, 1>; 663def FLAT_LOAD_SBYTE_D16 : FLAT_Load_Pseudo <"flat_load_sbyte_d16", VGPR_32, 1>; 664def FLAT_LOAD_SBYTE_D16_HI : FLAT_Load_Pseudo <"flat_load_sbyte_d16_hi", VGPR_32, 1>; 665def FLAT_LOAD_SHORT_D16 : FLAT_Load_Pseudo <"flat_load_short_d16", VGPR_32, 1>; 666def FLAT_LOAD_SHORT_D16_HI : FLAT_Load_Pseudo <"flat_load_short_d16_hi", VGPR_32, 1>; 667} 668 669def FLAT_STORE_BYTE_D16_HI : FLAT_Store_Pseudo <"flat_store_byte_d16_hi", VGPR_32>; 670def FLAT_STORE_SHORT_D16_HI : FLAT_Store_Pseudo <"flat_store_short_d16_hi", VGPR_32>; 671} 672 673defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap", 674 VGPR_32, i32, v2i32, VReg_64>; 675 676defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2", 677 VReg_64, i64, v2i64, VReg_128>; 678 679defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap", 680 VGPR_32, i32>; 681 682defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2", 683 VReg_64, i64>; 684 685defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add", 686 VGPR_32, i32>; 687 688defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub", 689 VGPR_32, i32>; 690 691defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin", 692 VGPR_32, i32>; 693 694defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin", 695 VGPR_32, i32>; 696 697defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax", 698 VGPR_32, i32>; 699 700defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax", 701 VGPR_32, i32>; 702 703defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and", 704 VGPR_32, i32>; 705 706defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or", 707 VGPR_32, i32>; 708 709defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor", 710 VGPR_32, i32>; 711 712defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc", 713 VGPR_32, i32>; 714 715defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec", 716 VGPR_32, i32>; 717 718defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2", 719 VReg_64, i64>; 720 721defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2", 722 VReg_64, i64>; 723 724defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2", 725 VReg_64, i64>; 726 727defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2", 728 VReg_64, i64>; 729 730defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2", 731 VReg_64, i64>; 732 733defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2", 734 VReg_64, i64>; 735 736defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2", 737 VReg_64, i64>; 738 739defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2", 740 VReg_64, i64>; 741 742defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2", 743 VReg_64, i64>; 744 745defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2", 746 VReg_64, i64>; 747 748defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2", 749 VReg_64, i64>; 750 751// GFX7-, GFX10-only flat instructions. 752let SubtargetPredicate = isGFX7GFX10 in { 753 754defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2", 755 VReg_64, f64, v2f64, VReg_128>; 756 757defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2", 758 VReg_64, f64>; 759 760defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", 761 VReg_64, f64>; 762 763} // End SubtargetPredicate = isGFX7GFX10 764 765let SubtargetPredicate = isGFX90APlus in { 766 defm FLAT_ATOMIC_ADD_F64 : FLAT_Atomic_Pseudo<"flat_atomic_add_f64", VReg_64, f64>; 767 defm FLAT_ATOMIC_MIN_F64 : FLAT_Atomic_Pseudo<"flat_atomic_min_f64", VReg_64, f64>; 768 defm FLAT_ATOMIC_MAX_F64 : FLAT_Atomic_Pseudo<"flat_atomic_max_f64", VReg_64, f64>; 769 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_add_f64", VReg_64, f64>; 770 defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_min_f64", VReg_64, f64>; 771 defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Atomic_Pseudo<"global_atomic_max_f64", VReg_64, f64>; 772} // End SubtargetPredicate = isGFX90APlus 773 774let SubtargetPredicate = HasAtomicFlatPkAdd16Insts in { 775 defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_f16", VGPR_32, v2f16>; 776 let FPAtomic = 1 in 777 defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Atomic_Pseudo<"flat_atomic_pk_add_bf16", VGPR_32, v2i16>; 778} // End SubtargetPredicate = HasAtomicFlatPkAdd16Insts 779 780let SubtargetPredicate = HasAtomicGlobalPkAddBF16Inst, FPAtomic = 1 in 781 defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Atomic_Pseudo<"global_atomic_pk_add_bf16", VGPR_32, v2i16>; 782 783// GFX7-, GFX10-, GFX11-only flat instructions. 784let SubtargetPredicate = isGFX7GFX10GFX11 in { 785 786defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap", 787 VGPR_32, f32, v2f32, VReg_64>; 788 789defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin", 790 VGPR_32, f32>; 791 792defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax", 793 VGPR_32, f32>; 794 795} // End SubtargetPredicate = isGFX7GFX10GFX11 796 797// GFX940-, GFX11-only flat instructions. 798let SubtargetPredicate = HasFlatAtomicFaddF32Inst in { 799 defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>; 800} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst 801 802let SubtargetPredicate = isGFX12Plus in { 803 defm FLAT_ATOMIC_CSUB_U32 : FLAT_Atomic_Pseudo <"flat_atomic_csub_u32", VGPR_32, i32>; 804} // End SubtargetPredicate = isGFX12Plus 805 806defm GLOBAL_LOAD_UBYTE : FLAT_Global_Load_Pseudo <"global_load_ubyte", VGPR_32>; 807defm GLOBAL_LOAD_SBYTE : FLAT_Global_Load_Pseudo <"global_load_sbyte", VGPR_32>; 808defm GLOBAL_LOAD_USHORT : FLAT_Global_Load_Pseudo <"global_load_ushort", VGPR_32>; 809defm GLOBAL_LOAD_SSHORT : FLAT_Global_Load_Pseudo <"global_load_sshort", VGPR_32>; 810defm GLOBAL_LOAD_DWORD : FLAT_Global_Load_Pseudo <"global_load_dword", VGPR_32>; 811defm GLOBAL_LOAD_DWORDX2 : FLAT_Global_Load_Pseudo <"global_load_dwordx2", VReg_64>; 812defm GLOBAL_LOAD_DWORDX3 : FLAT_Global_Load_Pseudo <"global_load_dwordx3", VReg_96>; 813defm GLOBAL_LOAD_DWORDX4 : FLAT_Global_Load_Pseudo <"global_load_dwordx4", VReg_128>; 814 815let TiedSourceNotRead = 1 in { 816defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16", VGPR_32, 1>; 817defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_ubyte_d16_hi", VGPR_32, 1>; 818defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16", VGPR_32, 1>; 819defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Global_Load_Pseudo <"global_load_sbyte_d16_hi", VGPR_32, 1>; 820defm GLOBAL_LOAD_SHORT_D16 : FLAT_Global_Load_Pseudo <"global_load_short_d16", VGPR_32, 1>; 821defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Global_Load_Pseudo <"global_load_short_d16_hi", VGPR_32, 1>; 822} 823 824let OtherPredicates = [HasGFX10_BEncoding] in 825defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Global_Load_AddTid_Pseudo <"global_load_dword_addtid", VGPR_32>; 826 827defm GLOBAL_STORE_BYTE : FLAT_Global_Store_Pseudo <"global_store_byte", VGPR_32>; 828defm GLOBAL_STORE_SHORT : FLAT_Global_Store_Pseudo <"global_store_short", VGPR_32>; 829defm GLOBAL_STORE_DWORD : FLAT_Global_Store_Pseudo <"global_store_dword", VGPR_32>; 830defm GLOBAL_STORE_DWORDX2 : FLAT_Global_Store_Pseudo <"global_store_dwordx2", VReg_64>; 831defm GLOBAL_STORE_DWORDX3 : FLAT_Global_Store_Pseudo <"global_store_dwordx3", VReg_96>; 832defm GLOBAL_STORE_DWORDX4 : FLAT_Global_Store_Pseudo <"global_store_dwordx4", VReg_128>; 833let OtherPredicates = [HasGFX10_BEncoding] in 834defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Global_Store_AddTid_Pseudo <"global_store_dword_addtid", VGPR_32>; 835 836defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Global_Store_Pseudo <"global_store_byte_d16_hi", VGPR_32>; 837defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Global_Store_Pseudo <"global_store_short_d16_hi", VGPR_32>; 838 839let is_flat_global = 1 in { 840defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap", 841 VGPR_32, i32, v2i32, VReg_64>; 842 843defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_cmpswap_x2", 844 VReg_64, i64, v2i64, VReg_128>; 845 846defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Atomic_Pseudo <"global_atomic_swap", 847 VGPR_32, i32>; 848 849defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_swap_x2", 850 VReg_64, i64>; 851 852defm GLOBAL_ATOMIC_ADD : FLAT_Global_Atomic_Pseudo <"global_atomic_add", 853 VGPR_32, i32>; 854 855defm GLOBAL_ATOMIC_SUB : FLAT_Global_Atomic_Pseudo <"global_atomic_sub", 856 VGPR_32, i32>; 857 858defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_smin", 859 VGPR_32, i32>; 860 861defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Atomic_Pseudo <"global_atomic_umin", 862 VGPR_32, i32>; 863 864defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_smax", 865 VGPR_32, i32>; 866 867defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Atomic_Pseudo <"global_atomic_umax", 868 VGPR_32, i32>; 869 870defm GLOBAL_ATOMIC_AND : FLAT_Global_Atomic_Pseudo <"global_atomic_and", 871 VGPR_32, i32>; 872 873defm GLOBAL_ATOMIC_OR : FLAT_Global_Atomic_Pseudo <"global_atomic_or", 874 VGPR_32, i32>; 875 876defm GLOBAL_ATOMIC_XOR : FLAT_Global_Atomic_Pseudo <"global_atomic_xor", 877 VGPR_32, i32>; 878 879defm GLOBAL_ATOMIC_INC : FLAT_Global_Atomic_Pseudo <"global_atomic_inc", 880 VGPR_32, i32>; 881 882defm GLOBAL_ATOMIC_DEC : FLAT_Global_Atomic_Pseudo <"global_atomic_dec", 883 VGPR_32, i32>; 884 885defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_add_x2", 886 VReg_64, i64>; 887 888defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_sub_x2", 889 VReg_64, i64>; 890 891defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smin_x2", 892 VReg_64, i64>; 893 894defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umin_x2", 895 VReg_64, i64>; 896 897defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_smax_x2", 898 VReg_64, i64>; 899 900defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_umax_x2", 901 VReg_64, i64>; 902 903defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_and_x2", 904 VReg_64, i64>; 905 906defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_or_x2", 907 VReg_64, i64>; 908 909defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_xor_x2", 910 VReg_64, i64>; 911 912defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_inc_x2", 913 VReg_64, i64>; 914 915defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Atomic_Pseudo <"global_atomic_dec_x2", 916 VReg_64, i64>; 917 918let SubtargetPredicate = HasGFX10_BEncoding in { 919 defm GLOBAL_ATOMIC_CSUB : FLAT_Global_Atomic_Pseudo <"global_atomic_csub", 920 VGPR_32, i32>; 921} 922 923defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ubyte">; 924defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sbyte">; 925defm GLOBAL_LOAD_LDS_USHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_ushort">; 926defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_sshort">; 927defm GLOBAL_LOAD_LDS_DWORD : FLAT_Global_Load_LDS_Pseudo <"global_load_lds_dword">; 928 929let SubtargetPredicate = isGFX12Plus in { 930 defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : FLAT_Global_Atomic_Pseudo <"global_atomic_ordered_add_b64", VReg_64, i64>; 931} // End SubtargetPredicate = isGFX12Plus 932 933} // End is_flat_global = 1 934 935let SubtargetPredicate = HasFlatScratchInsts in { 936defm SCRATCH_LOAD_UBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte", VGPR_32>; 937defm SCRATCH_LOAD_SBYTE : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte", VGPR_32>; 938defm SCRATCH_LOAD_USHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_ushort", VGPR_32>; 939defm SCRATCH_LOAD_SSHORT : FLAT_Scratch_Load_Pseudo <"scratch_load_sshort", VGPR_32>; 940defm SCRATCH_LOAD_DWORD : FLAT_Scratch_Load_Pseudo <"scratch_load_dword", VGPR_32>; 941defm SCRATCH_LOAD_DWORDX2 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx2", VReg_64>; 942defm SCRATCH_LOAD_DWORDX3 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx3", VReg_96>; 943defm SCRATCH_LOAD_DWORDX4 : FLAT_Scratch_Load_Pseudo <"scratch_load_dwordx4", VReg_128>; 944 945let TiedSourceNotRead = 1 in { 946defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16", VGPR_32, 1>; 947defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_ubyte_d16_hi", VGPR_32, 1>; 948defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16", VGPR_32, 1>; 949defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_sbyte_d16_hi", VGPR_32, 1>; 950defm SCRATCH_LOAD_SHORT_D16 : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16", VGPR_32, 1>; 951defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Scratch_Load_Pseudo <"scratch_load_short_d16_hi", VGPR_32, 1>; 952} 953 954defm SCRATCH_STORE_BYTE : FLAT_Scratch_Store_Pseudo <"scratch_store_byte", VGPR_32>; 955defm SCRATCH_STORE_SHORT : FLAT_Scratch_Store_Pseudo <"scratch_store_short", VGPR_32>; 956defm SCRATCH_STORE_DWORD : FLAT_Scratch_Store_Pseudo <"scratch_store_dword", VGPR_32>; 957defm SCRATCH_STORE_DWORDX2 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx2", VReg_64>; 958defm SCRATCH_STORE_DWORDX3 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx3", VReg_96>; 959defm SCRATCH_STORE_DWORDX4 : FLAT_Scratch_Store_Pseudo <"scratch_store_dwordx4", VReg_128>; 960 961defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_byte_d16_hi", VGPR_32>; 962defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Scratch_Store_Pseudo <"scratch_store_short_d16_hi", VGPR_32>; 963 964defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ubyte">; 965defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sbyte">; 966defm SCRATCH_LOAD_LDS_USHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_ushort">; 967defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_sshort">; 968defm SCRATCH_LOAD_LDS_DWORD : FLAT_Scratch_Load_LDS_Pseudo <"scratch_load_lds_dword">; 969 970} // End SubtargetPredicate = HasFlatScratchInsts 971 972let SubtargetPredicate = isGFX10Plus, is_flat_global = 1 in { 973 defm GLOBAL_ATOMIC_FCMPSWAP : 974 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap", VGPR_32, f32, v2f32, VReg_64>; 975 defm GLOBAL_ATOMIC_FMIN : 976 FLAT_Global_Atomic_Pseudo<"global_atomic_fmin", VGPR_32, f32>; 977 defm GLOBAL_ATOMIC_FMAX : 978 FLAT_Global_Atomic_Pseudo<"global_atomic_fmax", VGPR_32, f32>; 979 defm GLOBAL_ATOMIC_FCMPSWAP_X2 : 980 FLAT_Global_Atomic_Pseudo<"global_atomic_fcmpswap_x2", VReg_64, f64, v2f64, VReg_128>; 981 defm GLOBAL_ATOMIC_FMIN_X2 : 982 FLAT_Global_Atomic_Pseudo<"global_atomic_fmin_x2", VReg_64, f64>; 983 defm GLOBAL_ATOMIC_FMAX_X2 : 984 FLAT_Global_Atomic_Pseudo<"global_atomic_fmax_x2", VReg_64, f64>; 985} // End SubtargetPredicate = isGFX10Plus, is_flat_global = 1 986 987let is_flat_global = 1 in { 988let OtherPredicates = [HasAtomicFaddNoRtnInsts] in 989 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_NO_RTN < 990 "global_atomic_add_f32", VGPR_32, f32 991 >; 992let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in 993 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_NO_RTN < 994 "global_atomic_pk_add_f16", VGPR_32, v2f16 995 >; 996let OtherPredicates = [HasAtomicFaddRtnInsts] in 997 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Atomic_Pseudo_RTN < 998 "global_atomic_add_f32", VGPR_32, f32 999 >; 1000let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in 1001 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Atomic_Pseudo_RTN < 1002 "global_atomic_pk_add_f16", VGPR_32, v2f16 1003 >; 1004} // End is_flat_global = 1 1005 1006//===----------------------------------------------------------------------===// 1007// Flat Patterns 1008//===----------------------------------------------------------------------===// 1009 1010// Patterns for global loads with no offset. 1011class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1012 (vt (node (FlatOffset i64:$vaddr, i32:$offset))), 1013 (inst $vaddr, $offset) 1014>; 1015 1016class FlatLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1017 (node (FlatOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in), 1018 (inst $vaddr, $offset, 0, $in) 1019>; 1020 1021class FlatSignedLoadPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1022 (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset), vt:$in), 1023 (inst $vaddr, $offset, 0, $in) 1024>; 1025 1026class GlobalLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1027 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$in)), 1028 (inst $saddr, $voffset, $offset, 0, $in) 1029>; 1030 1031class FlatLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1032 (vt (node (GlobalOffset (i64 VReg_64:$vaddr), i32:$offset))), 1033 (inst $vaddr, $offset) 1034>; 1035 1036class GlobalLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1037 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset))), 1038 (inst $saddr, $voffset, $offset, 0) 1039>; 1040 1041class GlobalStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1042 ValueType vt> : GCNPat < 1043 (node vt:$data, (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset)), 1044 (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 1045>; 1046 1047class GlobalAtomicSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1048 ValueType vt, ValueType data_vt = vt> : GCNPat < 1049 (vt (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), data_vt:$data)), 1050 (inst $voffset, getVregSrcForVT<data_vt>.ret:$data, $saddr, $offset) 1051>; 1052 1053class GlobalAtomicNoRtnSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1054 ValueType vt> : GCNPat < 1055 (node (GlobalSAddr (i64 SReg_64:$saddr), (i32 VGPR_32:$voffset), i32:$offset), vt:$data), 1056 (inst $voffset, getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 1057>; 1058 1059class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1060 (node vt:$data, (FlatOffset i64:$vaddr, i32:$offset)), 1061 (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 1062>; 1063 1064class FlatStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1065 (node vt:$data, (GlobalOffset i64:$vaddr, i32:$offset)), 1066 (inst $vaddr, getVregSrcForVT<vt>.ret:$data, $offset) 1067>; 1068 1069class FlatStoreSignedAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, 1070 ValueType vt, ValueType data_vt = vt> : GCNPat < 1071 // atomic store follows atomic binop convention so the address comes 1072 // first. 1073 (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data), 1074 (inst $vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset) 1075>; 1076 1077multiclass FlatAtomicNoRtnPat <string inst, string node, ValueType vt, 1078 ValueType data_vt = vt, bit isIntr = 0> { 1079 defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_"#vt.Size)); 1080 1081 let AddedComplexity = 1 in 1082 def : GCNPat <(vt (noRtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)), 1083 (!cast<FLAT_Pseudo>(inst) VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>; 1084} 1085 1086multiclass FlatAtomicRtnPat <string inst, string node, ValueType vt, 1087 ValueType data_vt = vt, bit isIntr = 0> { 1088 defvar rtnNode = !cast<SDPatternOperator>(node # !if(isIntr, "", "_"#vt.Size)); 1089 1090 def : GCNPat <(vt (rtnNode (FlatOffset i64:$vaddr, i32:$offset), data_vt:$data)), 1091 (!cast<FLAT_Pseudo>(inst#"_RTN") VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset)>; 1092} 1093 1094multiclass FlatAtomicPat <string inst, string node, ValueType vt, 1095 ValueType data_vt = vt, bit isIntr = 0> : 1096 FlatAtomicRtnPat<inst, node, vt, data_vt, isIntr>, 1097 FlatAtomicNoRtnPat<inst, node, vt, data_vt, isIntr>; 1098 1099multiclass FlatAtomicIntrNoRtnPat <string inst, string node, ValueType vt, 1100 ValueType data_vt = vt> { 1101 defm : FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>; 1102} 1103 1104multiclass FlatAtomicIntrRtnPat <string inst, string node, ValueType vt, 1105 ValueType data_vt = vt> { 1106 defm : FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>; 1107} 1108 1109multiclass FlatAtomicIntrPat <string inst, string node, ValueType vt, 1110 ValueType data_vt = vt> : 1111 FlatAtomicRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>, 1112 FlatAtomicNoRtnPat<inst, node, vt, data_vt, /* isIntr */ 1>; 1113 1114class FlatSignedAtomicPatBase <FLAT_Pseudo inst, SDPatternOperator node, 1115 ValueType vt, ValueType data_vt = vt> : GCNPat < 1116 (vt (node (GlobalOffset i64:$vaddr, i32:$offset), data_vt:$data)), 1117 (inst VReg_64:$vaddr, getVregSrcForVT<data_vt>.ret:$data, $offset) 1118>; 1119 1120multiclass FlatSignedAtomicPat <string inst, string node, ValueType vt, 1121 ValueType data_vt = vt, int complexity = 0, 1122 bit isIntr = 0> { 1123 defvar rtnNode = !cast<SDPatternOperator>(node # !if(isIntr, "", "_" # vt.Size)); 1124 defvar noRtnNode = !cast<PatFrags>(node # "_noret" # !if(isIntr, "", "_" # vt.Size)); 1125 1126 let AddedComplexity = complexity in 1127 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>; 1128 1129 let AddedComplexity = !add(complexity, 1) in 1130 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>; 1131} 1132 1133multiclass FlatSignedAtomicIntrPat <string inst, string node, ValueType vt, 1134 ValueType data_vt = vt> { 1135 defm : FlatSignedAtomicPat<inst, node, vt, data_vt, /* complexity */ 0, /* isIntr */ 1>; 1136} 1137 1138multiclass FlatSignedAtomicPatWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1139 ValueType vt, ValueType data_vt = vt> { 1140 defvar noRtnNode = !cast<PatFrags>(intr # "_noret_" # addrSpaceSuffix); 1141 defvar rtnNode = !cast<PatFrags>(intr # "_" # addrSpaceSuffix); 1142 1143 let AddedComplexity = 1 in 1144 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), noRtnNode, vt, data_vt>; 1145 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>; 1146} 1147 1148class ScratchLoadSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1149 (vt (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset))), 1150 (inst $vaddr, $offset) 1151>; 1152 1153class ScratchLoadSignedPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1154 (node (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset), vt:$in), 1155 (inst $vaddr, $offset, 0, $in) 1156>; 1157 1158class ScratchStoreSignedPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1159 (node vt:$data, (ScratchOffset (i32 VGPR_32:$vaddr), i32:$offset)), 1160 (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $offset) 1161>; 1162 1163class ScratchLoadSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1164 (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset))), 1165 (inst $saddr, $offset) 1166>; 1167 1168class ScratchLoadSaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1169 (vt (node (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset), vt:$in)), 1170 (inst $saddr, $offset, 0, $in) 1171>; 1172 1173class ScratchStoreSaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1174 ValueType vt> : GCNPat < 1175 (node vt:$data, (ScratchSAddr (i32 SGPR_32:$saddr), i32:$offset)), 1176 (inst getVregSrcForVT<vt>.ret:$data, $saddr, $offset) 1177>; 1178 1179class ScratchLoadSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1180 (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset))), 1181 (inst $vaddr, $saddr, $offset, 0) 1182>; 1183 1184class ScratchStoreSVaddrPat <FLAT_Pseudo inst, SDPatternOperator node, 1185 ValueType vt> : GCNPat < 1186 (node vt:$data, (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset)), 1187 (inst getVregSrcForVT<vt>.ret:$data, $vaddr, $saddr, $offset) 1188>; 1189 1190class ScratchLoadSVaddrPat_D16 <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : GCNPat < 1191 (vt (node (ScratchSVAddr (i32 VGPR_32:$vaddr), (i32 SGPR_32:$saddr), i32:$offset), vt:$in)), 1192 (inst $vaddr, $saddr, $offset, 0, $in) 1193>; 1194 1195let OtherPredicates = [HasFlatAddressSpace] in { 1196 1197def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i32>; 1198def : FlatLoadPat <FLAT_LOAD_UBYTE, atomic_load_8_flat, i16>; 1199def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i32>; 1200def : FlatLoadPat <FLAT_LOAD_USHORT, atomic_load_16_flat, i16>; 1201def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i32>; 1202def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i32>; 1203def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i32>; 1204def : FlatLoadPat <FLAT_LOAD_UBYTE, extloadi8_flat, i16>; 1205def : FlatLoadPat <FLAT_LOAD_UBYTE, zextloadi8_flat, i16>; 1206def : FlatLoadPat <FLAT_LOAD_SBYTE, sextloadi8_flat, i16>; 1207def : FlatLoadPat <FLAT_LOAD_USHORT, extloadi16_flat, i32>; 1208def : FlatLoadPat <FLAT_LOAD_USHORT, zextloadi16_flat, i32>; 1209def : FlatLoadPat <FLAT_LOAD_USHORT, load_flat, i16>; 1210def : FlatLoadPat <FLAT_LOAD_SSHORT, sextloadi16_flat, i32>; 1211def : FlatLoadPat <FLAT_LOAD_DWORDX3, load_flat, v3i32>; 1212 1213def : FlatLoadPat <FLAT_LOAD_DWORD, atomic_load_32_flat, i32>; 1214def : FlatLoadPat <FLAT_LOAD_DWORDX2, atomic_load_64_flat, i64>; 1215 1216def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i32>; 1217def : FlatStorePat <FLAT_STORE_SHORT, truncstorei16_flat, i32>; 1218 1219foreach vt = Reg32Types.types in { 1220def : FlatLoadPat <FLAT_LOAD_DWORD, load_flat, vt>; 1221def : FlatStorePat <FLAT_STORE_DWORD, store_flat, vt>; 1222} 1223 1224foreach vt = VReg_64.RegTypes in { 1225def : FlatStorePat <FLAT_STORE_DWORDX2, store_flat, vt>; 1226def : FlatLoadPat <FLAT_LOAD_DWORDX2, load_flat, vt>; 1227} 1228 1229def : FlatStorePat <FLAT_STORE_DWORDX3, store_flat, v3i32>; 1230 1231foreach vt = VReg_128.RegTypes in { 1232def : FlatLoadPat <FLAT_LOAD_DWORDX4, load_flat, vt>; 1233def : FlatStorePat <FLAT_STORE_DWORDX4, store_flat, vt>; 1234} 1235 1236def : FlatStorePat <FLAT_STORE_DWORD, atomic_store_32_flat, i32>; 1237def : FlatStorePat <FLAT_STORE_DWORDX2, atomic_store_64_flat, i64>; 1238def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i32>; 1239def : FlatStorePat <FLAT_STORE_BYTE, atomic_store_8_flat, i16>; 1240def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i32>; 1241def : FlatStorePat <FLAT_STORE_SHORT, atomic_store_16_flat, i16>; 1242 1243foreach as = [ "flat", "global" ] in { 1244defm : FlatAtomicPat <"FLAT_ATOMIC_ADD", "atomic_load_add_"#as, i32>; 1245defm : FlatAtomicPat <"FLAT_ATOMIC_SUB", "atomic_load_sub_"#as, i32>; 1246defm : FlatAtomicPat <"FLAT_ATOMIC_INC", "atomic_load_uinc_wrap_"#as, i32>; 1247defm : FlatAtomicPat <"FLAT_ATOMIC_DEC", "atomic_load_udec_wrap_"#as, i32>; 1248defm : FlatAtomicPat <"FLAT_ATOMIC_AND", "atomic_load_and_"#as, i32>; 1249defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX", "atomic_load_max_"#as, i32>; 1250defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX", "atomic_load_umax_"#as, i32>; 1251defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN", "atomic_load_min_"#as, i32>; 1252defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN", "atomic_load_umin_"#as, i32>; 1253defm : FlatAtomicPat <"FLAT_ATOMIC_OR", "atomic_load_or_"#as, i32>; 1254defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP", "atomic_swap_"#as, i32>; 1255defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_"#as, i32, v2i32>; 1256defm : FlatAtomicPat <"FLAT_ATOMIC_XOR", "atomic_load_xor_"#as, i32>; 1257 1258defm : FlatAtomicPat <"FLAT_ATOMIC_ADD_X2", "atomic_load_add_"#as, i64>; 1259defm : FlatAtomicPat <"FLAT_ATOMIC_SUB_X2", "atomic_load_sub_"#as, i64>; 1260defm : FlatAtomicPat <"FLAT_ATOMIC_INC_X2", "atomic_load_uinc_wrap_"#as, i64>; 1261defm : FlatAtomicPat <"FLAT_ATOMIC_DEC_X2", "atomic_load_udec_wrap_"#as, i64>; 1262defm : FlatAtomicPat <"FLAT_ATOMIC_AND_X2", "atomic_load_and_"#as, i64>; 1263defm : FlatAtomicPat <"FLAT_ATOMIC_SMAX_X2", "atomic_load_max_"#as, i64>; 1264defm : FlatAtomicPat <"FLAT_ATOMIC_UMAX_X2", "atomic_load_umax_"#as, i64>; 1265defm : FlatAtomicPat <"FLAT_ATOMIC_SMIN_X2", "atomic_load_min_"#as, i64>; 1266defm : FlatAtomicPat <"FLAT_ATOMIC_UMIN_X2", "atomic_load_umin_"#as, i64>; 1267defm : FlatAtomicPat <"FLAT_ATOMIC_OR_X2", "atomic_load_or_"#as, i64>; 1268defm : FlatAtomicPat <"FLAT_ATOMIC_SWAP_X2", "atomic_swap_"#as, i64>; 1269defm : FlatAtomicPat <"FLAT_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_"#as, i64, v2i64>; 1270defm : FlatAtomicPat <"FLAT_ATOMIC_XOR_X2", "atomic_load_xor_"#as, i64>; 1271} // end foreach as 1272 1273def : FlatStorePat <FLAT_STORE_BYTE, truncstorei8_flat, i16>; 1274def : FlatStorePat <FLAT_STORE_SHORT, store_flat, i16>; 1275 1276let OtherPredicates = [HasD16LoadStore] in { 1277def : FlatStorePat <FLAT_STORE_SHORT_D16_HI, truncstorei16_hi16_flat, i32>; 1278def : FlatStorePat <FLAT_STORE_BYTE_D16_HI, truncstorei8_hi16_flat, i32>; 1279} 1280 1281let OtherPredicates = [D16PreservesUnusedBits] in { 1282def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2i16>; 1283def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_flat, v2f16>; 1284def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2i16>; 1285def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_flat, v2f16>; 1286def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2i16>; 1287def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16_HI, load_d16_hi_flat, v2f16>; 1288 1289def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2i16>; 1290def : FlatLoadPat_D16 <FLAT_LOAD_UBYTE_D16, az_extloadi8_d16_lo_flat, v2f16>; 1291def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2i16>; 1292def : FlatLoadPat_D16 <FLAT_LOAD_SBYTE_D16, sextloadi8_d16_lo_flat, v2f16>; 1293def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2i16>; 1294def : FlatLoadPat_D16 <FLAT_LOAD_SHORT_D16, load_d16_lo_flat, v2f16>; 1295} 1296 1297} // End OtherPredicates = [HasFlatAddressSpace] 1298 1299 1300multiclass GlobalFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1301 def : FlatLoadSignedPat <inst, node, vt> { 1302 let AddedComplexity = 10; 1303 } 1304 1305 def : GlobalLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1306 let AddedComplexity = 11; 1307 } 1308} 1309 1310multiclass GlobalFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1311 def : FlatSignedLoadPat_D16 <inst, node, vt> { 1312 let AddedComplexity = 10; 1313 } 1314 1315 def : GlobalLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1316 let AddedComplexity = 11; 1317 } 1318} 1319 1320multiclass GlobalFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node, 1321 ValueType vt> { 1322 def : FlatStoreSignedPat <inst, node, vt> { 1323 let AddedComplexity = 10; 1324 } 1325 1326 def : GlobalStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1327 let AddedComplexity = 11; 1328 } 1329} 1330 1331multiclass GlobalFLATAtomicPatsNoRtnBase<string inst, string node, ValueType vt, 1332 ValueType data_vt = vt> { 1333 let AddedComplexity = 11 in 1334 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst), !cast<SDPatternOperator>(node), vt, data_vt>; 1335 1336 let AddedComplexity = 13 in 1337 def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR"), !cast<SDPatternOperator>(node), vt, data_vt>; 1338} 1339 1340multiclass GlobalFLATAtomicPatsRtnBase<string inst, string node, ValueType vt, 1341 ValueType data_vt = vt, bit isPatFrags = 0> { 1342 defvar rtnNode = !if(isPatFrags, !cast<PatFrags>(node), !cast<SDPatternOperator>(node)); 1343 1344 let AddedComplexity = 10 in 1345 def : FlatSignedAtomicPatBase<!cast<FLAT_Pseudo>(inst#"_RTN"), rtnNode, vt, data_vt>; 1346 1347 let AddedComplexity = 12 in 1348 def : GlobalAtomicSaddrPat<!cast<FLAT_Pseudo>(inst#"_SADDR_RTN"), rtnNode, vt, data_vt>; 1349} 1350 1351multiclass GlobalFLATAtomicPatsNoRtn<string inst, string node, ValueType vt, 1352 ValueType data_vt = vt, bit isIntr = 0> : 1353 GlobalFLATAtomicPatsNoRtnBase<inst, node # "_noret" # !if(isIntr, "", "_" # vt.Size), vt, data_vt>; 1354 1355multiclass GlobalFLATAtomicPatsRtn<string inst, string node, ValueType vt, 1356 ValueType data_vt = vt, bit isIntr = 0> : 1357 GlobalFLATAtomicPatsRtnBase<inst, node # !if(isIntr, "", "_" # vt.Size), vt, data_vt>; 1358 1359multiclass GlobalFLATAtomicPats<string inst, string node, ValueType vt, 1360 ValueType data_vt = vt, bit isIntr = 0> : 1361 GlobalFLATAtomicPatsNoRtn<inst, node, vt, data_vt, isIntr>, 1362 GlobalFLATAtomicPatsRtn<inst, node, vt, data_vt, isIntr>; 1363 1364multiclass GlobalFLATAtomicPatsNoRtnWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1365 ValueType vt, ValueType data_vt = vt> : 1366 GlobalFLATAtomicPatsNoRtnBase<inst, intr # "_noret_" # addrSpaceSuffix, vt, data_vt>; 1367 1368multiclass GlobalFLATAtomicPatsRtnWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1369 ValueType vt, ValueType data_vt = vt> : 1370 GlobalFLATAtomicPatsRtnBase<inst, intr # "_" # addrSpaceSuffix, vt, data_vt, /*isPatFrags*/ 1>; 1371 1372multiclass GlobalFLATAtomicPatsWithAddrSpace<string inst, string intr, string addrSpaceSuffix, 1373 ValueType vt, ValueType data_vt = vt> : 1374 GlobalFLATAtomicPatsNoRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>, 1375 GlobalFLATAtomicPatsRtnWithAddrSpace<inst, intr, addrSpaceSuffix, vt, data_vt>; 1376 1377multiclass GlobalFLATAtomicIntrPats<string inst, string node, ValueType vt, 1378 ValueType data_vt = vt> { 1379 defm : GlobalFLATAtomicPats<inst, node, vt, data_vt, /* isIntr */ 1>; 1380} 1381 1382multiclass ScratchFLATLoadPats<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1383 def : ScratchLoadSignedPat <inst, node, vt> { 1384 let AddedComplexity = 25; 1385 } 1386 1387 def : ScratchLoadSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1388 let AddedComplexity = 26; 1389 } 1390 1391 def : ScratchLoadSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1392 let SubtargetPredicate = HasFlatScratchSVSMode; 1393 let AddedComplexity = 27; 1394 } 1395} 1396 1397multiclass ScratchFLATStorePats<FLAT_Pseudo inst, SDPatternOperator node, 1398 ValueType vt> { 1399 def : ScratchStoreSignedPat <inst, node, vt> { 1400 let AddedComplexity = 25; 1401 } 1402 1403 def : ScratchStoreSaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1404 let AddedComplexity = 26; 1405 } 1406 1407 def : ScratchStoreSVaddrPat<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1408 let SubtargetPredicate = HasFlatScratchSVSMode; 1409 let AddedComplexity = 27; 1410 } 1411} 1412 1413multiclass ScratchFLATLoadPats_D16<FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> { 1414 def : ScratchLoadSignedPat_D16 <inst, node, vt> { 1415 let AddedComplexity = 25; 1416 } 1417 1418 def : ScratchLoadSaddrPat_D16<!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SADDR"), node, vt> { 1419 let AddedComplexity = 26; 1420 } 1421 1422 def : ScratchLoadSVaddrPat_D16 <!cast<FLAT_Pseudo>(!cast<string>(inst)#"_SVS"), node, vt> { 1423 let SubtargetPredicate = HasFlatScratchSVSMode; 1424 let AddedComplexity = 27; 1425 } 1426} 1427 1428let OtherPredicates = [HasFlatGlobalInsts] in { 1429 1430defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i32>; 1431defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, atomic_load_8_global, i16>; 1432defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i32>; 1433defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, atomic_load_16_global, i16>; 1434defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i32>; 1435defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i32>; 1436defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i32>; 1437defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, extloadi8_global, i16>; 1438defm : GlobalFLATLoadPats <GLOBAL_LOAD_UBYTE, zextloadi8_global, i16>; 1439defm : GlobalFLATLoadPats <GLOBAL_LOAD_SBYTE, sextloadi8_global, i16>; 1440defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, extloadi16_global, i32>; 1441defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, zextloadi16_global, i32>; 1442defm : GlobalFLATLoadPats <GLOBAL_LOAD_SSHORT, sextloadi16_global, i32>; 1443defm : GlobalFLATLoadPats <GLOBAL_LOAD_USHORT, load_global, i16>; 1444 1445foreach vt = Reg32Types.types in { 1446defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, load_global, vt>; 1447defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, store_global, vt>; 1448} 1449 1450foreach vt = VReg_64.RegTypes in { 1451defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, load_global, vt>; 1452defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, store_global, vt>; 1453} 1454 1455defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX3, load_global, v3i32>; 1456 1457foreach vt = VReg_128.RegTypes in { 1458defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX4, load_global, vt>; 1459defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX4, store_global, vt>; 1460} 1461 1462// There is no distinction for atomic load lowering during selection; 1463// the memory legalizer will set the cache bits and insert the 1464// appropriate waits. 1465defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORD, atomic_load_32_global, i32>; 1466defm : GlobalFLATLoadPats <GLOBAL_LOAD_DWORDX2, atomic_load_64_global, i64>; 1467 1468defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i32>; 1469defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, truncstorei8_global, i16>; 1470defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, truncstorei16_global, i32>; 1471defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, store_global, i16>; 1472defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX3, store_global, v3i32>; 1473 1474let OtherPredicates = [HasD16LoadStore] in { 1475defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT_D16_HI, truncstorei16_hi16_global, i32>; 1476defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE_D16_HI, truncstorei8_hi16_global, i32>; 1477} 1478 1479let OtherPredicates = [D16PreservesUnusedBits] in { 1480defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2i16>; 1481defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_global, v2f16>; 1482defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2i16>; 1483defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_global, v2f16>; 1484defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2i16>; 1485defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16_HI, load_d16_hi_global, v2f16>; 1486 1487defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2i16>; 1488defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_UBYTE_D16, az_extloadi8_d16_lo_global, v2f16>; 1489defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2i16>; 1490defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SBYTE_D16, sextloadi8_d16_lo_global, v2f16>; 1491defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2i16>; 1492defm : GlobalFLATLoadPats_D16 <GLOBAL_LOAD_SHORT_D16, load_d16_lo_global, v2f16>; 1493} 1494 1495defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i32>; 1496defm : GlobalFLATStorePats <GLOBAL_STORE_BYTE, atomic_store_8_global, i16>; 1497defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i32>; 1498defm : GlobalFLATStorePats <GLOBAL_STORE_SHORT, atomic_store_16_global, i16>; 1499defm : GlobalFLATStorePats <GLOBAL_STORE_DWORD, atomic_store_32_global, i32>; 1500defm : GlobalFLATStorePats <GLOBAL_STORE_DWORDX2, atomic_store_64_global, i64>; 1501 1502defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD", "atomic_load_add_global", i32>; 1503defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB", "atomic_load_sub_global", i32>; 1504defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC", "atomic_load_uinc_wrap_global", i32>; 1505defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC", "atomic_load_udec_wrap_global", i32>; 1506defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND", "atomic_load_and_global", i32>; 1507defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX", "atomic_load_max_global", i32>; 1508defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX", "atomic_load_umax_global", i32>; 1509defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN", "atomic_load_min_global", i32>; 1510defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN", "atomic_load_umin_global", i32>; 1511defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR", "atomic_load_or_global", i32>; 1512defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP", "atomic_swap_global", i32>; 1513defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP", "AMDGPUatomic_cmp_swap_global", i32, v2i32>; 1514defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR", "atomic_load_xor_global", i32>; 1515defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>; 1516 1517let OtherPredicates = [HasAtomicCSubNoRtnInsts] in 1518defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_CSUB", "int_amdgcn_global_atomic_csub", i32, i32, /* isIntr */ 1>; 1519 1520defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_X2", "atomic_load_add_global", i64>; 1521defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SUB_X2", "atomic_load_sub_global", i64>; 1522defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_INC_X2", "atomic_load_uinc_wrap_global", i64>; 1523defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_DEC_X2", "atomic_load_udec_wrap_global", i64>; 1524defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_AND_X2", "atomic_load_and_global", i64>; 1525defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMAX_X2", "atomic_load_max_global", i64>; 1526defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMAX_X2", "atomic_load_umax_global", i64>; 1527defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SMIN_X2", "atomic_load_min_global", i64>; 1528defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_UMIN_X2", "atomic_load_umin_global", i64>; 1529defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_OR_X2", "atomic_load_or_global", i64>; 1530defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_SWAP_X2", "atomic_swap_global", i64>; 1531defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_CMPSWAP_X2", "AMDGPUatomic_cmp_swap_global", i64, v2i64>; 1532defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_XOR_X2", "atomic_load_xor_global", i64>; 1533 1534let OtherPredicates = [isGFX12Plus] in { 1535 defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ORDERED_ADD_B64", "int_amdgcn_global_atomic_ordered_add_b64", i64, i64, /* isIntr */ 1>; 1536} 1537 1538let OtherPredicates = [isGFX10Plus] in { 1539defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN", "atomic_load_fmin_global", f32>; 1540defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX", "atomic_load_fmax_global", f32>; 1541defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN", "atomic_load_fmin_flat", f32>; 1542defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX", "atomic_load_fmax_flat", f32>; 1543} 1544 1545let OtherPredicates = [isGFX10GFX11] in { 1546defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin", f32>; 1547defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax", f32>; 1548 1549defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin", f32>; 1550defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax", f32>; 1551} 1552 1553let OtherPredicates = [isGFX10Only] in { 1554defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMIN_X2", "atomic_load_fmin_global", f64>; 1555defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_FMAX_X2", "atomic_load_fmax_global", f64>; 1556defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN_X2", "int_amdgcn_global_atomic_fmin", f64>; 1557defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX_X2", "int_amdgcn_global_atomic_fmax", f64>; 1558defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMIN_X2", "atomic_load_fmin_flat", f64>; 1559defm : FlatSignedAtomicPat <"FLAT_ATOMIC_FMAX_X2", "atomic_load_fmax_flat", f64>; 1560defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN_X2", "int_amdgcn_flat_atomic_fmin", f64>; 1561defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX_X2", "int_amdgcn_flat_atomic_fmax", f64>; 1562} 1563 1564let OtherPredicates = [isGFX12Only] in { 1565 defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMIN", "int_amdgcn_global_atomic_fmin_num", f32>; 1566 defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_FMAX", "int_amdgcn_global_atomic_fmax_num", f32>; 1567 defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMIN", "int_amdgcn_flat_atomic_fmin_num", f32>; 1568 defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_FMAX", "int_amdgcn_flat_atomic_fmax_num", f32>; 1569} 1570 1571let OtherPredicates = [HasAtomicFaddNoRtnInsts] in { 1572defm : GlobalFLATAtomicPatsNoRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>; 1573defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>; 1574defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>; 1575} 1576 1577let OtherPredicates = [HasAtomicBufferGlobalPkAddF16NoRtnInsts] in { 1578defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>; 1579defm : GlobalFLATAtomicPatsNoRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>; 1580} 1581 1582let OtherPredicates = [HasAtomicFaddRtnInsts] in { 1583defm : GlobalFLATAtomicPatsRtn <"GLOBAL_ATOMIC_ADD_F32", "atomic_load_fadd_global", f32>; 1584defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f32>; 1585defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_ADD_F32", "int_amdgcn_global_atomic_fadd", "global_addrspace", f32>; 1586} 1587 1588let OtherPredicates = [HasAtomicBufferGlobalPkAddF16Insts] in { 1589defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "global_addrspace", v2f16>; 1590defm : GlobalFLATAtomicPatsRtnWithAddrSpace <"GLOBAL_ATOMIC_PK_ADD_F16", "int_amdgcn_global_atomic_fadd", "global_addrspace", v2f16>; 1591} 1592 1593let OtherPredicates = [isGFX90APlus] in { 1594defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_ADD_F64", "atomic_load_fadd_global", f64>; 1595defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MIN_F64", "atomic_load_fmin_global", f64>; 1596defm : GlobalFLATAtomicPats <"GLOBAL_ATOMIC_MAX_F64", "atomic_load_fmax_global", f64>; 1597defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "global_addrspace", f64>; 1598defm : GlobalFLATAtomicPatsWithAddrSpace<"GLOBAL_ATOMIC_ADD_F64", "int_amdgcn_global_atomic_fadd", "global_addrspace", f64>; 1599defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MIN_F64", "int_amdgcn_global_atomic_fmin", f64>; 1600defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_MAX_F64", "int_amdgcn_global_atomic_fmax", f64>; 1601defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F64", "atomic_load_fadd_flat", f64>; 1602defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MIN_F64", "atomic_load_fmin_flat", f64>; 1603defm : FlatSignedAtomicPat <"FLAT_ATOMIC_MAX_F64", "atomic_load_fmax_flat", f64>; 1604defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F64", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f64>; 1605defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MIN_F64", "int_amdgcn_flat_atomic_fmin", f64>; 1606defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_MAX_F64", "int_amdgcn_flat_atomic_fmax", f64>; 1607} 1608 1609let OtherPredicates = [HasFlatAtomicFaddF32Inst] in { 1610defm : FlatSignedAtomicPat <"FLAT_ATOMIC_ADD_F32", "atomic_load_fadd_flat", f32>; 1611defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_ADD_F32", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", f32>; 1612} 1613 1614let OtherPredicates = [HasAtomicFlatPkAdd16Insts] in { 1615defm : FlatSignedAtomicPatWithAddrSpace <"FLAT_ATOMIC_PK_ADD_F16", "int_amdgcn_flat_atomic_fadd", "flat_addrspace", v2f16>; 1616defm : FlatSignedAtomicIntrPat <"FLAT_ATOMIC_PK_ADD_BF16", "int_amdgcn_flat_atomic_fadd_v2bf16", v2i16>; 1617} 1618 1619let OtherPredicates = [HasAtomicGlobalPkAddBF16Inst] in 1620defm : GlobalFLATAtomicIntrPats <"GLOBAL_ATOMIC_PK_ADD_BF16", "int_amdgcn_global_atomic_fadd_v2bf16", v2i16>; 1621 1622} // End OtherPredicates = [HasFlatGlobalInsts], AddedComplexity = 10 1623 1624let OtherPredicates = [HasFlatScratchInsts, EnableFlatScratch] in { 1625 1626defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i32>; 1627defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i32>; 1628defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i32>; 1629defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, extloadi8_private, i16>; 1630defm : ScratchFLATLoadPats <SCRATCH_LOAD_UBYTE, zextloadi8_private, i16>; 1631defm : ScratchFLATLoadPats <SCRATCH_LOAD_SBYTE, sextloadi8_private, i16>; 1632defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, extloadi16_private, i32>; 1633defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, zextloadi16_private, i32>; 1634defm : ScratchFLATLoadPats <SCRATCH_LOAD_SSHORT, sextloadi16_private, i32>; 1635defm : ScratchFLATLoadPats <SCRATCH_LOAD_USHORT, load_private, i16>; 1636 1637foreach vt = Reg32Types.types in { 1638defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORD, load_private, vt>; 1639defm : ScratchFLATStorePats <SCRATCH_STORE_DWORD, store_private, vt>; 1640} 1641 1642foreach vt = VReg_64.RegTypes in { 1643defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX2, load_private, vt>; 1644defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX2, store_private, vt>; 1645} 1646 1647defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX3, load_private, v3i32>; 1648 1649foreach vt = VReg_128.RegTypes in { 1650defm : ScratchFLATLoadPats <SCRATCH_LOAD_DWORDX4, load_private, vt>; 1651defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX4, store_private, vt>; 1652} 1653 1654defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i32>; 1655defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE, truncstorei8_private, i16>; 1656defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, truncstorei16_private, i32>; 1657defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT, store_private, i16>; 1658defm : ScratchFLATStorePats <SCRATCH_STORE_DWORDX3, store_private, v3i32>; 1659 1660let OtherPredicates = [HasD16LoadStore, HasFlatScratchInsts, EnableFlatScratch] in { 1661defm : ScratchFLATStorePats <SCRATCH_STORE_SHORT_D16_HI, truncstorei16_hi16_private, i32>; 1662defm : ScratchFLATStorePats <SCRATCH_STORE_BYTE_D16_HI, truncstorei8_hi16_private, i32>; 1663} 1664 1665let OtherPredicates = [D16PreservesUnusedBits, HasFlatScratchInsts, EnableFlatScratch] in { 1666defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2i16>; 1667defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16_HI, az_extloadi8_d16_hi_private, v2f16>; 1668defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2i16>; 1669defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16_HI, sextloadi8_d16_hi_private, v2f16>; 1670defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2i16>; 1671defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16_HI, load_d16_hi_private, v2f16>; 1672 1673defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2i16>; 1674defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_UBYTE_D16, az_extloadi8_d16_lo_private, v2f16>; 1675defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2i16>; 1676defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SBYTE_D16, sextloadi8_d16_lo_private, v2f16>; 1677defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2i16>; 1678defm : ScratchFLATLoadPats_D16 <SCRATCH_LOAD_SHORT_D16, load_d16_lo_private, v2f16>; 1679} 1680 1681} // End OtherPredicates = [HasFlatScratchInsts,EnableFlatScratch] 1682 1683//===----------------------------------------------------------------------===// 1684// Target 1685//===----------------------------------------------------------------------===// 1686 1687//===----------------------------------------------------------------------===// 1688// CI 1689//===----------------------------------------------------------------------===// 1690 1691class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> : 1692 FLAT_Real <op, ps>, 1693 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> { 1694 let AssemblerPredicate = isGFX7Only; 1695 let DecoderNamespace="GFX7"; 1696} 1697 1698def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>; 1699def FLAT_LOAD_SBYTE_ci : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>; 1700def FLAT_LOAD_USHORT_ci : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>; 1701def FLAT_LOAD_SSHORT_ci : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>; 1702def FLAT_LOAD_DWORD_ci : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>; 1703def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>; 1704def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>; 1705def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>; 1706 1707def FLAT_STORE_BYTE_ci : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>; 1708def FLAT_STORE_SHORT_ci : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>; 1709def FLAT_STORE_DWORD_ci : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>; 1710def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>; 1711def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>; 1712def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>; 1713 1714multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> { 1715 def _ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 1716 def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 1717} 1718 1719defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>; 1720defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>; 1721defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>; 1722defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>; 1723defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>; 1724defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>; 1725defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>; 1726defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>; 1727defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>; 1728defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>; 1729defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>; 1730defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>; 1731defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>; 1732defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>; 1733defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>; 1734defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>; 1735defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>; 1736defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>; 1737defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>; 1738defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>; 1739defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>; 1740defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>; 1741defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>; 1742defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>; 1743defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>; 1744defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>; 1745 1746// CI Only flat instructions 1747defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>; 1748defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>; 1749defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>; 1750defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>; 1751defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>; 1752defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>; 1753 1754 1755//===----------------------------------------------------------------------===// 1756// VI 1757//===----------------------------------------------------------------------===// 1758 1759class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps, bit has_sccb = ps.has_sccb> : 1760 FLAT_Real <op, ps>, 1761 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> { 1762 let AssemblerPredicate = isGFX8GFX9; 1763 let DecoderNamespace = "GFX8"; 1764 1765 let Inst{25} = !if(has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); 1766 let AsmString = ps.Mnemonic # 1767 !subst("$sccb", !if(has_sccb, "$sccb",""), ps.AsmOperands); 1768} 1769 1770multiclass FLAT_Real_AllAddr_vi<bits<7> op, 1771 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { 1772 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME), has_sccb>; 1773 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>; 1774} 1775 1776class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> : 1777 FLAT_Real <op, ps>, 1778 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> { 1779 let AssemblerPredicate = isGFX940Plus; 1780 let DecoderNamespace = "GFX9"; 1781 let Inst{13} = ps.sve; 1782 let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue); 1783} 1784 1785multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> { 1786 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> { 1787 let AssemblerPredicate = isGFX8GFX9NotGFX940; 1788 let OtherPredicates = [isGFX8GFX9NotGFX940]; 1789 } 1790 def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> { 1791 let DecoderNamespace = "GFX9"; 1792 } 1793 let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in { 1794 def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>; 1795 def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>; 1796 def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>; 1797 } 1798} 1799 1800multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op, 1801 string pre_gfx940_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr), 1802 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { 1803 1804 let OtherPredicates = [isGFX8GFX9NotGFX940] in { 1805 def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> { 1806 let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds"; 1807 } 1808 def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> { 1809 let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds"; 1810 } 1811 } 1812 1813 let SubtargetPredicate = isGFX940Plus in { 1814 def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>; 1815 def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1816 } 1817} 1818 1819multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> { 1820 defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>; 1821 let SubtargetPredicate = isGFX940Plus in { 1822 def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>; 1823 def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>; 1824 } 1825} 1826 1827def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>; 1828def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>; 1829def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>; 1830def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>; 1831def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>; 1832def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>; 1833def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>; 1834def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>; 1835 1836def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>; 1837def FLAT_STORE_BYTE_D16_HI_vi : FLAT_Real_vi <0x19, FLAT_STORE_BYTE_D16_HI>; 1838def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>; 1839def FLAT_STORE_SHORT_D16_HI_vi : FLAT_Real_vi <0x1b, FLAT_STORE_SHORT_D16_HI>; 1840def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>; 1841def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>; 1842def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>; 1843def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>; 1844 1845def FLAT_LOAD_UBYTE_D16_vi : FLAT_Real_vi <0x20, FLAT_LOAD_UBYTE_D16>; 1846def FLAT_LOAD_UBYTE_D16_HI_vi : FLAT_Real_vi <0x21, FLAT_LOAD_UBYTE_D16_HI>; 1847def FLAT_LOAD_SBYTE_D16_vi : FLAT_Real_vi <0x22, FLAT_LOAD_SBYTE_D16>; 1848def FLAT_LOAD_SBYTE_D16_HI_vi : FLAT_Real_vi <0x23, FLAT_LOAD_SBYTE_D16_HI>; 1849def FLAT_LOAD_SHORT_D16_vi : FLAT_Real_vi <0x24, FLAT_LOAD_SHORT_D16>; 1850def FLAT_LOAD_SHORT_D16_HI_vi : FLAT_Real_vi <0x25, FLAT_LOAD_SHORT_D16_HI>; 1851 1852multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps, 1853 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> { 1854 def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr), has_sccb>; 1855 def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN"), has_sccb>; 1856} 1857 1858multiclass FLAT_Global_Real_Atomics_vi<bits<7> op, 1859 bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> : 1860 FLAT_Real_AllAddr_vi<op, has_sccb> { 1861 def _RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_RTN"), has_sccb>; 1862 def _SADDR_RTN_vi : FLAT_Real_vi <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN"), has_sccb>; 1863} 1864 1865 1866defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>; 1867defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>; 1868defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>; 1869defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>; 1870defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>; 1871defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>; 1872defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>; 1873defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>; 1874defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>; 1875defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>; 1876defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>; 1877defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>; 1878defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>; 1879defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>; 1880defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>; 1881defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>; 1882defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>; 1883defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>; 1884defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>; 1885defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>; 1886defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>; 1887defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>; 1888defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>; 1889defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>; 1890defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>; 1891defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>; 1892 1893defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_vi <0x10>; 1894defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_vi <0x11>; 1895defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_vi <0x12>; 1896defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_vi <0x13>; 1897defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_vi <0x14>; 1898defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_vi <0x15>; 1899defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_vi <0x16>; 1900defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_vi <0x17>; 1901 1902defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_vi <0x20>; 1903defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x21>; 1904defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_vi <0x22>; 1905defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_vi <0x23>; 1906defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_vi <0x24>; 1907defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x25>; 1908 1909defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_vi <0x18>; 1910defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_vi <0x19>; 1911defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_vi <0x1a>; 1912defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_vi <0x1b>; 1913defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_vi <0x1c>; 1914defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_vi <0x1d>; 1915defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_vi <0x1e>; 1916defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_vi <0x1f>; 1917 1918defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_LDS <0x026, 0x10>; 1919defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_LDS <0x027, 0x11>; 1920defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS <0x028, 0x12>; 1921defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS <0x029, 0x13>; 1922defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS <0x02a, 0x14>; 1923 1924defm GLOBAL_ATOMIC_SWAP : FLAT_Global_Real_Atomics_vi <0x40>; 1925defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Global_Real_Atomics_vi <0x41>; 1926defm GLOBAL_ATOMIC_ADD : FLAT_Global_Real_Atomics_vi <0x42>; 1927defm GLOBAL_ATOMIC_SUB : FLAT_Global_Real_Atomics_vi <0x43>; 1928defm GLOBAL_ATOMIC_SMIN : FLAT_Global_Real_Atomics_vi <0x44>; 1929defm GLOBAL_ATOMIC_UMIN : FLAT_Global_Real_Atomics_vi <0x45>; 1930defm GLOBAL_ATOMIC_SMAX : FLAT_Global_Real_Atomics_vi <0x46>; 1931defm GLOBAL_ATOMIC_UMAX : FLAT_Global_Real_Atomics_vi <0x47>; 1932defm GLOBAL_ATOMIC_AND : FLAT_Global_Real_Atomics_vi <0x48>; 1933defm GLOBAL_ATOMIC_OR : FLAT_Global_Real_Atomics_vi <0x49>; 1934defm GLOBAL_ATOMIC_XOR : FLAT_Global_Real_Atomics_vi <0x4a>; 1935defm GLOBAL_ATOMIC_INC : FLAT_Global_Real_Atomics_vi <0x4b>; 1936defm GLOBAL_ATOMIC_DEC : FLAT_Global_Real_Atomics_vi <0x4c>; 1937defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Global_Real_Atomics_vi <0x60>; 1938defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Global_Real_Atomics_vi <0x61>; 1939defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Global_Real_Atomics_vi <0x62>; 1940defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Global_Real_Atomics_vi <0x63>; 1941defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Global_Real_Atomics_vi <0x64>; 1942defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Global_Real_Atomics_vi <0x65>; 1943defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Global_Real_Atomics_vi <0x66>; 1944defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Global_Real_Atomics_vi <0x67>; 1945defm GLOBAL_ATOMIC_AND_X2 : FLAT_Global_Real_Atomics_vi <0x68>; 1946defm GLOBAL_ATOMIC_OR_X2 : FLAT_Global_Real_Atomics_vi <0x69>; 1947defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Global_Real_Atomics_vi <0x6a>; 1948defm GLOBAL_ATOMIC_INC_X2 : FLAT_Global_Real_Atomics_vi <0x6b>; 1949defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Global_Real_Atomics_vi <0x6c>; 1950 1951defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_SVE_LDS <0x026, 0x10>; 1952defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_SVE_LDS <0x027, 0x11>; 1953defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_AllAddr_SVE_LDS <0x028, 0x12>; 1954defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_SVE_LDS <0x029, 0x13>; 1955defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_AllAddr_SVE_LDS <0x02a, 0x14>; 1956 1957defm SCRATCH_LOAD_UBYTE : FLAT_Real_AllAddr_SVE_vi <0x10>; 1958defm SCRATCH_LOAD_SBYTE : FLAT_Real_AllAddr_SVE_vi <0x11>; 1959defm SCRATCH_LOAD_USHORT : FLAT_Real_AllAddr_SVE_vi <0x12>; 1960defm SCRATCH_LOAD_SSHORT : FLAT_Real_AllAddr_SVE_vi <0x13>; 1961defm SCRATCH_LOAD_DWORD : FLAT_Real_AllAddr_SVE_vi <0x14>; 1962defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x15>; 1963defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x16>; 1964defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x17>; 1965defm SCRATCH_STORE_BYTE : FLAT_Real_AllAddr_SVE_vi <0x18>; 1966defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x19>; 1967defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_SVE_vi <0x20>; 1968defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x21>; 1969defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_SVE_vi <0x22>; 1970defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x23>; 1971defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_AllAddr_SVE_vi <0x24>; 1972defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x25>; 1973defm SCRATCH_STORE_SHORT : FLAT_Real_AllAddr_SVE_vi <0x1a>; 1974defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_SVE_vi <0x1b>; 1975defm SCRATCH_STORE_DWORD : FLAT_Real_AllAddr_SVE_vi <0x1c>; 1976defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x1d>; 1977defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x1e>; 1978defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x1f>; 1979 1980let SubtargetPredicate = isGFX8GFX9NotGFX940 in { 1981 // These instructions are encoded differently on gfx90* and gfx940. 1982 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>; 1983 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>; 1984} 1985 1986let SubtargetPredicate = isGFX90AOnly in { 1987 defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_vi<0x4f, FLAT_ATOMIC_ADD_F64, 0>; 1988 defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_vi<0x50, FLAT_ATOMIC_MIN_F64, 0>; 1989 defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_vi<0x51, FLAT_ATOMIC_MAX_F64, 0>; 1990 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_vi<0x4f, 0>; 1991 defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_vi<0x50, 0>; 1992 defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>; 1993} // End SubtargetPredicate = isGFX90AOnly 1994 1995multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> { 1996 def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>; 1997 def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 1998} 1999 2000multiclass FLAT_Real_Atomics_gfx940 <bits<7> op, FLAT_Pseudo ps> { 2001 def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>; 2002 def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>; 2003} 2004 2005multiclass FLAT_Global_Real_Atomics_gfx940<bits<7> op> : 2006 FLAT_Real_AllAddr_gfx940<op> { 2007 def _RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 2008 def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 2009} 2010 2011let SubtargetPredicate = isGFX940Plus in { 2012 // These instructions are encoded differently on gfx90* and gfx940. 2013 defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx940 <0x04d>; 2014 defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940 <0x04e>; 2015 2016 defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx940<0x4f, FLAT_ATOMIC_ADD_F64>; 2017 defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx940<0x50, FLAT_ATOMIC_MIN_F64>; 2018 defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx940<0x51, FLAT_ATOMIC_MAX_F64>; 2019 defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>; 2020 defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>; 2021 defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>; 2022 defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d, FLAT_ATOMIC_ADD_F32>; 2023 defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e, FLAT_ATOMIC_PK_ADD_F16>; 2024 defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52, FLAT_ATOMIC_PK_ADD_BF16>; 2025 defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>; 2026} // End SubtargetPredicate = isGFX940Plus 2027 2028//===----------------------------------------------------------------------===// 2029// GFX10. 2030//===----------------------------------------------------------------------===// 2031 2032class FLAT_Real_gfx10<bits<7> op, FLAT_Pseudo ps> : 2033 FLAT_Real<op, ps>, SIMCInstr<ps.PseudoInstr, SIEncodingFamily.GFX10> { 2034 let AssemblerPredicate = isGFX10Only; 2035 let DecoderNamespace = "GFX10"; 2036 2037 let Inst{11-0} = offset{11-0}; 2038 let Inst{12} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue); 2039 let Inst{54-48} = !if(ps.has_saddr, !if(ps.enabled_saddr, saddr, 0x7d), 0x7d); 2040 let Inst{55} = 0; 2041} 2042 2043 2044multiclass FLAT_Real_Base_gfx10<bits<7> op> { 2045 def _gfx10 : 2046 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME)>; 2047} 2048 2049multiclass FLAT_Real_RTN_gfx10<bits<7> op> { 2050 def _RTN_gfx10 : 2051 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_RTN")>; 2052} 2053 2054multiclass FLAT_Real_SADDR_gfx10<bits<7> op> { 2055 def _SADDR_gfx10 : 2056 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>; 2057} 2058 2059multiclass FLAT_Real_SADDR_RTN_gfx10<bits<7> op> { 2060 def _SADDR_RTN_gfx10 : 2061 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>; 2062} 2063 2064multiclass FLAT_Real_ST_gfx10<bits<7> op> { 2065 def _ST_gfx10 : 2066 FLAT_Real_gfx10<op, !cast<FLAT_Pseudo>(NAME#"_ST")> { 2067 let Inst{54-48} = EXEC_HI.Index; 2068 let OtherPredicates = [HasFlatScratchSTMode]; 2069 } 2070} 2071 2072multiclass FLAT_Real_AllAddr_gfx10<bits<7> op> : 2073 FLAT_Real_Base_gfx10<op>, 2074 FLAT_Real_SADDR_gfx10<op>; 2075 2076multiclass FLAT_Real_Atomics_gfx10<bits<7> op> : 2077 FLAT_Real_Base_gfx10<op>, 2078 FLAT_Real_RTN_gfx10<op>; 2079 2080multiclass FLAT_Real_GlblAtomics_gfx10<bits<7> op> : 2081 FLAT_Real_AllAddr_gfx10<op>, 2082 FLAT_Real_RTN_gfx10<op>, 2083 FLAT_Real_SADDR_RTN_gfx10<op>; 2084 2085multiclass FLAT_Real_GlblAtomics_RTN_gfx10<bits<7> op> : 2086 FLAT_Real_RTN_gfx10<op>, 2087 FLAT_Real_SADDR_RTN_gfx10<op>; 2088 2089multiclass FLAT_Real_ScratchAllAddr_gfx10<bits<7> op> : 2090 FLAT_Real_Base_gfx10<op>, 2091 FLAT_Real_SADDR_gfx10<op>, 2092 FLAT_Real_ST_gfx10<op>; 2093 2094multiclass FLAT_Real_AllAddr_LDS_gfx10<bits<7> op, 2095 string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> { 2096 let AsmString = opname # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds" in 2097 defm "" : FLAT_Real_Base_gfx10<op>; 2098 2099 let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds" in 2100 defm "" : FLAT_Real_SADDR_gfx10<op>; 2101} 2102 2103multiclass FLAT_Real_ScratchAllAddr_LDS_gfx10<bits<7> op, 2104 string opname = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).PseudoInstr)> { 2105 defm "" : FLAT_Real_AllAddr_LDS_gfx10<op>; 2106 2107 let AsmString = opname # !cast<FLAT_Pseudo>(NAME#"_ST").AsmOperands # " lds" in 2108 defm "" : FLAT_Real_ST_gfx10<op>; 2109} 2110 2111// ENC_FLAT. 2112defm FLAT_LOAD_UBYTE : FLAT_Real_Base_gfx10<0x008>; 2113defm FLAT_LOAD_SBYTE : FLAT_Real_Base_gfx10<0x009>; 2114defm FLAT_LOAD_USHORT : FLAT_Real_Base_gfx10<0x00a>; 2115defm FLAT_LOAD_SSHORT : FLAT_Real_Base_gfx10<0x00b>; 2116defm FLAT_LOAD_DWORD : FLAT_Real_Base_gfx10<0x00c>; 2117defm FLAT_LOAD_DWORDX2 : FLAT_Real_Base_gfx10<0x00d>; 2118defm FLAT_LOAD_DWORDX4 : FLAT_Real_Base_gfx10<0x00e>; 2119defm FLAT_LOAD_DWORDX3 : FLAT_Real_Base_gfx10<0x00f>; 2120defm FLAT_STORE_BYTE : FLAT_Real_Base_gfx10<0x018>; 2121defm FLAT_STORE_BYTE_D16_HI : FLAT_Real_Base_gfx10<0x019>; 2122defm FLAT_STORE_SHORT : FLAT_Real_Base_gfx10<0x01a>; 2123defm FLAT_STORE_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x01b>; 2124defm FLAT_STORE_DWORD : FLAT_Real_Base_gfx10<0x01c>; 2125defm FLAT_STORE_DWORDX2 : FLAT_Real_Base_gfx10<0x01d>; 2126defm FLAT_STORE_DWORDX4 : FLAT_Real_Base_gfx10<0x01e>; 2127defm FLAT_STORE_DWORDX3 : FLAT_Real_Base_gfx10<0x01f>; 2128defm FLAT_LOAD_UBYTE_D16 : FLAT_Real_Base_gfx10<0x020>; 2129defm FLAT_LOAD_UBYTE_D16_HI : FLAT_Real_Base_gfx10<0x021>; 2130defm FLAT_LOAD_SBYTE_D16 : FLAT_Real_Base_gfx10<0x022>; 2131defm FLAT_LOAD_SBYTE_D16_HI : FLAT_Real_Base_gfx10<0x023>; 2132defm FLAT_LOAD_SHORT_D16 : FLAT_Real_Base_gfx10<0x024>; 2133defm FLAT_LOAD_SHORT_D16_HI : FLAT_Real_Base_gfx10<0x025>; 2134defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_gfx10<0x030>; 2135defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_gfx10<0x031>; 2136defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_gfx10<0x032>; 2137defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_gfx10<0x033>; 2138defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_gfx10<0x035>; 2139defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_gfx10<0x036>; 2140defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_gfx10<0x037>; 2141defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_gfx10<0x038>; 2142defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_gfx10<0x039>; 2143defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_gfx10<0x03a>; 2144defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_gfx10<0x03b>; 2145defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_gfx10<0x03c>; 2146defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_gfx10<0x03d>; 2147defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_gfx10<0x03e>; 2148defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_gfx10<0x03f>; 2149defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_gfx10<0x040>; 2150defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_gfx10<0x050>; 2151defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x051>; 2152defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_gfx10<0x052>; 2153defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_gfx10<0x053>; 2154defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_gfx10<0x055>; 2155defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_gfx10<0x056>; 2156defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_gfx10<0x057>; 2157defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_gfx10<0x058>; 2158defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_gfx10<0x059>; 2159defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_gfx10<0x05a>; 2160defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_gfx10<0x05b>; 2161defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_gfx10<0x05c>; 2162defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_gfx10<0x05d>; 2163defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_gfx10<0x05e>; 2164defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_gfx10<0x05f>; 2165defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_gfx10<0x060>; 2166 2167 2168// ENC_FLAT_GLBL. 2169defm GLOBAL_LOAD_UBYTE : FLAT_Real_AllAddr_gfx10<0x008>; 2170defm GLOBAL_LOAD_SBYTE : FLAT_Real_AllAddr_gfx10<0x009>; 2171defm GLOBAL_LOAD_USHORT : FLAT_Real_AllAddr_gfx10<0x00a>; 2172defm GLOBAL_LOAD_SSHORT : FLAT_Real_AllAddr_gfx10<0x00b>; 2173defm GLOBAL_LOAD_DWORD : FLAT_Real_AllAddr_gfx10<0x00c>; 2174defm GLOBAL_LOAD_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x00d>; 2175defm GLOBAL_LOAD_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x00e>; 2176defm GLOBAL_LOAD_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x00f>; 2177defm GLOBAL_STORE_BYTE : FLAT_Real_AllAddr_gfx10<0x018>; 2178defm GLOBAL_STORE_BYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x019>; 2179defm GLOBAL_STORE_SHORT : FLAT_Real_AllAddr_gfx10<0x01a>; 2180defm GLOBAL_STORE_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x01b>; 2181defm GLOBAL_STORE_DWORD : FLAT_Real_AllAddr_gfx10<0x01c>; 2182defm GLOBAL_STORE_DWORDX2 : FLAT_Real_AllAddr_gfx10<0x01d>; 2183defm GLOBAL_STORE_DWORDX4 : FLAT_Real_AllAddr_gfx10<0x01e>; 2184defm GLOBAL_STORE_DWORDX3 : FLAT_Real_AllAddr_gfx10<0x01f>; 2185defm GLOBAL_LOAD_UBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x020>; 2186defm GLOBAL_LOAD_UBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x021>; 2187defm GLOBAL_LOAD_SBYTE_D16 : FLAT_Real_AllAddr_gfx10<0x022>; 2188defm GLOBAL_LOAD_SBYTE_D16_HI : FLAT_Real_AllAddr_gfx10<0x023>; 2189defm GLOBAL_LOAD_SHORT_D16 : FLAT_Real_AllAddr_gfx10<0x024>; 2190defm GLOBAL_LOAD_SHORT_D16_HI : FLAT_Real_AllAddr_gfx10<0x025>; 2191defm GLOBAL_ATOMIC_SWAP : FLAT_Real_GlblAtomics_gfx10<0x030>; 2192defm GLOBAL_ATOMIC_CMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x031>; 2193defm GLOBAL_ATOMIC_ADD : FLAT_Real_GlblAtomics_gfx10<0x032>; 2194defm GLOBAL_ATOMIC_SUB : FLAT_Real_GlblAtomics_gfx10<0x033>; 2195defm GLOBAL_ATOMIC_CSUB : FLAT_Real_GlblAtomics_gfx10<0x034>; 2196defm GLOBAL_ATOMIC_SMIN : FLAT_Real_GlblAtomics_gfx10<0x035>; 2197defm GLOBAL_ATOMIC_UMIN : FLAT_Real_GlblAtomics_gfx10<0x036>; 2198defm GLOBAL_ATOMIC_SMAX : FLAT_Real_GlblAtomics_gfx10<0x037>; 2199defm GLOBAL_ATOMIC_UMAX : FLAT_Real_GlblAtomics_gfx10<0x038>; 2200defm GLOBAL_ATOMIC_AND : FLAT_Real_GlblAtomics_gfx10<0x039>; 2201defm GLOBAL_ATOMIC_OR : FLAT_Real_GlblAtomics_gfx10<0x03a>; 2202defm GLOBAL_ATOMIC_XOR : FLAT_Real_GlblAtomics_gfx10<0x03b>; 2203defm GLOBAL_ATOMIC_INC : FLAT_Real_GlblAtomics_gfx10<0x03c>; 2204defm GLOBAL_ATOMIC_DEC : FLAT_Real_GlblAtomics_gfx10<0x03d>; 2205defm GLOBAL_ATOMIC_FCMPSWAP : FLAT_Real_GlblAtomics_gfx10<0x03e>; 2206defm GLOBAL_ATOMIC_FMIN : FLAT_Real_GlblAtomics_gfx10<0x03f>; 2207defm GLOBAL_ATOMIC_FMAX : FLAT_Real_GlblAtomics_gfx10<0x040>; 2208defm GLOBAL_ATOMIC_SWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x050>; 2209defm GLOBAL_ATOMIC_CMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x051>; 2210defm GLOBAL_ATOMIC_ADD_X2 : FLAT_Real_GlblAtomics_gfx10<0x052>; 2211defm GLOBAL_ATOMIC_SUB_X2 : FLAT_Real_GlblAtomics_gfx10<0x053>; 2212defm GLOBAL_ATOMIC_SMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x055>; 2213defm GLOBAL_ATOMIC_UMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x056>; 2214defm GLOBAL_ATOMIC_SMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x057>; 2215defm GLOBAL_ATOMIC_UMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x058>; 2216defm GLOBAL_ATOMIC_AND_X2 : FLAT_Real_GlblAtomics_gfx10<0x059>; 2217defm GLOBAL_ATOMIC_OR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05a>; 2218defm GLOBAL_ATOMIC_XOR_X2 : FLAT_Real_GlblAtomics_gfx10<0x05b>; 2219defm GLOBAL_ATOMIC_INC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05c>; 2220defm GLOBAL_ATOMIC_DEC_X2 : FLAT_Real_GlblAtomics_gfx10<0x05d>; 2221defm GLOBAL_ATOMIC_FCMPSWAP_X2 : FLAT_Real_GlblAtomics_gfx10<0x05e>; 2222defm GLOBAL_ATOMIC_FMIN_X2 : FLAT_Real_GlblAtomics_gfx10<0x05f>; 2223defm GLOBAL_ATOMIC_FMAX_X2 : FLAT_Real_GlblAtomics_gfx10<0x060>; 2224defm GLOBAL_LOAD_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x016>; 2225defm GLOBAL_STORE_DWORD_ADDTID : FLAT_Real_AllAddr_gfx10<0x017>; 2226 2227defm GLOBAL_LOAD_LDS_UBYTE : FLAT_Real_AllAddr_LDS_gfx10 <0x008>; 2228defm GLOBAL_LOAD_LDS_SBYTE : FLAT_Real_AllAddr_LDS_gfx10 <0x009>; 2229defm GLOBAL_LOAD_LDS_USHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00a>; 2230defm GLOBAL_LOAD_LDS_SSHORT : FLAT_Real_AllAddr_LDS_gfx10 <0x00b>; 2231defm GLOBAL_LOAD_LDS_DWORD : FLAT_Real_AllAddr_LDS_gfx10 <0x00c>; 2232 2233// ENC_FLAT_SCRATCH. 2234defm SCRATCH_LOAD_UBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x008>; 2235defm SCRATCH_LOAD_SBYTE : FLAT_Real_ScratchAllAddr_gfx10<0x009>; 2236defm SCRATCH_LOAD_USHORT : FLAT_Real_ScratchAllAddr_gfx10<0x00a>; 2237defm SCRATCH_LOAD_SSHORT : FLAT_Real_ScratchAllAddr_gfx10<0x00b>; 2238defm SCRATCH_LOAD_DWORD : FLAT_Real_ScratchAllAddr_gfx10<0x00c>; 2239defm SCRATCH_LOAD_DWORDX2 : FLAT_Real_ScratchAllAddr_gfx10<0x00d>; 2240defm SCRATCH_LOAD_DWORDX4 : FLAT_Real_ScratchAllAddr_gfx10<0x00e>; 2241defm SCRATCH_LOAD_DWORDX3 : FLAT_Real_ScratchAllAddr_gfx10<0x00f>; 2242defm SCRATCH_STORE_BYTE : FLAT_Real_ScratchAllAddr_gfx10<0x018>; 2243defm SCRATCH_STORE_BYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x019>; 2244defm SCRATCH_STORE_SHORT : FLAT_Real_ScratchAllAddr_gfx10<0x01a>; 2245defm SCRATCH_STORE_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x01b>; 2246defm SCRATCH_STORE_DWORD : FLAT_Real_ScratchAllAddr_gfx10<0x01c>; 2247defm SCRATCH_STORE_DWORDX2 : FLAT_Real_ScratchAllAddr_gfx10<0x01d>; 2248defm SCRATCH_STORE_DWORDX4 : FLAT_Real_ScratchAllAddr_gfx10<0x01e>; 2249defm SCRATCH_STORE_DWORDX3 : FLAT_Real_ScratchAllAddr_gfx10<0x01f>; 2250defm SCRATCH_LOAD_UBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x020>; 2251defm SCRATCH_LOAD_UBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x021>; 2252defm SCRATCH_LOAD_SBYTE_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x022>; 2253defm SCRATCH_LOAD_SBYTE_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x023>; 2254defm SCRATCH_LOAD_SHORT_D16 : FLAT_Real_ScratchAllAddr_gfx10<0x024>; 2255defm SCRATCH_LOAD_SHORT_D16_HI : FLAT_Real_ScratchAllAddr_gfx10<0x025>; 2256 2257defm SCRATCH_LOAD_LDS_UBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x008>; 2258defm SCRATCH_LOAD_LDS_SBYTE : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x009>; 2259defm SCRATCH_LOAD_LDS_USHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00a>; 2260defm SCRATCH_LOAD_LDS_SSHORT : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00b>; 2261defm SCRATCH_LOAD_LDS_DWORD : FLAT_Real_ScratchAllAddr_LDS_gfx10 <0x00c>; 2262 2263//===----------------------------------------------------------------------===// 2264// GFX11 2265//===----------------------------------------------------------------------===// 2266 2267class FLAT_Real_gfx11 <bits<7> op, FLAT_Pseudo ps, string opName = ps.Mnemonic> : 2268 FLAT_Real <op, ps, opName>, 2269 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX11> { 2270 let AssemblerPredicate = isGFX11Only; 2271 let DecoderNamespace = "GFX11"; 2272 2273 let Inst{13} = !if(ps.has_dlc, cpol{CPolBit.DLC}, ps.dlcValue); 2274 let Inst{14} = !if(ps.has_glc, cpol{CPolBit.GLC}, ps.glcValue); 2275 let Inst{15} = cpol{CPolBit.SLC}; 2276 let Inst{17-16} = seg; 2277 let Inst{55} = ps.sve; 2278} 2279 2280multiclass FLAT_Aliases_gfx11<string ps, string opName, int renamed> { 2281 if renamed then 2282 def _renamed_gfx11 : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>, Requires<[isGFX11Only]>; 2283} 2284 2285multiclass FLAT_Real_Base_gfx11<bits<7> op, string ps, string opName, int renamed = false> : 2286 FLAT_Aliases_gfx11<ps, opName, renamed> { 2287 def _gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps), opName> { 2288 let Inst{54-48} = SGPR_NULL_gfx11plus.Index; 2289 } 2290} 2291 2292multiclass FLAT_Real_RTN_gfx11<bits<7> op, string ps, string opName> { 2293 def _RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_RTN"), opName> { 2294 let Inst{54-48} = SGPR_NULL_gfx11plus.Index; 2295 } 2296} 2297 2298multiclass FLAT_Real_SADDR_gfx11<bits<7> op, string ps, string opName> { 2299 def _SADDR_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SADDR"), opName>; 2300} 2301 2302multiclass FLAT_Real_SADDR_RTN_gfx11<bits<7> op, string ps, string opName> { 2303 def _SADDR_RTN_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SADDR_RTN"), opName>; 2304} 2305 2306multiclass FLAT_Real_ST_gfx11<bits<7> op, string ps, string opName> { 2307 def _ST_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_ST"), opName> { 2308 let Inst{54-48} = SGPR_NULL_gfx11plus.Index; 2309 let OtherPredicates = [HasFlatScratchSTMode]; 2310 } 2311} 2312 2313multiclass FLAT_Real_SVS_gfx11<bits<7> op, string ps, string opName> { 2314 def _SVS_gfx11 : FLAT_Real_gfx11<op, !cast<FLAT_Pseudo>(ps#"_SVS"), opName> { 2315 let OtherPredicates = [HasFlatScratchSVSMode]; 2316 } 2317} 2318 2319multiclass FLAT_Real_AllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false> : 2320 FLAT_Real_Base_gfx11<op, ps, opName, renamed>, 2321 FLAT_Real_SADDR_gfx11<op, ps, opName>; 2322 2323multiclass FLAT_Real_Atomics_gfx11<bits<7> op, string ps, string opName, int renamed = false> : 2324 FLAT_Real_Base_gfx11<op, ps, opName, renamed>, 2325 FLAT_Real_RTN_gfx11<op, ps, opName>; 2326 2327multiclass FLAT_Real_GlblAtomics_gfx11<bits<7> op, string ps, string opName, int renamed = false> : 2328 FLAT_Real_AllAddr_gfx11<op, ps, opName, renamed>, 2329 FLAT_Real_RTN_gfx11<op, ps, opName>, 2330 FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>; 2331 2332multiclass FLAT_Real_GlblAtomics_RTN_gfx11<bits<7> op, string ps, string opName, int renamed = false> : 2333 FLAT_Aliases_gfx11<ps#"_RTN", opName, renamed>, 2334 FLAT_Real_RTN_gfx11<op, ps, opName>, 2335 FLAT_Real_SADDR_RTN_gfx11<op, ps, opName>; 2336 2337multiclass FLAT_Real_ScratchAllAddr_gfx11<bits<7> op, string ps, string opName, int renamed = false> : 2338 FLAT_Real_Base_gfx11<op, ps, opName, renamed>, 2339 FLAT_Real_SADDR_gfx11<op, ps, opName>, 2340 FLAT_Real_ST_gfx11<op, ps, opName>, 2341 FLAT_Real_SVS_gfx11<op, ps, opName>; 2342 2343// ENC_FLAT. 2344defm FLAT_LOAD_U8 : FLAT_Real_Base_gfx11<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>; 2345defm FLAT_LOAD_I8 : FLAT_Real_Base_gfx11<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>; 2346defm FLAT_LOAD_U16 : FLAT_Real_Base_gfx11<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>; 2347defm FLAT_LOAD_I16 : FLAT_Real_Base_gfx11<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>; 2348defm FLAT_LOAD_B32 : FLAT_Real_Base_gfx11<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>; 2349defm FLAT_LOAD_B64 : FLAT_Real_Base_gfx11<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>; 2350defm FLAT_LOAD_B96 : FLAT_Real_Base_gfx11<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>; 2351defm FLAT_LOAD_B128 : FLAT_Real_Base_gfx11<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>; 2352defm FLAT_STORE_B8 : FLAT_Real_Base_gfx11<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>; 2353defm FLAT_STORE_B16 : FLAT_Real_Base_gfx11<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>; 2354defm FLAT_STORE_B32 : FLAT_Real_Base_gfx11<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>; 2355defm FLAT_STORE_B64 : FLAT_Real_Base_gfx11<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>; 2356defm FLAT_STORE_B96 : FLAT_Real_Base_gfx11<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>; 2357defm FLAT_STORE_B128 : FLAT_Real_Base_gfx11<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>; 2358defm FLAT_LOAD_D16_U8 : FLAT_Real_Base_gfx11<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">; 2359defm FLAT_LOAD_D16_I8 : FLAT_Real_Base_gfx11<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">; 2360defm FLAT_LOAD_D16_B16 : FLAT_Real_Base_gfx11<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">; 2361defm FLAT_LOAD_D16_HI_U8 : FLAT_Real_Base_gfx11<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">; 2362defm FLAT_LOAD_D16_HI_I8 : FLAT_Real_Base_gfx11<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">; 2363defm FLAT_LOAD_D16_HI_B16 : FLAT_Real_Base_gfx11<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">; 2364defm FLAT_STORE_D16_HI_B8 : FLAT_Real_Base_gfx11<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">; 2365defm FLAT_STORE_D16_HI_B16 : FLAT_Real_Base_gfx11<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">; 2366defm FLAT_ATOMIC_SWAP_B32 : FLAT_Real_Atomics_gfx11<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>; 2367defm FLAT_ATOMIC_CMPSWAP_B32 : FLAT_Real_Atomics_gfx11<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>; 2368defm FLAT_ATOMIC_ADD_U32 : FLAT_Real_Atomics_gfx11<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>; 2369defm FLAT_ATOMIC_SUB_U32 : FLAT_Real_Atomics_gfx11<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>; 2370defm FLAT_ATOMIC_MIN_I32 : FLAT_Real_Atomics_gfx11<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>; 2371defm FLAT_ATOMIC_MIN_U32 : FLAT_Real_Atomics_gfx11<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>; 2372defm FLAT_ATOMIC_MAX_I32 : FLAT_Real_Atomics_gfx11<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>; 2373defm FLAT_ATOMIC_MAX_U32 : FLAT_Real_Atomics_gfx11<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>; 2374defm FLAT_ATOMIC_AND_B32 : FLAT_Real_Atomics_gfx11<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>; 2375defm FLAT_ATOMIC_OR_B32 : FLAT_Real_Atomics_gfx11<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>; 2376defm FLAT_ATOMIC_XOR_B32 : FLAT_Real_Atomics_gfx11<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>; 2377defm FLAT_ATOMIC_INC_U32 : FLAT_Real_Atomics_gfx11<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>; 2378defm FLAT_ATOMIC_DEC_U32 : FLAT_Real_Atomics_gfx11<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>; 2379defm FLAT_ATOMIC_SWAP_B64 : FLAT_Real_Atomics_gfx11<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>; 2380defm FLAT_ATOMIC_CMPSWAP_B64 : FLAT_Real_Atomics_gfx11<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>; 2381defm FLAT_ATOMIC_ADD_U64 : FLAT_Real_Atomics_gfx11<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>; 2382defm FLAT_ATOMIC_SUB_U64 : FLAT_Real_Atomics_gfx11<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>; 2383defm FLAT_ATOMIC_MIN_I64 : FLAT_Real_Atomics_gfx11<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>; 2384defm FLAT_ATOMIC_MIN_U64 : FLAT_Real_Atomics_gfx11<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>; 2385defm FLAT_ATOMIC_MAX_I64 : FLAT_Real_Atomics_gfx11<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>; 2386defm FLAT_ATOMIC_MAX_U64 : FLAT_Real_Atomics_gfx11<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>; 2387defm FLAT_ATOMIC_AND_B64 : FLAT_Real_Atomics_gfx11<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>; 2388defm FLAT_ATOMIC_OR_B64 : FLAT_Real_Atomics_gfx11<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>; 2389defm FLAT_ATOMIC_XOR_B64 : FLAT_Real_Atomics_gfx11<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>; 2390defm FLAT_ATOMIC_INC_U64 : FLAT_Real_Atomics_gfx11<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>; 2391defm FLAT_ATOMIC_DEC_U64 : FLAT_Real_Atomics_gfx11<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>; 2392defm FLAT_ATOMIC_CMPSWAP_F32 : FLAT_Real_Atomics_gfx11<0x050, "FLAT_ATOMIC_FCMPSWAP", "flat_atomic_cmpswap_f32">; 2393defm FLAT_ATOMIC_MIN_F32 : FLAT_Real_Atomics_gfx11<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_f32">; 2394defm FLAT_ATOMIC_MAX_F32 : FLAT_Real_Atomics_gfx11<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_f32">; 2395defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_gfx11<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">; 2396 2397// ENC_FLAT_GLBL. 2398defm GLOBAL_LOAD_U8 : FLAT_Real_AllAddr_gfx11<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>; 2399defm GLOBAL_LOAD_I8 : FLAT_Real_AllAddr_gfx11<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>; 2400defm GLOBAL_LOAD_U16 : FLAT_Real_AllAddr_gfx11<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>; 2401defm GLOBAL_LOAD_I16 : FLAT_Real_AllAddr_gfx11<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>; 2402defm GLOBAL_LOAD_B32 : FLAT_Real_AllAddr_gfx11<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>; 2403defm GLOBAL_LOAD_B64 : FLAT_Real_AllAddr_gfx11<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>; 2404defm GLOBAL_LOAD_B96 : FLAT_Real_AllAddr_gfx11<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>; 2405defm GLOBAL_LOAD_B128 : FLAT_Real_AllAddr_gfx11<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>; 2406defm GLOBAL_STORE_B8 : FLAT_Real_AllAddr_gfx11<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>; 2407defm GLOBAL_STORE_B16 : FLAT_Real_AllAddr_gfx11<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>; 2408defm GLOBAL_STORE_B32 : FLAT_Real_AllAddr_gfx11<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>; 2409defm GLOBAL_STORE_B64 : FLAT_Real_AllAddr_gfx11<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>; 2410defm GLOBAL_STORE_B96 : FLAT_Real_AllAddr_gfx11<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>; 2411defm GLOBAL_STORE_B128 : FLAT_Real_AllAddr_gfx11<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>; 2412defm GLOBAL_LOAD_D16_U8 : FLAT_Real_AllAddr_gfx11<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">; 2413defm GLOBAL_LOAD_D16_I8 : FLAT_Real_AllAddr_gfx11<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">; 2414defm GLOBAL_LOAD_D16_B16 : FLAT_Real_AllAddr_gfx11<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">; 2415defm GLOBAL_LOAD_D16_HI_U8 : FLAT_Real_AllAddr_gfx11<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">; 2416defm GLOBAL_LOAD_D16_HI_I8 : FLAT_Real_AllAddr_gfx11<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">; 2417defm GLOBAL_LOAD_D16_HI_B16 : FLAT_Real_AllAddr_gfx11<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">; 2418defm GLOBAL_STORE_D16_HI_B8 : FLAT_Real_AllAddr_gfx11<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">; 2419defm GLOBAL_STORE_D16_HI_B16 : FLAT_Real_AllAddr_gfx11<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">; 2420defm GLOBAL_LOAD_ADDTID_B32 : FLAT_Real_AllAddr_gfx11<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">; 2421defm GLOBAL_STORE_ADDTID_B32 : FLAT_Real_AllAddr_gfx11<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">; 2422defm GLOBAL_ATOMIC_SWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>; 2423defm GLOBAL_ATOMIC_CMPSWAP_B32 : FLAT_Real_GlblAtomics_gfx11<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>; 2424defm GLOBAL_ATOMIC_ADD_U32 : FLAT_Real_GlblAtomics_gfx11<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>; 2425defm GLOBAL_ATOMIC_SUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>; 2426defm GLOBAL_ATOMIC_CSUB_U32 : FLAT_Real_GlblAtomics_gfx11<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_csub_u32", true>; 2427defm GLOBAL_ATOMIC_MIN_I32 : FLAT_Real_GlblAtomics_gfx11<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>; 2428defm GLOBAL_ATOMIC_MIN_U32 : FLAT_Real_GlblAtomics_gfx11<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>; 2429defm GLOBAL_ATOMIC_MAX_I32 : FLAT_Real_GlblAtomics_gfx11<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>; 2430defm GLOBAL_ATOMIC_MAX_U32 : FLAT_Real_GlblAtomics_gfx11<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>; 2431defm GLOBAL_ATOMIC_AND_B32 : FLAT_Real_GlblAtomics_gfx11<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>; 2432defm GLOBAL_ATOMIC_OR_B32 : FLAT_Real_GlblAtomics_gfx11<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>; 2433defm GLOBAL_ATOMIC_XOR_B32 : FLAT_Real_GlblAtomics_gfx11<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>; 2434defm GLOBAL_ATOMIC_INC_U32 : FLAT_Real_GlblAtomics_gfx11<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>; 2435defm GLOBAL_ATOMIC_DEC_U32 : FLAT_Real_GlblAtomics_gfx11<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>; 2436defm GLOBAL_ATOMIC_SWAP_B64 : FLAT_Real_GlblAtomics_gfx11<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>; 2437defm GLOBAL_ATOMIC_CMPSWAP_B64 : FLAT_Real_GlblAtomics_gfx11<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>; 2438defm GLOBAL_ATOMIC_ADD_U64 : FLAT_Real_GlblAtomics_gfx11<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>; 2439defm GLOBAL_ATOMIC_SUB_U64 : FLAT_Real_GlblAtomics_gfx11<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>; 2440defm GLOBAL_ATOMIC_MIN_I64 : FLAT_Real_GlblAtomics_gfx11<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>; 2441defm GLOBAL_ATOMIC_MIN_U64 : FLAT_Real_GlblAtomics_gfx11<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>; 2442defm GLOBAL_ATOMIC_MAX_I64 : FLAT_Real_GlblAtomics_gfx11<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>; 2443defm GLOBAL_ATOMIC_MAX_U64 : FLAT_Real_GlblAtomics_gfx11<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>; 2444defm GLOBAL_ATOMIC_AND_B64 : FLAT_Real_GlblAtomics_gfx11<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>; 2445defm GLOBAL_ATOMIC_OR_B64 : FLAT_Real_GlblAtomics_gfx11<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>; 2446defm GLOBAL_ATOMIC_XOR_B64 : FLAT_Real_GlblAtomics_gfx11<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>; 2447defm GLOBAL_ATOMIC_INC_U64 : FLAT_Real_GlblAtomics_gfx11<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>; 2448defm GLOBAL_ATOMIC_DEC_U64 : FLAT_Real_GlblAtomics_gfx11<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>; 2449defm GLOBAL_ATOMIC_CMPSWAP_F32 : FLAT_Real_GlblAtomics_gfx11<0x050, "GLOBAL_ATOMIC_FCMPSWAP", "global_atomic_cmpswap_f32">; 2450defm GLOBAL_ATOMIC_MIN_F32 : FLAT_Real_GlblAtomics_gfx11<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_f32">; 2451defm GLOBAL_ATOMIC_MAX_F32 : FLAT_Real_GlblAtomics_gfx11<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_f32">; 2452defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Real_GlblAtomics_gfx11<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">; 2453 2454// ENC_FLAT_SCRATCH. 2455defm SCRATCH_LOAD_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>; 2456defm SCRATCH_LOAD_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>; 2457defm SCRATCH_LOAD_U16 : FLAT_Real_ScratchAllAddr_gfx11<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>; 2458defm SCRATCH_LOAD_I16 : FLAT_Real_ScratchAllAddr_gfx11<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>; 2459defm SCRATCH_LOAD_B32 : FLAT_Real_ScratchAllAddr_gfx11<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>; 2460defm SCRATCH_LOAD_B64 : FLAT_Real_ScratchAllAddr_gfx11<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>; 2461defm SCRATCH_LOAD_B96 : FLAT_Real_ScratchAllAddr_gfx11<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>; 2462defm SCRATCH_LOAD_B128 : FLAT_Real_ScratchAllAddr_gfx11<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>; 2463defm SCRATCH_STORE_B8 : FLAT_Real_ScratchAllAddr_gfx11<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>; 2464defm SCRATCH_STORE_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>; 2465defm SCRATCH_STORE_B32 : FLAT_Real_ScratchAllAddr_gfx11<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>; 2466defm SCRATCH_STORE_B64 : FLAT_Real_ScratchAllAddr_gfx11<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>; 2467defm SCRATCH_STORE_B96 : FLAT_Real_ScratchAllAddr_gfx11<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>; 2468defm SCRATCH_STORE_B128 : FLAT_Real_ScratchAllAddr_gfx11<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>; 2469defm SCRATCH_LOAD_D16_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">; 2470defm SCRATCH_LOAD_D16_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">; 2471defm SCRATCH_LOAD_D16_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">; 2472defm SCRATCH_LOAD_D16_HI_U8 : FLAT_Real_ScratchAllAddr_gfx11<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">; 2473defm SCRATCH_LOAD_D16_HI_I8 : FLAT_Real_ScratchAllAddr_gfx11<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">; 2474defm SCRATCH_LOAD_D16_HI_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">; 2475defm SCRATCH_STORE_D16_HI_B8 : FLAT_Real_ScratchAllAddr_gfx11<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">; 2476defm SCRATCH_STORE_D16_HI_B16 : FLAT_Real_ScratchAllAddr_gfx11<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">; 2477 2478//===----------------------------------------------------------------------===// 2479// GFX12 2480//===----------------------------------------------------------------------===// 2481 2482class VFLAT_Real_gfx12 <bits<8> op, FLAT_Pseudo ps, 2483 string opName = ps.Mnemonic> : 2484 VFLAT_Real <op, ps, opName>, 2485 SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX12> { 2486 let AssemblerPredicate = isGFX12Plus; 2487 let DecoderNamespace = "GFX12"; 2488 2489 let Inst{25-24} = !if(ps.is_flat_scratch, 0b01, 2490 !if(ps.is_flat_global, 0b10, 0b00)); 2491} 2492 2493multiclass VFLAT_Aliases_gfx12<string ps, string opName, int renamed, string alias> { 2494 if renamed then 2495 def _renamed_gfx12 : MnemonicAlias<!cast<FLAT_Pseudo>(ps).Mnemonic, opName>, Requires<[isGFX12Plus]>; 2496 if !not(!empty(alias)) then 2497 def _alias_gfx12 : MnemonicAlias<alias, opName>, Requires<[isGFX12Plus]>; 2498} 2499 2500multiclass VFLAT_Real_Base_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> : 2501 VFLAT_Aliases_gfx12<ps, opName, renamed, alias> { 2502 def _gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps), opName> { 2503 let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding); 2504 } 2505} 2506 2507multiclass VFLAT_Real_RTN_gfx12<bits<8> op, string ps, string opName> { 2508 def _RTN_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_RTN"), opName> { 2509 let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding); 2510 } 2511} 2512 2513multiclass VFLAT_Real_SADDR_gfx12<bits<8> op, string ps, string opName> { 2514 def _SADDR_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_SADDR"), opName>; 2515} 2516 2517multiclass VFLAT_Real_SADDR_RTN_gfx12<bits<8> op, string ps, string opName> { 2518 def _SADDR_RTN_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_SADDR_RTN"), opName>; 2519} 2520 2521multiclass VFLAT_Real_ST_gfx12<bits<8> op, string ps, string opName> { 2522 def _ST_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_ST"), opName> { 2523 let Inst{6-0} = !cast<int>(SGPR_NULL_gfx11plus.HWEncoding); 2524 let OtherPredicates = [HasFlatScratchSTMode]; 2525 } 2526} 2527 2528multiclass VFLAT_Real_SVS_gfx12<bits<8> op, string ps, string opName> { 2529 def _SVS_gfx12 : VFLAT_Real_gfx12<op, !cast<FLAT_Pseudo>(ps#"_SVS"), opName> { 2530 let OtherPredicates = [HasFlatScratchSVSMode]; 2531 } 2532} 2533 2534multiclass VFLAT_Real_Atomics_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> : 2535 VFLAT_Real_Base_gfx12<op, ps, opName, renamed, alias>, 2536 VFLAT_Real_RTN_gfx12<op, ps, opName>; 2537 2538multiclass VGLOBAL_Real_AllAddr_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> : 2539 VFLAT_Real_Base_gfx12<op, ps, opName, renamed, alias>, 2540 VFLAT_Real_SADDR_gfx12<op, ps, opName>; 2541 2542multiclass VGLOBAL_Real_Atomics_gfx12<bits<8> op, string ps, string opName, int renamed = false, string alias = ""> : 2543 VGLOBAL_Real_AllAddr_gfx12<op, ps, opName, renamed, alias>, 2544 VFLAT_Real_RTN_gfx12<op, ps, opName>, 2545 VFLAT_Real_SADDR_RTN_gfx12<op, ps, opName>; 2546 2547multiclass VSCRATCH_Real_AllAddr_gfx12<bits<8> op, string ps, string opName, int renamed = false> : 2548 VFLAT_Real_Base_gfx12<op, ps, opName, renamed>, 2549 VFLAT_Real_SADDR_gfx12<op, ps, opName>, 2550 VFLAT_Real_ST_gfx12<op, ps, opName>, 2551 VFLAT_Real_SVS_gfx12<op, ps, opName>; 2552 2553// ENC_VFLAT. 2554defm FLAT_LOAD_U8 : VFLAT_Real_Base_gfx12<0x010, "FLAT_LOAD_UBYTE", "flat_load_u8", true>; 2555defm FLAT_LOAD_I8 : VFLAT_Real_Base_gfx12<0x011, "FLAT_LOAD_SBYTE", "flat_load_i8", true>; 2556defm FLAT_LOAD_U16 : VFLAT_Real_Base_gfx12<0x012, "FLAT_LOAD_USHORT", "flat_load_u16", true>; 2557defm FLAT_LOAD_I16 : VFLAT_Real_Base_gfx12<0x013, "FLAT_LOAD_SSHORT", "flat_load_i16", true>; 2558defm FLAT_LOAD_B32 : VFLAT_Real_Base_gfx12<0x014, "FLAT_LOAD_DWORD", "flat_load_b32", true>; 2559defm FLAT_LOAD_B64 : VFLAT_Real_Base_gfx12<0x015, "FLAT_LOAD_DWORDX2", "flat_load_b64", true>; 2560defm FLAT_LOAD_B96 : VFLAT_Real_Base_gfx12<0x016, "FLAT_LOAD_DWORDX3", "flat_load_b96", true>; 2561defm FLAT_LOAD_B128 : VFLAT_Real_Base_gfx12<0x017, "FLAT_LOAD_DWORDX4", "flat_load_b128", true>; 2562defm FLAT_STORE_B8 : VFLAT_Real_Base_gfx12<0x018, "FLAT_STORE_BYTE", "flat_store_b8", true>; 2563defm FLAT_STORE_B16 : VFLAT_Real_Base_gfx12<0x019, "FLAT_STORE_SHORT", "flat_store_b16", true>; 2564defm FLAT_STORE_B32 : VFLAT_Real_Base_gfx12<0x01a, "FLAT_STORE_DWORD", "flat_store_b32", true>; 2565defm FLAT_STORE_B64 : VFLAT_Real_Base_gfx12<0x01b, "FLAT_STORE_DWORDX2", "flat_store_b64", true>; 2566defm FLAT_STORE_B96 : VFLAT_Real_Base_gfx12<0x01c, "FLAT_STORE_DWORDX3", "flat_store_b96", true>; 2567defm FLAT_STORE_B128 : VFLAT_Real_Base_gfx12<0x01d, "FLAT_STORE_DWORDX4", "flat_store_b128", true>; 2568defm FLAT_LOAD_D16_U8 : VFLAT_Real_Base_gfx12<0x01e, "FLAT_LOAD_UBYTE_D16", "flat_load_d16_u8">; 2569defm FLAT_LOAD_D16_I8 : VFLAT_Real_Base_gfx12<0x01f, "FLAT_LOAD_SBYTE_D16", "flat_load_d16_i8">; 2570defm FLAT_LOAD_D16_B16 : VFLAT_Real_Base_gfx12<0x020, "FLAT_LOAD_SHORT_D16", "flat_load_d16_b16">; 2571defm FLAT_LOAD_D16_HI_U8 : VFLAT_Real_Base_gfx12<0x021, "FLAT_LOAD_UBYTE_D16_HI", "flat_load_d16_hi_u8">; 2572defm FLAT_LOAD_D16_HI_I8 : VFLAT_Real_Base_gfx12<0x022, "FLAT_LOAD_SBYTE_D16_HI", "flat_load_d16_hi_i8">; 2573defm FLAT_LOAD_D16_HI_B16 : VFLAT_Real_Base_gfx12<0x023, "FLAT_LOAD_SHORT_D16_HI", "flat_load_d16_hi_b16">; 2574defm FLAT_STORE_D16_HI_B8 : VFLAT_Real_Base_gfx12<0x024, "FLAT_STORE_BYTE_D16_HI", "flat_store_d16_hi_b8">; 2575defm FLAT_STORE_D16_HI_B16 : VFLAT_Real_Base_gfx12<0x025, "FLAT_STORE_SHORT_D16_HI", "flat_store_d16_hi_b16">; 2576defm FLAT_ATOMIC_SWAP_B32 : VFLAT_Real_Atomics_gfx12<0x033, "FLAT_ATOMIC_SWAP", "flat_atomic_swap_b32", true>; 2577defm FLAT_ATOMIC_CMPSWAP_B32 : VFLAT_Real_Atomics_gfx12<0x034, "FLAT_ATOMIC_CMPSWAP", "flat_atomic_cmpswap_b32", true>; 2578defm FLAT_ATOMIC_ADD_U32 : VFLAT_Real_Atomics_gfx12<0x035, "FLAT_ATOMIC_ADD", "flat_atomic_add_u32", true>; 2579defm FLAT_ATOMIC_SUB_U32 : VFLAT_Real_Atomics_gfx12<0x036, "FLAT_ATOMIC_SUB", "flat_atomic_sub_u32", true>; 2580defm FLAT_ATOMIC_SUB_CLAMP_U32 : VFLAT_Real_Atomics_gfx12<0x037, "FLAT_ATOMIC_CSUB_U32", "flat_atomic_sub_clamp_u32", true>; 2581defm FLAT_ATOMIC_MIN_I32 : VFLAT_Real_Atomics_gfx12<0x038, "FLAT_ATOMIC_SMIN", "flat_atomic_min_i32", true>; 2582defm FLAT_ATOMIC_MIN_U32 : VFLAT_Real_Atomics_gfx12<0x039, "FLAT_ATOMIC_UMIN", "flat_atomic_min_u32", true>; 2583defm FLAT_ATOMIC_MAX_I32 : VFLAT_Real_Atomics_gfx12<0x03a, "FLAT_ATOMIC_SMAX", "flat_atomic_max_i32", true>; 2584defm FLAT_ATOMIC_MAX_U32 : VFLAT_Real_Atomics_gfx12<0x03b, "FLAT_ATOMIC_UMAX", "flat_atomic_max_u32", true>; 2585defm FLAT_ATOMIC_AND_B32 : VFLAT_Real_Atomics_gfx12<0x03c, "FLAT_ATOMIC_AND", "flat_atomic_and_b32", true>; 2586defm FLAT_ATOMIC_OR_B32 : VFLAT_Real_Atomics_gfx12<0x03d, "FLAT_ATOMIC_OR", "flat_atomic_or_b32", true>; 2587defm FLAT_ATOMIC_XOR_B32 : VFLAT_Real_Atomics_gfx12<0x03e, "FLAT_ATOMIC_XOR", "flat_atomic_xor_b32", true>; 2588defm FLAT_ATOMIC_INC_U32 : VFLAT_Real_Atomics_gfx12<0x03f, "FLAT_ATOMIC_INC", "flat_atomic_inc_u32", true>; 2589defm FLAT_ATOMIC_DEC_U32 : VFLAT_Real_Atomics_gfx12<0x040, "FLAT_ATOMIC_DEC", "flat_atomic_dec_u32", true>; 2590defm FLAT_ATOMIC_SWAP_B64 : VFLAT_Real_Atomics_gfx12<0x041, "FLAT_ATOMIC_SWAP_X2", "flat_atomic_swap_b64", true>; 2591defm FLAT_ATOMIC_CMPSWAP_B64 : VFLAT_Real_Atomics_gfx12<0x042, "FLAT_ATOMIC_CMPSWAP_X2", "flat_atomic_cmpswap_b64", true>; 2592defm FLAT_ATOMIC_ADD_U64 : VFLAT_Real_Atomics_gfx12<0x043, "FLAT_ATOMIC_ADD_X2", "flat_atomic_add_u64", true>; 2593defm FLAT_ATOMIC_SUB_U64 : VFLAT_Real_Atomics_gfx12<0x044, "FLAT_ATOMIC_SUB_X2", "flat_atomic_sub_u64", true>; 2594defm FLAT_ATOMIC_MIN_I64 : VFLAT_Real_Atomics_gfx12<0x045, "FLAT_ATOMIC_SMIN_X2", "flat_atomic_min_i64", true>; 2595defm FLAT_ATOMIC_MIN_U64 : VFLAT_Real_Atomics_gfx12<0x046, "FLAT_ATOMIC_UMIN_X2", "flat_atomic_min_u64", true>; 2596defm FLAT_ATOMIC_MAX_I64 : VFLAT_Real_Atomics_gfx12<0x047, "FLAT_ATOMIC_SMAX_X2", "flat_atomic_max_i64", true>; 2597defm FLAT_ATOMIC_MAX_U64 : VFLAT_Real_Atomics_gfx12<0x048, "FLAT_ATOMIC_UMAX_X2", "flat_atomic_max_u64", true>; 2598defm FLAT_ATOMIC_AND_B64 : VFLAT_Real_Atomics_gfx12<0x049, "FLAT_ATOMIC_AND_X2", "flat_atomic_and_b64", true>; 2599defm FLAT_ATOMIC_OR_B64 : VFLAT_Real_Atomics_gfx12<0x04a, "FLAT_ATOMIC_OR_X2", "flat_atomic_or_b64", true>; 2600defm FLAT_ATOMIC_XOR_B64 : VFLAT_Real_Atomics_gfx12<0x04b, "FLAT_ATOMIC_XOR_X2", "flat_atomic_xor_b64", true>; 2601defm FLAT_ATOMIC_INC_U64 : VFLAT_Real_Atomics_gfx12<0x04c, "FLAT_ATOMIC_INC_X2", "flat_atomic_inc_u64", true>; 2602defm FLAT_ATOMIC_DEC_U64 : VFLAT_Real_Atomics_gfx12<0x04d, "FLAT_ATOMIC_DEC_X2", "flat_atomic_dec_u64", true>; 2603defm FLAT_ATOMIC_MIN_NUM_F32 : VFLAT_Real_Atomics_gfx12<0x051, "FLAT_ATOMIC_FMIN", "flat_atomic_min_num_f32", true, "flat_atomic_min_f32">; 2604defm FLAT_ATOMIC_MAX_NUM_F32 : VFLAT_Real_Atomics_gfx12<0x052, "FLAT_ATOMIC_FMAX", "flat_atomic_max_num_f32", true, "flat_atomic_max_f32">; 2605defm FLAT_ATOMIC_ADD_F32 : VFLAT_Real_Atomics_gfx12<0x056, "FLAT_ATOMIC_ADD_F32", "flat_atomic_add_f32">; 2606 2607// ENC_VGLOBAL. 2608defm GLOBAL_LOAD_U8 : VGLOBAL_Real_AllAddr_gfx12<0x010, "GLOBAL_LOAD_UBYTE", "global_load_u8", true>; 2609defm GLOBAL_LOAD_I8 : VGLOBAL_Real_AllAddr_gfx12<0x011, "GLOBAL_LOAD_SBYTE", "global_load_i8", true>; 2610defm GLOBAL_LOAD_U16 : VGLOBAL_Real_AllAddr_gfx12<0x012, "GLOBAL_LOAD_USHORT", "global_load_u16", true>; 2611defm GLOBAL_LOAD_I16 : VGLOBAL_Real_AllAddr_gfx12<0x013, "GLOBAL_LOAD_SSHORT", "global_load_i16", true>; 2612defm GLOBAL_LOAD_B32 : VGLOBAL_Real_AllAddr_gfx12<0x014, "GLOBAL_LOAD_DWORD", "global_load_b32", true>; 2613defm GLOBAL_LOAD_B64 : VGLOBAL_Real_AllAddr_gfx12<0x015, "GLOBAL_LOAD_DWORDX2", "global_load_b64", true>; 2614defm GLOBAL_LOAD_B96 : VGLOBAL_Real_AllAddr_gfx12<0x016, "GLOBAL_LOAD_DWORDX3", "global_load_b96", true>; 2615defm GLOBAL_LOAD_B128 : VGLOBAL_Real_AllAddr_gfx12<0x017, "GLOBAL_LOAD_DWORDX4", "global_load_b128", true>; 2616defm GLOBAL_STORE_B8 : VGLOBAL_Real_AllAddr_gfx12<0x018, "GLOBAL_STORE_BYTE", "global_store_b8", true>; 2617defm GLOBAL_STORE_B16 : VGLOBAL_Real_AllAddr_gfx12<0x019, "GLOBAL_STORE_SHORT", "global_store_b16", true>; 2618defm GLOBAL_STORE_B32 : VGLOBAL_Real_AllAddr_gfx12<0x01a, "GLOBAL_STORE_DWORD", "global_store_b32", true>; 2619defm GLOBAL_STORE_B64 : VGLOBAL_Real_AllAddr_gfx12<0x01b, "GLOBAL_STORE_DWORDX2", "global_store_b64", true>; 2620defm GLOBAL_STORE_B96 : VGLOBAL_Real_AllAddr_gfx12<0x01c, "GLOBAL_STORE_DWORDX3", "global_store_b96", true>; 2621defm GLOBAL_STORE_B128 : VGLOBAL_Real_AllAddr_gfx12<0x01d, "GLOBAL_STORE_DWORDX4", "global_store_b128", true>; 2622defm GLOBAL_LOAD_D16_U8 : VGLOBAL_Real_AllAddr_gfx12<0x01e, "GLOBAL_LOAD_UBYTE_D16", "global_load_d16_u8">; 2623defm GLOBAL_LOAD_D16_I8 : VGLOBAL_Real_AllAddr_gfx12<0x01f, "GLOBAL_LOAD_SBYTE_D16", "global_load_d16_i8">; 2624defm GLOBAL_LOAD_D16_B16 : VGLOBAL_Real_AllAddr_gfx12<0x020, "GLOBAL_LOAD_SHORT_D16", "global_load_d16_b16">; 2625defm GLOBAL_LOAD_D16_HI_U8 : VGLOBAL_Real_AllAddr_gfx12<0x021, "GLOBAL_LOAD_UBYTE_D16_HI", "global_load_d16_hi_u8">; 2626defm GLOBAL_LOAD_D16_HI_I8 : VGLOBAL_Real_AllAddr_gfx12<0x022, "GLOBAL_LOAD_SBYTE_D16_HI", "global_load_d16_hi_i8">; 2627defm GLOBAL_LOAD_D16_HI_B16 : VGLOBAL_Real_AllAddr_gfx12<0x023, "GLOBAL_LOAD_SHORT_D16_HI", "global_load_d16_hi_b16">; 2628defm GLOBAL_STORE_D16_HI_B8 : VGLOBAL_Real_AllAddr_gfx12<0x024, "GLOBAL_STORE_BYTE_D16_HI", "global_store_d16_hi_b8">; 2629defm GLOBAL_STORE_D16_HI_B16 : VGLOBAL_Real_AllAddr_gfx12<0x025, "GLOBAL_STORE_SHORT_D16_HI", "global_store_d16_hi_b16">; 2630defm GLOBAL_LOAD_ADDTID_B32 : VGLOBAL_Real_AllAddr_gfx12<0x028, "GLOBAL_LOAD_DWORD_ADDTID", "global_load_addtid_b32">; 2631defm GLOBAL_STORE_ADDTID_B32 : VGLOBAL_Real_AllAddr_gfx12<0x029, "GLOBAL_STORE_DWORD_ADDTID", "global_store_addtid_b32">; 2632 2633defm GLOBAL_ATOMIC_SWAP_B32 : VGLOBAL_Real_Atomics_gfx12<0x033, "GLOBAL_ATOMIC_SWAP", "global_atomic_swap_b32", true>; 2634defm GLOBAL_ATOMIC_CMPSWAP_B32 : VGLOBAL_Real_Atomics_gfx12<0x034, "GLOBAL_ATOMIC_CMPSWAP", "global_atomic_cmpswap_b32", true>; 2635defm GLOBAL_ATOMIC_ADD_U32 : VGLOBAL_Real_Atomics_gfx12<0x035, "GLOBAL_ATOMIC_ADD", "global_atomic_add_u32", true>; 2636defm GLOBAL_ATOMIC_SUB_U32 : VGLOBAL_Real_Atomics_gfx12<0x036, "GLOBAL_ATOMIC_SUB", "global_atomic_sub_u32", true>; 2637defm GLOBAL_ATOMIC_SUB_CLAMP_U32 : VGLOBAL_Real_Atomics_gfx12<0x037, "GLOBAL_ATOMIC_CSUB", "global_atomic_sub_clamp_u32", true, "global_atomic_csub_u32">; 2638defm GLOBAL_ATOMIC_MIN_I32 : VGLOBAL_Real_Atomics_gfx12<0x038, "GLOBAL_ATOMIC_SMIN", "global_atomic_min_i32", true>; 2639defm GLOBAL_ATOMIC_MIN_U32 : VGLOBAL_Real_Atomics_gfx12<0x039, "GLOBAL_ATOMIC_UMIN", "global_atomic_min_u32", true>; 2640defm GLOBAL_ATOMIC_MAX_I32 : VGLOBAL_Real_Atomics_gfx12<0x03a, "GLOBAL_ATOMIC_SMAX", "global_atomic_max_i32", true>; 2641defm GLOBAL_ATOMIC_MAX_U32 : VGLOBAL_Real_Atomics_gfx12<0x03b, "GLOBAL_ATOMIC_UMAX", "global_atomic_max_u32", true>; 2642defm GLOBAL_ATOMIC_AND_B32 : VGLOBAL_Real_Atomics_gfx12<0x03c, "GLOBAL_ATOMIC_AND", "global_atomic_and_b32", true>; 2643defm GLOBAL_ATOMIC_OR_B32 : VGLOBAL_Real_Atomics_gfx12<0x03d, "GLOBAL_ATOMIC_OR", "global_atomic_or_b32", true>; 2644defm GLOBAL_ATOMIC_XOR_B32 : VGLOBAL_Real_Atomics_gfx12<0x03e, "GLOBAL_ATOMIC_XOR", "global_atomic_xor_b32", true>; 2645defm GLOBAL_ATOMIC_INC_U32 : VGLOBAL_Real_Atomics_gfx12<0x03f, "GLOBAL_ATOMIC_INC", "global_atomic_inc_u32", true>; 2646defm GLOBAL_ATOMIC_DEC_U32 : VGLOBAL_Real_Atomics_gfx12<0x040, "GLOBAL_ATOMIC_DEC", "global_atomic_dec_u32", true>; 2647defm GLOBAL_ATOMIC_SWAP_B64 : VGLOBAL_Real_Atomics_gfx12<0x041, "GLOBAL_ATOMIC_SWAP_X2", "global_atomic_swap_b64", true>; 2648defm GLOBAL_ATOMIC_CMPSWAP_B64 : VGLOBAL_Real_Atomics_gfx12<0x042, "GLOBAL_ATOMIC_CMPSWAP_X2", "global_atomic_cmpswap_b64", true>; 2649defm GLOBAL_ATOMIC_ADD_U64 : VGLOBAL_Real_Atomics_gfx12<0x043, "GLOBAL_ATOMIC_ADD_X2", "global_atomic_add_u64", true>; 2650defm GLOBAL_ATOMIC_SUB_U64 : VGLOBAL_Real_Atomics_gfx12<0x044, "GLOBAL_ATOMIC_SUB_X2", "global_atomic_sub_u64", true>; 2651defm GLOBAL_ATOMIC_MIN_I64 : VGLOBAL_Real_Atomics_gfx12<0x045, "GLOBAL_ATOMIC_SMIN_X2", "global_atomic_min_i64", true>; 2652defm GLOBAL_ATOMIC_MIN_U64 : VGLOBAL_Real_Atomics_gfx12<0x046, "GLOBAL_ATOMIC_UMIN_X2", "global_atomic_min_u64", true>; 2653defm GLOBAL_ATOMIC_MAX_I64 : VGLOBAL_Real_Atomics_gfx12<0x047, "GLOBAL_ATOMIC_SMAX_X2", "global_atomic_max_i64", true>; 2654defm GLOBAL_ATOMIC_MAX_U64 : VGLOBAL_Real_Atomics_gfx12<0x048, "GLOBAL_ATOMIC_UMAX_X2", "global_atomic_max_u64", true>; 2655defm GLOBAL_ATOMIC_AND_B64 : VGLOBAL_Real_Atomics_gfx12<0x049, "GLOBAL_ATOMIC_AND_X2", "global_atomic_and_b64", true>; 2656defm GLOBAL_ATOMIC_OR_B64 : VGLOBAL_Real_Atomics_gfx12<0x04a, "GLOBAL_ATOMIC_OR_X2", "global_atomic_or_b64", true>; 2657defm GLOBAL_ATOMIC_XOR_B64 : VGLOBAL_Real_Atomics_gfx12<0x04b, "GLOBAL_ATOMIC_XOR_X2", "global_atomic_xor_b64", true>; 2658defm GLOBAL_ATOMIC_INC_U64 : VGLOBAL_Real_Atomics_gfx12<0x04c, "GLOBAL_ATOMIC_INC_X2", "global_atomic_inc_u64", true>; 2659defm GLOBAL_ATOMIC_DEC_U64 : VGLOBAL_Real_Atomics_gfx12<0x04d, "GLOBAL_ATOMIC_DEC_X2", "global_atomic_dec_u64", true>; 2660defm GLOBAL_ATOMIC_MIN_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x051, "GLOBAL_ATOMIC_FMIN", "global_atomic_min_num_f32", true, "global_atomic_min_f32">; 2661defm GLOBAL_ATOMIC_MAX_NUM_F32 : VGLOBAL_Real_Atomics_gfx12<0x052, "GLOBAL_ATOMIC_FMAX", "global_atomic_max_num_f32", true, "global_atomic_max_f32">; 2662defm GLOBAL_ATOMIC_ADD_F32 : VGLOBAL_Real_Atomics_gfx12<0x056, "GLOBAL_ATOMIC_ADD_F32", "global_atomic_add_f32">; 2663defm GLOBAL_ATOMIC_ORDERED_ADD_B64 : VGLOBAL_Real_Atomics_gfx12<0x073, "GLOBAL_ATOMIC_ORDERED_ADD_B64", "global_atomic_ordered_add_b64">; 2664 2665// ENC_VSCRATCH. 2666defm SCRATCH_LOAD_U8 : VSCRATCH_Real_AllAddr_gfx12<0x10, "SCRATCH_LOAD_UBYTE", "scratch_load_u8", true>; 2667defm SCRATCH_LOAD_I8 : VSCRATCH_Real_AllAddr_gfx12<0x11, "SCRATCH_LOAD_SBYTE", "scratch_load_i8", true>; 2668defm SCRATCH_LOAD_U16 : VSCRATCH_Real_AllAddr_gfx12<0x12, "SCRATCH_LOAD_USHORT", "scratch_load_u16", true>; 2669defm SCRATCH_LOAD_I16 : VSCRATCH_Real_AllAddr_gfx12<0x13, "SCRATCH_LOAD_SSHORT", "scratch_load_i16", true>; 2670defm SCRATCH_LOAD_B32 : VSCRATCH_Real_AllAddr_gfx12<0x14, "SCRATCH_LOAD_DWORD", "scratch_load_b32", true>; 2671defm SCRATCH_LOAD_B64 : VSCRATCH_Real_AllAddr_gfx12<0x15, "SCRATCH_LOAD_DWORDX2", "scratch_load_b64", true>; 2672defm SCRATCH_LOAD_B96 : VSCRATCH_Real_AllAddr_gfx12<0x16, "SCRATCH_LOAD_DWORDX3", "scratch_load_b96", true>; 2673defm SCRATCH_LOAD_B128 : VSCRATCH_Real_AllAddr_gfx12<0x17, "SCRATCH_LOAD_DWORDX4", "scratch_load_b128", true>; 2674defm SCRATCH_STORE_B8 : VSCRATCH_Real_AllAddr_gfx12<0x18, "SCRATCH_STORE_BYTE", "scratch_store_b8", true>; 2675defm SCRATCH_STORE_B16 : VSCRATCH_Real_AllAddr_gfx12<0x19, "SCRATCH_STORE_SHORT", "scratch_store_b16", true>; 2676defm SCRATCH_STORE_B32 : VSCRATCH_Real_AllAddr_gfx12<0x1a, "SCRATCH_STORE_DWORD", "scratch_store_b32", true>; 2677defm SCRATCH_STORE_B64 : VSCRATCH_Real_AllAddr_gfx12<0x1b, "SCRATCH_STORE_DWORDX2", "scratch_store_b64", true>; 2678defm SCRATCH_STORE_B96 : VSCRATCH_Real_AllAddr_gfx12<0x1c, "SCRATCH_STORE_DWORDX3", "scratch_store_b96", true>; 2679defm SCRATCH_STORE_B128 : VSCRATCH_Real_AllAddr_gfx12<0x1d, "SCRATCH_STORE_DWORDX4", "scratch_store_b128", true>; 2680defm SCRATCH_LOAD_D16_U8 : VSCRATCH_Real_AllAddr_gfx12<0x1e, "SCRATCH_LOAD_UBYTE_D16", "scratch_load_d16_u8">; 2681defm SCRATCH_LOAD_D16_I8 : VSCRATCH_Real_AllAddr_gfx12<0x1f, "SCRATCH_LOAD_SBYTE_D16", "scratch_load_d16_i8">; 2682defm SCRATCH_LOAD_D16_B16 : VSCRATCH_Real_AllAddr_gfx12<0x20, "SCRATCH_LOAD_SHORT_D16", "scratch_load_d16_b16">; 2683defm SCRATCH_LOAD_D16_HI_U8 : VSCRATCH_Real_AllAddr_gfx12<0x21, "SCRATCH_LOAD_UBYTE_D16_HI", "scratch_load_d16_hi_u8">; 2684defm SCRATCH_LOAD_D16_HI_I8 : VSCRATCH_Real_AllAddr_gfx12<0x22, "SCRATCH_LOAD_SBYTE_D16_HI", "scratch_load_d16_hi_i8">; 2685defm SCRATCH_LOAD_D16_HI_B16 : VSCRATCH_Real_AllAddr_gfx12<0x23, "SCRATCH_LOAD_SHORT_D16_HI", "scratch_load_d16_hi_b16">; 2686defm SCRATCH_STORE_D16_HI_B8 : VSCRATCH_Real_AllAddr_gfx12<0x24, "SCRATCH_STORE_BYTE_D16_HI", "scratch_store_d16_hi_b8">; 2687defm SCRATCH_STORE_D16_HI_B16 : VSCRATCH_Real_AllAddr_gfx12<0x25, "SCRATCH_STORE_SHORT_D16_HI", "scratch_store_d16_hi_b16">; 2688